From d7067b2db78aab1a76f9c443b1fcf81c3e35dd50 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jul 2013 17:09:01 -0400 Subject: NFSv4: encode_attrs should not backfill the bitmap and attribute length The attribute length is already calculated in advance. There is no reason why we cannot calculate the bitmap in advance too so that we don't have to play pointer games. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3850b01..1a4a3bd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -997,12 +997,10 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, int owner_namelen = 0; int owner_grouplen = 0; __be32 *p; - __be32 *q; - int len; - uint32_t bmval_len = 2; - uint32_t bmval0 = 0; - uint32_t bmval1 = 0; - uint32_t bmval2 = 0; + unsigned i; + uint32_t len = 0; + uint32_t bmval_len; + uint32_t bmval[3] = { 0 }; /* * We reserve enough space to write the entire attribute buffer at once. @@ -1011,13 +1009,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, * = 40 bytes, plus any contribution from variable-length fields * such as owner/group. 
*/ - len = 8; - - /* Sigh */ - if (iap->ia_valid & ATTR_SIZE) + if (iap->ia_valid & ATTR_SIZE) { + bmval[0] |= FATTR4_WORD0_SIZE; len += 8; - if (iap->ia_valid & ATTR_MODE) + } + if (iap->ia_valid & ATTR_MODE) { + bmval[1] |= FATTR4_WORD1_MODE; len += 4; + } if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { @@ -1028,6 +1027,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, owner_namelen = sizeof("nobody") - 1; /* goto out; */ } + bmval[1] |= FATTR4_WORD1_OWNER; len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { @@ -1039,92 +1039,73 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, owner_grouplen = sizeof("nobody") - 1; /* goto out; */ } + bmval[1] |= FATTR4_WORD1_OWNER_GROUP; len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); } - if (iap->ia_valid & ATTR_ATIME_SET) + if (iap->ia_valid & ATTR_ATIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 16; - else if (iap->ia_valid & ATTR_ATIME) + } else if (iap->ia_valid & ATTR_ATIME) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 4; - if (iap->ia_valid & ATTR_MTIME_SET) + } + if (iap->ia_valid & ATTR_MTIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 16; - else if (iap->ia_valid & ATTR_MTIME) + } else if (iap->ia_valid & ATTR_MTIME) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 4; + } if (label) { len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); - bmval_len = 3; + bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; } - len += bmval_len << 2; - p = reserve_space(xdr, len); + if (bmval[2] != 0) + bmval_len = 3; + else if (bmval[1] != 0) + bmval_len = 2; + else + bmval_len = 1; + + p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len); - /* - * We write the bitmap length now, but leave the bitmap and the attribute - * buffer length to be backfilled at the end of this routine. 
- */ *p++ = cpu_to_be32(bmval_len); - q = p; - /* Skip bitmap entries + attrlen */ - p += bmval_len + 1; + for (i = 0; i < bmval_len; i++) + *p++ = cpu_to_be32(bmval[i]); + *p++ = cpu_to_be32(len); - if (iap->ia_valid & ATTR_SIZE) { - bmval0 |= FATTR4_WORD0_SIZE; + if (bmval[0] & FATTR4_WORD0_SIZE) p = xdr_encode_hyper(p, iap->ia_size); - } - if (iap->ia_valid & ATTR_MODE) { - bmval1 |= FATTR4_WORD1_MODE; + if (bmval[1] & FATTR4_WORD1_MODE) *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); - } - if (iap->ia_valid & ATTR_UID) { - bmval1 |= FATTR4_WORD1_OWNER; + if (bmval[1] & FATTR4_WORD1_OWNER) p = xdr_encode_opaque(p, owner_name, owner_namelen); - } - if (iap->ia_valid & ATTR_GID) { - bmval1 |= FATTR4_WORD1_OWNER_GROUP; + if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) p = xdr_encode_opaque(p, owner_group, owner_grouplen); + if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { + if (iap->ia_valid & ATTR_ATIME_SET) { + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); + *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } - if (iap->ia_valid & ATTR_ATIME_SET) { - bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); - *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); - } - else if (iap->ia_valid & ATTR_ATIME) { - bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); - } - if (iap->ia_valid & ATTR_MTIME_SET) { - bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); - } - else if (iap->ia_valid & ATTR_MTIME) { - bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { + if (iap->ia_valid & ATTR_MTIME_SET) { + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + p = 
xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); + *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } - if (label) { - bmval2 |= FATTR4_WORD2_SECURITY_LABEL; + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { *p++ = cpu_to_be32(label->lfs); *p++ = cpu_to_be32(label->pi); *p++ = cpu_to_be32(label->len); p = xdr_encode_opaque_fixed(p, label->label, label->len); } - /* - * Now we backfill the bitmap and the attribute buffer length. - */ - if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", - len, ((char *)p - (char *)q) + 4); - BUG(); - } - *q++ = htonl(bmval0); - *q++ = htonl(bmval1); - if (bmval_len == 3) - *q++ = htonl(bmval2); - len = (char *)p - (char *)(q + 1); - *q = htonl(len); - /* out: */ } -- cgit v0.10.2 From cc7936f9ad085351bc2ccb403c403392e1337050 Mon Sep 17 00:00:00 2001 From: Nadav Shemer Date: Sun, 21 Jul 2013 17:21:43 +0300 Subject: nfs: fix open(O_RDONLY|O_TRUNC) in NFS4.0 nfs4_proc_setattr removes ATTR_OPEN from sattr->ia_valid, but later nfs4_do_setattr checks for it Signed-off-by: Nadav Shemer Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf11799..1bf291f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2940,10 +2940,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, /* Deal with open(O_TRUNC) */ if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME); /* Optimization: if the end result is no change, don't RPC */ - if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) + if ((sattr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; /* Search for an existing open(O_WRITE) file */ -- cgit v0.10.2 From 275448eb10cfb2b09b34b0b08498a3401222b413 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 16 Jul 2013 09:16:45 -0400 Subject: rpc_pipe: convert back to simple_dir_inode_operations Now that 
Al has fixed simple_lookup to account for the case where sb->s_d_op is set, there's no need to keep our own special lookup op. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 406859c..017aedc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,23 +480,6 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -/* - * Lookup the data. This is trivial - if the dentry didn't already - * exist, we know it is negative. - */ -static struct dentry * -rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) -{ - if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); - d_add(dentry, NULL); - return NULL; -} - -static const struct inode_operations rpc_dir_inode_operations = { - .lookup = rpc_lookup, -}; - static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -509,7 +492,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; - inode->i_op = &rpc_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); default: break; -- cgit v0.10.2 From b14b7979d70c19b7fa7c4d3244a4ea5916d77492 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 12 Jul 2013 12:31:45 -0400 Subject: NFS: Fix return type of nfs4_end_drain_session() stub Clean up: when NFSv4.1 support is compiled out, nfs4_end_drain_session() becomes a stub. Make the synopsis of the stub match the synopsis of the real version of the function. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e22862f..ad1a753 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2116,7 +2116,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) } #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +static void nfs4_end_drain_session(struct nfs_client *clp) { } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { -- cgit v0.10.2 From 1771c5774ba0b8ec35c3337fe9246f2f0f42b11b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 22 Jul 2013 12:42:05 -0400 Subject: NFSv4.1 Use the mount point rpc_clnt for layoutreturn Should not use the clientid maintenance rpc_clnt. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1bf291f..c2fc5dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6876,7 +6876,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) .rpc_cred = lrp->cred, }; struct rpc_task_setup task_setup_data = { - .rpc_client = lrp->clp->cl_rpcclient, + .rpc_client = NFS_SERVER(lrp->args.inode)->client, .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, -- cgit v0.10.2 From 55b592933b7d0091d515ef1663334470a343ec98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Jul 2013 16:36:35 -0400 Subject: NFSv4: Fix nfs4_init_uniform_client_string for net namespaces Commit 6f2ea7f2a (NFS: Add nfs4_unique_id boot parameter) introduces a boot parameter that allows client administrators to set a string identifier for use by the EXCHANGE_ID and SETCLIENTID arguments in order to make them more globally unique. 
Unfortunately, that uniquifier is no longer globally unique in the presence of net namespaces, since each container expects to be able to set up their own lease when mounting a new NFSv4/4.1 partition. The fix is to add back in the container-specific hostname in addition to the unique id. Cc: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c2fc5dd..0e64ccc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4662,10 +4662,14 @@ static unsigned int nfs4_init_uniform_client_string(const struct nfs_client *clp, char *buf, size_t len) { - char *nodename = clp->cl_rpcclient->cl_nodename; + const char *nodename = clp->cl_rpcclient->cl_nodename; if (nfs4_client_id_uniquifier[0] != '\0') - nodename = nfs4_client_id_uniquifier; + return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, + clp->cl_minorversion, + nfs4_client_id_uniquifier, + nodename); return scnprintf(buf, len, "Linux NFSv%u.%u %s", clp->rpc_ops->version, clp->cl_minorversion, nodename); -- cgit v0.10.2 From 9597c13b2f3c54240b1b902a677672faa70ab7c5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 2 Aug 2013 11:39:32 -0400 Subject: nfs: verify open flags before allowing an atomic open Currently, you can open a NFSv4 file with O_APPEND|O_DIRECT, but cannot fcntl(F_SETFL,...) with those flags. This flag combination is explicitly forbidden on NFSv3 opens, and it seems like it should also be on NFSv4. 
Reported-by: Chao Ye Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e474ca2b..39e69d4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1413,6 +1413,10 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + err = nfs_check_flags(open_flags); + if (err) + return err; + /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { if (!d_unhashed(dentry)) { -- cgit v0.10.2 From 6da1a034362f86e157e251e65394f0b6570e3e3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Aug 2013 12:30:52 -0400 Subject: NFSv4: Refuse mount attempts with proto=udp RFC3530 disallows the use of udp as a transport protocol for NFSv4. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 71fdc0d..f2071d2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2084,6 +2084,8 @@ static int nfs_validate_text_mount_data(void *options, max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; nfs4_validate_mount_flags(args); #else goto out_v4_not_compiled; @@ -2106,6 +2108,10 @@ static int nfs_validate_text_mount_data(void *options, out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; +#else +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; #endif /* !CONFIG_NFS_V4 */ out_no_address: @@ -2711,6 +2717,8 @@ static int nfs4_validate_mount_data(void *options, args->acdirmax = data->acdirmax; args->nfs_server.protocol = data->proto; nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; break; default: @@ -2731,6 +2739,10 @@ out_inval_auth: out_no_address: dfprintk(MOUNT, 
"NFS4: mount program didn't pass remote address\n"); return -EINVAL; + +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; } /* -- cgit v0.10.2 From d688f7b8f62857c252b886fa16e8b38b83cfaf7e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Jul 2013 12:28:28 -0400 Subject: NFS: Use root's credential for lease management when keytab is missing Commit 05f4c350 "NFS: Discover NFSv4 server trunking when mounting" Fri Sep 14 17:24:32 2012 introduced Uniform Client String support, which forces our NFS client to establish a client ID immediately during a mount operation rather than waiting until a user wants to open a file. Normally machine credentials (eg. from a keytab) are used to perform a mount operation that is protected by Kerberos. Before 05fc350, SETCLIENTID used a machine credential, or fell back to a regular user's credential if no keytab is available. On clients that don't have a keytab, performing SETCLIENTID early means there's no user credential to fall back on, since no regular user has kinit'd yet. 05f4c350 seems to have broken the ability to mount with sec=krb5 on clients that don't have a keytab in kernels 3.7 - 3.10. To address this regression, commit 4edaa308 (NFS: Use "krb5i" to establish NFSv4 state whenever possible), Sat Mar 16 15:56:20 2013, was merged in 3.10. This commit forces the NFS client to fall back to AUTH_SYS for lease management operations if no keytab is available. Neil Brown noticed that, since root is required to kinit to do a sec=krb5 mount when a client doesn't have a keytab, we can try to use root's Kerberos credential before AUTH_SYS. Now, when determining a principal and flavor to use for lease management, the NFS client tries in this order: 1. Flavor: AUTH_GSS, krb5i Principal: service principal (via keytab) 2. Flavor: AUTH_GSS, krb5i Principal: user principal established for UID 0 (via kinit) 3. 
Flavor: AUTH_SYS Principal: UID 0 / GID 0 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ad1a753..f27760b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -154,6 +154,19 @@ struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) return cred; } +static void nfs4_root_machine_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred, *new; + + new = rpc_lookup_machine_cred(NULL); + spin_lock(&clp->cl_lock); + cred = clp->cl_machine_cred; + clp->cl_machine_cred = new; + spin_unlock(&clp->cl_lock); + if (cred != NULL) + put_rpccred(cred); +} + static struct rpc_cred * nfs4_get_renew_cred_server_locked(struct nfs_server *server) { @@ -1896,7 +1909,11 @@ again: __func__, status); goto again; case -EACCES: - if (i++) + if (i++ == 0) { + nfs4_root_machine_cred(clp); + goto again; + } + if (i > 2) break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: -- cgit v0.10.2 From 73d8bde5e4b658948be0d3df51b323ed323997a2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Jul 2013 12:28:37 -0400 Subject: NFS: Never use user credentials for lease renewal Never try to use a non-UID 0 user credential for lease management, as that credential can change out from under us. The server will block NFSv4 lease recovery with NFS4ERR_CLID_INUSE. Since the mechanism to acquire a credential for lease management is now the same for all minor versions, replace the minor version- specific callout with a single function. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ee81e35..d7bb59d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -193,7 +193,6 @@ struct nfs4_state_recovery_ops { int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); int (*establish_clid)(struct nfs_client *, struct rpc_cred *); - struct rpc_cred * (*get_clid_cred)(struct nfs_client *); int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *); int (*detect_trunking)(struct nfs_client *, struct nfs_client **, struct rpc_cred *); @@ -319,7 +318,7 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); int nfs4_discover_server_trunking(struct nfs_client *clp, @@ -327,7 +326,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, int nfs40_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) -struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e64ccc..a187f4d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6069,7 +6069,7 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_preserve_clid) goto out; - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) put_rpccred(cred); @@ -7363,7 +7363,6 @@ static const struct 
nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, .detect_trunking = nfs40_discover_server_trunking, }; @@ -7374,7 +7373,6 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, .detect_trunking = nfs41_discover_server_trunking, }; @@ -7386,7 +7384,6 @@ static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, }; #if defined(CONFIG_NFS_V4_1) @@ -7396,7 +7393,6 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .recover_open = nfs41_open_expired, .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 36e21cb..202e363 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -441,7 +441,7 @@ void nfs4_destroy_session(struct nfs4_session *session) struct rpc_xprt *xprt; struct rpc_cred *cred; - cred = nfs4_get_exchange_id_cred(session->clp); + cred = nfs4_get_clid_cred(session->clp); nfs4_proc_destroy_session(session, cred); if (cred) put_rpccred(cred); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f27760b..6818964 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -352,62 +352,21 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, return nfs41_walk_client_list(clp, result, cred); } -struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) -{ - struct rpc_cred *cred; - - 
spin_lock(&clp->cl_lock); - cred = nfs4_get_machine_cred_locked(clp); - spin_unlock(&clp->cl_lock); - return cred; -} - #endif /* CONFIG_NFS_V4_1 */ -static struct rpc_cred * -nfs4_get_setclientid_cred_server(struct nfs_server *server) -{ - struct nfs_client *clp = server->nfs_client; - struct rpc_cred *cred = NULL; - struct nfs4_state_owner *sp; - struct rb_node *pos; - - spin_lock(&clp->cl_lock); - pos = rb_first(&server->state_owners); - if (pos != NULL) { - sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); - cred = get_rpccred(sp->so_cred); - } - spin_unlock(&clp->cl_lock); - return cred; -} - /** - * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation + * nfs4_get_clid_cred - Acquire credential for a setclientid operation * @clp: client state handle * * Returns an rpc_cred with reference count bumped, or NULL. */ -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp) { - struct nfs_server *server; struct rpc_cred *cred; spin_lock(&clp->cl_lock); cred = nfs4_get_machine_cred_locked(clp); spin_unlock(&clp->cl_lock); - if (cred != NULL) - goto out; - - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - cred = nfs4_get_setclientid_cred_server(server); - if (cred != NULL) - break; - } - rcu_read_unlock(); - -out: return cred; } @@ -1631,7 +1590,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) if (!nfs4_state_clear_reclaim_reboot(clp)) return; ops = clp->cl_mvops->reboot_recovery_ops; - cred = ops->get_clid_cred(clp); + cred = nfs4_get_clid_cred(clp); nfs4_reclaim_complete(clp, ops, cred); put_rpccred(cred); } @@ -1745,7 +1704,7 @@ static int nfs4_check_lease(struct nfs_client *clp) cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - cred = nfs4_get_setclientid_cred(clp); + cred = nfs4_get_clid_cred(clp); status = -ENOKEY; if (cred == NULL) goto out; @@ 
-1817,7 +1776,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; - cred = ops->get_clid_cred(clp); + cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; status = ops->establish_clid(clp, cred); @@ -1891,7 +1850,7 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, mutex_lock(&nfs_clid_init_mutex); again: status = -ENOENT; - cred = ops->get_clid_cred(clp); + cred = nfs4_get_clid_cred(clp); if (cred == NULL) goto out_unlock; @@ -2069,7 +2028,7 @@ static int nfs4_reset_session(struct nfs_client *clp) if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); switch (status) { case 0: @@ -2112,7 +2071,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); if (cred) put_rpccred(cred); -- cgit v0.10.2 From f8407299f61681e9733f06c7214e81002cb459b3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jul 2013 11:59:49 -0400 Subject: NFS Remove unused authflavour parameter from init_client Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 340b1ef..2dceee4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -501,8 +501,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return rpc_ops->init_client(new, timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr); } spin_unlock(&nn->nfs_client_lock); @@ -694,13 +693,12 @@ EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); * @clp: nfs_client to initialise * @timeparms: timeout parameters for underlying RPC 
transport * @ip_addr: IP presentation address (not used) - * @authflavor: authentication flavor for underlying RPC transport * * Returns pointer to an NFS client, or an ERR_PTR value. */ struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour) + const char *ip_addr) { int error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3c8373f..9b694f1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -267,7 +267,7 @@ extern struct rpc_procinfo nfs4_procedures[]; void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour); + const char *ip_addr); /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, @@ -451,8 +451,7 @@ extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); extern void __nfs4_read_done_cb(struct nfs_read_data *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour); + const char *ip_addr); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 90dce91..767a5e3 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -187,8 +187,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) */ struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour) + const char *ip_addr) { char buf[INET6_ADDRSTRLEN + 1]; struct nfs_client *old; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8651574..ddc3e32 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1442,7 +1442,7 @@ struct nfs_rpc_ops { struct nfs_client 
*(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, - const char *, rpc_authflavor_t); + const char *); void (*free_client) (struct nfs_client *); struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, -- cgit v0.10.2 From bc4b2a86a55a767fabbc70b4b8634d519c59edef Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 22 Jul 2013 18:41:23 -0400 Subject: NFSv4.1 Increase NFS4_DEF_SLOT_TABLE_SIZE Increase NFS4_DEF_SLOT_TABLE_SIZE which is used as the client ca_maxrequests value in CREATE_SESSION. Current non-dynamic session slot server implementations use the client ca_maxrequests as a maximum slot number: 64 session slots can handle most workloads. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 3a153d8..86a0669 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -8,7 +8,7 @@ #define __LINUX_FS_NFS_NFS4SESSION_H /* maximum number of slots to use */ -#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_DEF_SLOT_TABLE_SIZE (64U) #define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) -- cgit v0.10.2 From 5ec16a8500d339b0e7a0cc76b785d18daad354d4 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 8 Aug 2013 10:57:55 -0400 Subject: NFSv4.1 Use clientid management rpc_clnt for secinfo As per RFC 3530 and RFC 5661 Security Considerations Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible" uses the nfs_client cl_rpcclient for all clientid management operations. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a187f4d..f81dcec 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5794,6 +5794,10 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, return err; } +/** + * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if + * possible) as per RFC3530bis and RFC5661 Security Considerations sections + */ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) { int status; @@ -5809,9 +5813,10 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_argp = &args, .rpc_resp = &res, }; + struct rpc_clnt *clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; dprintk("NFS call secinfo %s\n", name->name); - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); return status; } -- cgit v0.10.2 From 97431204ea005ec8070ac94bc3251e836daa7ca7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 8 Aug 2013 10:57:56 -0400 Subject: NFSv4.1 Use clientid management rpc_clnt for secinfo_no_name As per RFC 5661 Security Considerations Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible" uses the nfs_client cl_rpcclient for all clientid management operations. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f81dcec..f50ad28 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7090,6 +7090,10 @@ out: return status; } +/** + * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if + * possible) as per RFC3530bis and RFC5661 Security Considerations sections + */ static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) @@ -7105,7 +7109,8 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + return nfs4_call_sync(server->nfs_client->cl_rpcclient, server, &msg, + &args.seq_args, &res.seq_res, 0); } static int -- cgit v0.10.2 From 5948a401a7f06d67f8548651041e00fd1aafcaf9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 12:29:27 -0400 Subject: NFS: Remove the NFSv4 "open optimisation" from nfs_permission Ever since commit 6168f62cb (Add ACCESS operation to OPEN compound) the NFSv4 atomic open has primed the access cache, and so nfs_permission will no longer do an RPC call on the wire. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 39e69d4..5d737bd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2245,11 +2245,6 @@ int nfs_permission(struct inode *inode, int mask) case S_IFLNK: goto out; case S_IFREG: - /* NFSv4 has atomic_open... 
*/ - if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && (mask & MAY_OPEN) - && !(mask & MAY_EXEC)) - goto out; break; case S_IFDIR: /* -- cgit v0.10.2 From a9943d11c1e07c3f8cc6438c2700179971c4056f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 21:04:11 -0400 Subject: NFSv3: Deal with a sparse warning in nfs3_proc_create Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f5c84c3..1db588a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -336,8 +336,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, data->arg.create.createmode = NFS3_CREATE_UNCHECKED; if (flags & O_EXCL) { data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; - data->arg.create.verifier[0] = jiffies; - data->arg.create.verifier[1] = current->pid; + data->arg.create.verifier[0] = cpu_to_be32(jiffies); + data->arg.create.verifier[1] = cpu_to_be32(current->pid); } sattr->ia_mode &= ~current_umask(); -- cgit v0.10.2 From c281fa9c1f273542b45711e4737ace43c2066605 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 21:06:49 -0400 Subject: NFSv4: Deal with a sparse warning in nfs4_opendata_alloc Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f50ad28..3bc1632 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -933,7 +933,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dentry->d_inode); } if (attrs != NULL && attrs->ia_valid != 0) { - __be32 verf[2]; + __u32 verf[2]; p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); -- cgit v0.10.2 From 17f26b1246425a5b77f05ac871889265357566a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 15:48:42 -0400 Subject: NFSv4: Deal with some more sparse warnings Technically, we don't really need to convert these time stamps, since they are actually cookies. 
Signed-off-by: Trond Myklebust Cc: Chuck Lever diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3bc1632..e53e42b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1103,7 +1103,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat goto no_delegation; spin_lock(&deleg_cur->lock); - if (nfsi->delegation != deleg_cur || + if (rcu_dereference(nfsi->delegation) != deleg_cur || test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) || (deleg_cur->type & fmode) != fmode) goto no_delegation_unlock; @@ -4632,11 +4632,11 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, /* An impossible timestamp guarantees this value * will never match a generated boot time. */ verf[0] = 0; - verf[1] = (__be32)(NSEC_PER_SEC + 1); + verf[1] = cpu_to_be32(NSEC_PER_SEC + 1); } else { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - verf[0] = (__be32)nn->boot_time.tv_sec; - verf[1] = (__be32)nn->boot_time.tv_nsec; + verf[0] = cpu_to_be32(nn->boot_time.tv_sec); + verf[1] = cpu_to_be32(nn->boot_time.tv_nsec); } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } @@ -7263,7 +7263,7 @@ static void nfs41_free_stateid_release(void *calldata) kfree(calldata); } -const struct rpc_call_ops nfs41_free_stateid_ops = { +static const struct rpc_call_ops nfs41_free_stateid_ops = { .rpc_call_prepare = nfs41_free_stateid_prepare, .rpc_call_done = nfs41_free_stateid_done, .rpc_release = nfs41_free_stateid_release, @@ -7483,7 +7483,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #endif }; -const struct inode_operations nfs4_dir_inode_operations = { +static const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1a4a3bd..1336263 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1816,7 +1816,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = 
cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ /* authsys_parms rfc1831 */ - *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */ + *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ p = xdr_encode_opaque(p, machine_name, len); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ -- cgit v0.10.2 From 393faffe6f0d1cd3fb84dc591c4b220cd82efbb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 20:06:11 -0400 Subject: NFSv4: Deal with a sparse warning in nfs_idmap_get_key() Signed-off-by: Trond Myklebust Cc: Bryan Schumaker diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index c2c4163..65c7d91 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -310,7 +310,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = rcu_dereference(rkey->payload.data); + payload = rcu_dereference(rkey->payload.rcudata); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; -- cgit v0.10.2 From b8a8a0dd506faea934164d30bd1d6f86f8e87b1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 21:08:56 -0400 Subject: NFSv4: Fix an incorrect pointer declaration in decode_first_pnfs_layout_type We always encode to __be32 format in XDR: silences a sparse warning. Signed-off-by: Trond Myklebust Cc: Andy Adamson diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1336263..4593728 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4630,7 +4630,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, uint32_t *layouttype) { - uint32_t *p; + __be32 *p; int num; p = xdr_inline_decode(xdr, 4); -- cgit v0.10.2 From c2dd1378fa3b52ab1705f1ce0bd46d1b91eb1d58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 19:54:08 -0400 Subject: NFS: Clean up nfs_sillyrename() Optimise for the case where we only do one lookup. Clean up the code so it is obvious that silly[] is not a dynamic array. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 60395ad..488fd16 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -444,6 +444,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, return rpc_run_task(&task_setup_data); } +#define SILLYNAME_PREFIX ".nfs" +#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) +#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) +#define SILLYNAME_COUNTER_LEN ((unsigned)sizeof(unsigned int) << 1) +#define SILLYNAME_LEN (SILLYNAME_PREFIX_LEN + \ + SILLYNAME_FILEID_LEN + \ + SILLYNAME_COUNTER_LEN) + /** * nfs_sillyrename - Perform a silly-rename of a dentry * @dir: inode of directory that contains dentry @@ -469,10 +477,8 @@ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; - const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; - char silly[slen+1]; + unsigned char silly[SILLYNAME_LEN + 1]; + unsigned long long fileid; struct dentry *sdentry; struct rpc_task *task; int error = -EIO; @@ -489,20 +495,20 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); + fileid = NFS_FILEID(dentry->d_inode); /* Return delegation in anticipation of the rename */ NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode); sdentry = NULL; do { - char *suffix = silly + slen - countersize; - + int slen; dput(sdentry); sillycounter++; - sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + slen = scnprintf(silly, sizeof(silly), + SILLYNAME_PREFIX "%0*llx%0*x", + SILLYNAME_FILEID_LEN, fileid, + SILLYNAME_COUNTER_LEN, sillycounter); dfprintk(VFS, "NFS: trying to rename %s to %s\n", dentry->d_name.name, silly); -- cgit v0.10.2 From 
1264a2f053a32376696e51184f086d35113e75ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 16:06:31 -0400 Subject: NFS: refactor code for calculating the crc32 hash of a filehandle We want to be able to display the crc32 hash of the filehandle in tracepoints. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index af6e806..9a98b04 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,7 +38,6 @@ #include #include #include -#include #include @@ -1190,7 +1189,7 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) { /* wireshark uses 32-bit AUTODIN crc and does a bitwise * not on the result */ - return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); + return nfs_fhandle_hash(fh); } /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9b694f1..50f7068 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -5,6 +5,7 @@ #include "nfs4_fs.h" #include #include +#include #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) @@ -574,3 +575,22 @@ u64 nfs_timespec_to_change_attr(const struct timespec *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } + +#ifdef CONFIG_CRC32 +/** + * nfs_fhandle_hash - calculate the crc32 hash for the filehandle + * @fh - pointer to filehandle + * + * returns a crc32 hash for the filehandle that is compatible with + * the one displayed by "wireshark". + */ +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); +} +#else +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return 0; +} +#endif -- cgit v0.10.2 From f4ce1299b329e96bb247c95c4fee8809827d6931 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Aug 2013 18:59:33 -0400 Subject: NFS: Add event tracing for generic NFS events Add tracepoints for inode attribute updates, attribute revalidation, writeback start/end fsync start/end, attribute change start/end, permission check start/end. 
The intention is to enable performance tracing using 'perf' as well as improving debugging. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index e0bb048..6bd483b 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,9 +4,10 @@ obj-$(CONFIG_NFS_FS) += nfs.o +CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ direct.o pagelist.o read.o symlink.o unlink.o \ - write.o namespace.o mount_clnt.o + write.o namespace.o mount_clnt.o nfstrace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5d737bd..be3da6f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -43,6 +43,8 @@ #include "internal.h" #include "fscache.h" +#include "nfstrace.h" + /* #define NFS_DEBUG_VERBOSE 1 */ static int nfs_opendir(struct inode *, struct file *); @@ -2178,9 +2180,11 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) struct nfs_access_entry cache; int status; + trace_nfs_access_enter(inode); + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) - goto out; + goto out_cached; /* Be clever: ask server to check for all possible rights */ cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; @@ -2193,13 +2197,15 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } - return status; + goto out; } nfs_access_add_cache(inode, &cache); +out_cached: + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + status = -EACCES; out: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) - return 0; - return -EACCES; + trace_nfs_access_exit(inode, status); + return status; } static int nfs_open_permission_mask(int openflags) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 94e94bd..d6a9db0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -37,6 +37,8 @@
#include "iostat.h" #include "fscache.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_FILE static const struct vm_operations_struct nfs_file_vm_ops; @@ -294,6 +296,8 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file_inode(file); + trace_nfs_fsync_enter(inode); + do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) @@ -310,6 +314,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) end = LLONG_MAX; } while (ret == -EAGAIN); + trace_nfs_fsync_exit(inode, ret); return ret; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9a98b04..4bcb00a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -51,6 +51,8 @@ #include "nfs.h" #include "netns.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 @@ -503,6 +505,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; + trace_nfs_setattr_enter(inode); + /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { nfs_inode_dio_wait(inode); @@ -522,6 +526,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = nfs_refresh_inode(inode, fattr); nfs_free_fattr(fattr); out: + trace_nfs_setattr_exit(inode, error); return error; } EXPORT_SYMBOL_GPL(nfs_setattr); @@ -591,6 +596,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; + trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. 
*/ if (S_ISREG(inode->i_mode)) { nfs_inode_dio_wait(inode); @@ -621,6 +627,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } out: + trace_nfs_getattr_exit(inode, err); return err; } EXPORT_SYMBOL_GPL(nfs_getattr); @@ -875,6 +882,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + trace_nfs_revalidate_inode_enter(inode); + if (is_bad_inode(inode)) goto out; if (NFS_STALE(inode)) @@ -925,6 +934,7 @@ err_out: nfs4_label_free(label); out: nfs_free_fattr(fattr); + trace_nfs_revalidate_inode_exit(inode, status); return status; } @@ -975,6 +985,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -1008,8 +1019,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) if (ret < 0) goto out; } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); + trace_nfs_invalidate_mapping_exit(inode, ret); + } + out: return ret; } @@ -1268,9 +1283,17 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { + int ret; + + trace_nfs_refresh_inode_enter(inode); + if (nfs_inode_attrs_need_update(inode, fattr)) - return nfs_update_inode(inode, fattr); - return nfs_check_inode_attributes(inode, fattr); + ret = nfs_update_inode(inode, fattr); + else + ret = nfs_check_inode_attributes(inode, fattr); + + 
trace_nfs_refresh_inode_exit(inode, ret); + return ret; } /** diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c new file mode 100644 index 0000000..cc91461 --- /dev/null +++ b/fs/nfs/nfstrace.c @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2013 Trond Myklebust + */ +#include +#include "internal.h" + +#define CREATE_TRACE_POINTS +#include "nfstrace.h" diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h new file mode 100644 index 0000000..73c8e1e --- /dev/null +++ b/fs/nfs/nfstrace.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2013 Trond Myklebust + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs + +#if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS_H + +#include + +#define nfs_show_file_type(ftype) \ + __print_symbolic(ftype, \ + { DT_UNKNOWN, "UNKNOWN" }, \ + { DT_FIFO, "FIFO" }, \ + { DT_CHR, "CHR" }, \ + { DT_DIR, "DIR" }, \ + { DT_BLK, "BLK" }, \ + { DT_REG, "REG" }, \ + { DT_LNK, "LNK" }, \ + { DT_SOCK, "SOCK" }, \ + { DT_WHT, "WHT" }) + +#define nfs_show_cache_validity(v) \ + __print_flags(v, "|", \ + { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \ + { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ + { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ + { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ + { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ + { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ + { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ + { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }) + +#define nfs_show_nfsi_flags(v) \ + __print_flags(v, "|", \ + { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ + { 1 << NFS_INO_STALE, "STALE" }, \ + { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ + { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ + { 1 << NFS_INO_COMMIT, "COMMIT" }, \ + { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ + { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) + +DECLARE_EVENT_CLASS(nfs_inode_event, + TP_PROTO( + const struct inode *inode + ), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) 
+ __field(u64, version) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode->i_version; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (unsigned long long)__entry->version + ) +); + +DECLARE_EVENT_CLASS(nfs_inode_event_done, + TP_PROTO( + const struct inode *inode, + int error + ), + + TP_ARGS(inode, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(unsigned char, type) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, size) + __field(unsigned long, nfsi_flags) + __field(unsigned long, cache_validity) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->error = error; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->type = nfs_umode_to_dtype(inode->i_mode); + __entry->version = inode->i_version; + __entry->size = i_size_read(inode); + __entry->nfsi_flags = nfsi->flags; + __entry->cache_validity = nfsi->cache_validity; + ), + + TP_printk( + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "type=%u (%s) version=%llu size=%lld " + "cache_validity=%lu (%s) nfs_flags=%ld (%s)", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->type, + nfs_show_file_type(__entry->type), + (unsigned long long)__entry->version, + (long long)__entry->size, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity), + __entry->nfsi_flags, + nfs_show_nfsi_flags(__entry->nfsi_flags) + ) +); + +#define DEFINE_NFS_INODE_EVENT(name) \ + DEFINE_EVENT(nfs_inode_event, name, \ + TP_PROTO( \ + const struct inode *inode \ 
+ ), \ + TP_ARGS(inode)) +#define DEFINE_NFS_INODE_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_inode_event_done, name, \ + TP_PROTO( \ + const struct inode *inode, \ + int error \ + ), \ + TP_ARGS(inode, error)) +DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit); +DEFINE_NFS_INODE_EVENT(nfs_getattr_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit); +DEFINE_NFS_INODE_EVENT(nfs_setattr_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit); +DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit); +DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); +DEFINE_NFS_INODE_EVENT(nfs_access_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); + +#endif /* _TRACE_NFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE nfstrace +/* This part must be outside protection */ +#include diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f1bdb72..d37e8ca 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -31,6 +31,8 @@ #include "fscache.h" #include "pnfs.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE #define MIN_POOL_WRITE (32) @@ -1732,8 +1734,14 @@ int nfs_wb_all(struct inode *inode) .range_start = 0, .range_end = LLONG_MAX, }; + int ret; + + trace_nfs_writeback_inode_enter(inode); + + ret = sync_inode(inode, &wbc); - return sync_inode(inode, &wbc); + trace_nfs_writeback_inode_exit(inode, ret); + return ret; } EXPORT_SYMBOL_GPL(nfs_wb_all); @@ -1781,6 +1789,8 @@ int nfs_wb_page(struct inode *inode, struct page *page) }; int ret; + trace_nfs_writeback_page_enter(inode); + for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { @@ -1789,14 +1799,15 @@ int nfs_wb_page(struct inode *inode, struct page *page) goto out_error; continue; } + ret = 0; if (!PagePrivate(page)) break; ret = nfs_commit_inode(inode, FLUSH_SYNC); if (ret < 0) goto out_error; } - return 0; out_error: + trace_nfs_writeback_page_exit(inode, ret); return ret; } -- cgit v0.10.2 From 1472b83eae0bf09ab76ebcb1373dbd210e97f911 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 11:59:41 -0400 Subject: NFS: Pass in lookup flags from nfs_atomic_open to nfs_lookup When doing an open of a directory, ensure that we do pass the lookup flags from nfs_atomic_open into nfs_lookup. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index be3da6f..29d5463 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1407,6 +1407,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; + unsigned int lookup_flags = 0; int err; /* Expect a negative dentry */ @@ -1429,6 +1430,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, */ return -ENOENT; } + lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY; goto no_open; } @@ -1479,7 +1481,7 @@ out: return err; no_open: - res = nfs_lookup(dir, dentry, 0); + res = nfs_lookup(dir, dentry, lookup_flags); err = PTR_ERR(res); if (IS_ERR(res)) goto out; -- cgit v0.10.2 From 6e0d0be715fe041fc7121b0b44cde3015d1cc846 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 11:26:17 -0400 Subject: NFS: Add event tracing for generic NFS lookups Add tracepoints for lookup, lookup_revalidate and atomic_open Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29d5463..2263a6b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1102,7 +1102,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(label)) goto out_error; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); if (error) goto out_bad; if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1315,6 +1317,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ + trace_nfs_lookup_enter(dir, dentry, flags); nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) @@ -1341,6 +1344,7 @@ no_entry: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 
out_unblock_sillyrename: nfs_unblock_sillyrename(parent); + trace_nfs_lookup_exit(dir, dentry, flags, error); nfs4_label_free(label); out: nfs_free_fattr(fattr); @@ -1451,12 +1455,14 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, if (IS_ERR(ctx)) goto out; + trace_nfs_atomic_open_enter(dir, ctx, open_flags); nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { put_nfs_open_context(ctx); err = PTR_ERR(inode); + trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); switch (err) { case -ENOENT: d_drop(dentry); @@ -1477,6 +1483,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, } err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); + trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); out: return err; diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c index cc91461..4eb0aea 100644 --- a/fs/nfs/nfstrace.c +++ b/fs/nfs/nfstrace.c @@ -2,6 +2,7 @@ * Copyright (c) 2013 Trond Myklebust */ #include +#include #include "internal.h" #define CREATE_TRACE_POINTS diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 73c8e1e..fba194b 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -157,6 +157,201 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); +#define show_lookup_flags(flags) \ + __print_flags((unsigned long)flags, "|", \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_OPEN, "OPEN" }, \ + { LOOKUP_CREATE, "CREATE" }, \ + { LOOKUP_EXCL, "EXCL" }) + +DECLARE_EVENT_CLASS(nfs_lookup_event, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags + ), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + 
__entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->flags, + show_lookup_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_LOOKUP_EVENT(name) \ + DEFINE_EVENT(nfs_lookup_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + unsigned int flags \ + ), \ + TP_ARGS(dir, dentry, flags)) + +DECLARE_EVENT_CLASS(nfs_lookup_event_done, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags, + int error + ), + + TP_ARGS(dir, dentry, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_lookup_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_lookup_event_done, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + unsigned int flags, \ + int error \ + ), \ + TP_ARGS(dir, dentry, flags, error)) + +DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter); +DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); +DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); +DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); + +#define show_open_flags(flags) \ + __print_flags((unsigned long)flags, "|", \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { 
O_APPEND, "O_APPEND" }, \ + { O_DSYNC, "O_DSYNC" }, \ + { O_DIRECT, "O_DIRECT" }, \ + { O_DIRECTORY, "O_DIRECTORY" }) + +#define show_fmode_flags(mode) \ + __print_flags(mode, "|", \ + { ((__force unsigned long)FMODE_READ), "READ" }, \ + { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ + { ((__force unsigned long)FMODE_EXEC), "EXEC" }) + +TRACE_EVENT(nfs_atomic_open_enter, + TP_PROTO( + const struct inode *dir, + const struct nfs_open_context *ctx, + unsigned int flags + ), + + TP_ARGS(dir, ctx, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s", + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_atomic_open_exit, + TP_PROTO( + const struct inode *dir, + const struct nfs_open_context *ctx, + unsigned int flags, + int error + ), + + TP_ARGS(dir, ctx, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + __entry->error = error; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) fmode=%s " + "name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + 
MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 8b0ad3d489cb107804bd8c78695532794eec73d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 10:53:09 -0400 Subject: NFS: Add tracepoints for debugging generic file create events Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2263a6b..9c07812 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1612,7 +1612,9 @@ int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + trace_nfs_create_exit(dir, dentry, open_flags, error); if (error != 0) goto out_err; return 0; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index fba194b..2963a05 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -352,6 +352,76 @@ TRACE_EVENT(nfs_atomic_open_exit, ) ); +TRACE_EVENT(nfs_create_enter, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags + ), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->flags, + show_open_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_create_exit, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags, + int error + ), + + TP_ARGS(dir, dentry, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(dev_t, dev) 
+ __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->error = error; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_open_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 1ca42382afd67bf58523d36b00fb4ff487d8173b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 12:36:04 -0400 Subject: NFS: Add tracepoints for debugging directory changes Add tracepoints for mknod, mkdir, rmdir, remove (unlink) and symlink. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9c07812..e41dec5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1642,7 +1642,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + trace_nfs_mknod_enter(dir, dentry); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); + trace_nfs_mknod_exit(dir, dentry, status); if (status != 0) goto out_err; return 0; @@ -1666,7 +1668,9 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; + trace_nfs_mkdir_enter(dir, dentry); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + trace_nfs_mkdir_exit(dir, dentry, error); if (error != 0) goto out_err; return 0; @@ -1689,12 +1693,14 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + trace_nfs_rmdir_enter(dir, dentry); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); 
else if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); + trace_nfs_rmdir_exit(dir, dentry, error); return error; } @@ -1722,6 +1728,7 @@ static int nfs_safe_remove(struct dentry *dentry) goto out; } + trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); @@ -1731,6 +1738,7 @@ static int nfs_safe_remove(struct dentry *dentry) error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); + trace_nfs_remove_exit(dir, dentry, error); out: return error; } @@ -1748,13 +1756,14 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); if (d_count(dentry) > 1) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); - return error; + goto out; } if (!d_unhashed(dentry)) { __d_drop(dentry); @@ -1766,6 +1775,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); +out: + trace_nfs_unlink_exit(dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_unlink); @@ -1812,7 +1823,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr); + trace_nfs_symlink_enter(dir, dentry); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 2963a05..5827906 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -422,6 +422,96 @@ TRACE_EVENT(nfs_create_exit, ) ); 
+DECLARE_EVENT_CLASS(nfs_directory_event, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry + ), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_DIRECTORY_EVENT(name) \ + DEFINE_EVENT(nfs_directory_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry \ + ), \ + TP_ARGS(dir, dentry)) + +DECLARE_EVENT_CLASS(nfs_directory_event_done, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + int error + ), + + TP_ARGS(dir, dentry, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_directory_event_done, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + int error \ + ), \ + TP_ARGS(dir, dentry, error)) + +DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit); 
+DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); + #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 70ded2017072ae16aeaa7fb2a15a879a475161a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 12:08:45 -0400 Subject: NFS: Add tracepoints for debugging NFS rename and sillyrename issues Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e41dec5..dca7deb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1909,6 +1909,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, d_count(new_dentry)); + trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); /* * For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. If the @@ -1955,6 +1956,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, out: if (rehash) d_rehash(rehash); + trace_nfs_rename_exit(old_dir, old_dentry, + new_dir, new_dentry, error); if (!error) { if (new_inode != NULL) nfs_drop_nlink(new_inode); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 5827906..a20f0d1 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -512,6 +512,144 @@ DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); +DECLARE_EVENT_CLASS(nfs_rename_event, + TP_PROTO( + const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry + ), + + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, old_dir) + __field(u64, new_dir) + __string(old_name, old_dentry->d_name.name) + __string(new_name, new_dentry->d_name.name) + ), + + TP_fast_assign( 
+ __entry->dev = old_dir->i_sb->s_dev; + __entry->old_dir = NFS_FILEID(old_dir); + __entry->new_dir = NFS_FILEID(new_dir); + __assign_str(old_name, old_dentry->d_name.name); + __assign_str(new_name, new_dentry->d_name.name); + ), + + TP_printk( + "old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->old_dir, + __get_str(old_name), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->new_dir, + __get_str(new_name) + ) +); +#define DEFINE_NFS_RENAME_EVENT(name) \ + DEFINE_EVENT(nfs_rename_event, name, \ + TP_PROTO( \ + const struct inode *old_dir, \ + const struct dentry *old_dentry, \ + const struct inode *new_dir, \ + const struct dentry *new_dentry \ + ), \ + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry)) + +DECLARE_EVENT_CLASS(nfs_rename_event_done, + TP_PROTO( + const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, + int error + ), + + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, old_dir) + __string(old_name, old_dentry->d_name.name) + __field(u64, new_dir) + __string(new_name, new_dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = old_dir->i_sb->s_dev; + __entry->old_dir = NFS_FILEID(old_dir); + __entry->new_dir = NFS_FILEID(new_dir); + __entry->error = error; + __assign_str(old_name, old_dentry->d_name.name); + __assign_str(new_name, new_dentry->d_name.name); + ), + + TP_printk( + "error=%d old_name=%02x:%02x:%llu/%s " + "new_name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->old_dir, + __get_str(old_name), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->new_dir, + __get_str(new_name) + ) +); +#define DEFINE_NFS_RENAME_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_rename_event_done, name, \ + 
TP_PROTO( \ + const struct inode *old_dir, \ + const struct dentry *old_dentry, \ + const struct inode *new_dir, \ + const struct dentry *new_dentry, \ + int error \ + ), \ + TP_ARGS(old_dir, old_dentry, new_dir, \ + new_dentry, error)) + +DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); +DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); + +DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename); + +TRACE_EVENT(nfs_sillyrename_unlink, + TP_PROTO( + const struct nfs_unlinkdata *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, dir) + __dynamic_array(char, name, data->args.name.len + 1) + ), + + TP_fast_assign( + struct inode *dir = data->dir; + size_t len = data->args.name.len; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + memcpy(__get_dynamic_array(name), + data->args.name.name, len); + ((char *)__get_dynamic_array(name))[len] = 0; + ), + + TP_printk( + "error=%d name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 488fd16..2c1485d 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -20,6 +20,8 @@ #include "iostat.h" #include "delegation.h" +#include "nfstrace.h" + /** * nfs_free_unlinkdata - release data from a sillydelete operation. * @data: pointer to unlink structure. 
@@ -77,6 +79,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct nfs_unlinkdata *data = calldata; struct inode *dir = data->dir; + trace_nfs_sillyrename_unlink(data, task->tk_status); if (!NFS_PROTO(dir)->unlink_done(task, dir)) rpc_restart_call_prepare(task); } @@ -336,6 +339,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; + trace_nfs_sillyrename_rename(old_dir, old_dentry, + new_dir, data->new_dentry, task->tk_status); if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); return; -- cgit v0.10.2 From 1fd1085b49f8cafbd0ce4e4682c209a31f7b287f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2013 13:54:44 -0400 Subject: NFS: Add tracepoints for debugging NFS hard links Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dca7deb..4ce7f76 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1860,6 +1860,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); + trace_nfs_link_enter(inode, dir, dentry); NFS_PROTO(inode)->return_delegation(inode); d_drop(dentry); @@ -1868,6 +1869,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) ihold(inode); d_add(dentry, inode); } + trace_nfs_link_exit(inode, dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_link); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a20f0d1..89fe741 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -512,6 +512,76 @@ DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); +TRACE_EVENT(nfs_link_enter, + TP_PROTO( + const struct inode *inode, + const struct inode *dir, + const struct dentry *dentry + 
), + + TP_ARGS(inode, dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->dir = NFS_FILEID(dir); + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fileid, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_link_exit, + TP_PROTO( + const struct inode *inode, + const struct inode *dir, + const struct dentry *dentry, + int error + ), + + TP_ARGS(inode, dir, dentry, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u64, fileid) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fileid, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + DECLARE_EVENT_CLASS(nfs_rename_event, TP_PROTO( const struct inode *old_dir, -- cgit v0.10.2 From c6d01c6f9bf94075a726b3103a7e90d981017326 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Aug 2013 11:51:26 -0400 Subject: NFSv4: Add tracepoints for debugging state management problems Set up basic tracepoints for debugging client id creation/destruction and session creation/destruction. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 6bd483b..7238c4b 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -20,9 +20,11 @@ nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o obj-$(CONFIG_NFS_V4) += nfsv4.o +CFLAGS_nfs4trace.o += -I$(src) nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o + nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o \ + nfs4trace.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e53e42b..5f228ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -66,6 +66,8 @@ #include "nfs4session.h" #include "fscache.h" +#include "nfs4trace.h" + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_POLL_RETRY_MIN (HZ/10) @@ -4064,6 +4066,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) struct nfs_client *clp = data->client; unsigned long timestamp = data->timestamp; + trace_nfs4_renew_async(clp, task->tk_status); if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! 
*/ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) @@ -4730,6 +4733,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; } @@ -4757,6 +4761,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -5892,6 +5897,7 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_bind_conn_to_session(clp, status); if (status == 0) { if (memcmp(res.session->sess_id.data, clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5977,6 +5983,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_exchange_id(clp, status); if (status == 0) status = nfs4_check_cl_exchange_flags(res.flags); @@ -6037,6 +6044,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, int status; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " "DESTROY_CLIENTID.", status, clp->cl_hostname); @@ -6300,6 +6308,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_create_session(clp, status); if (!status) { /* Verify the session's negotiated channel_attrs values */ @@ -6363,6 
+6372,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, return status; status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_destroy_session(session->clp, status); if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " @@ -6412,6 +6422,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp)) return; + trace_nfs4_sequence(clp, task->tk_status); if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (atomic_read(&clp->cl_count) == 1) @@ -6564,6 +6575,7 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) if (!nfs41_sequence_done(task, res)) return; + trace_nfs4_reclaim_complete(clp, task->tk_status); if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); return; diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c new file mode 100644 index 0000000..3f02183 --- /dev/null +++ b/fs/nfs/nfs4trace.c @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2013 Trond Myklebust + */ +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "internal.h" + +#define CREATE_TRACE_POINTS +#include "nfs4trace.h" + diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h new file mode 100644 index 0000000..9653feb --- /dev/null +++ b/fs/nfs/nfs4trace.h @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2013 Trond Myklebust + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs4 + +#if !defined(_TRACE_NFS4_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS4_H + +#include <linux/tracepoint.h> + +#define show_nfsv4_errors(error) \ + __print_symbolic(error, \ + { NFS4_OK, "OK" }, \ + /* Mapped by nfs4_stat_to_errno() */ \ + { -EPERM, "EPERM" }, \ + { -ENOENT, "ENOENT" }, \ + { -EIO, "EIO" }, \ + { -ENXIO, "ENXIO" }, \ + { -EACCES, "EACCES" }, \ + { -EEXIST, "EEXIST" }, \ + { -EXDEV, "EXDEV" }, \ + { -ENOTDIR, "ENOTDIR" }, \ + { -EISDIR, "EISDIR" }, \ + { -EFBIG, "EFBIG" }, \ + { -ENOSPC, "ENOSPC" }, \ + { 
-EROFS, "EROFS" }, \ + { -EMLINK, "EMLINK" }, \ + { -ENAMETOOLONG, "ENAMETOOLONG" }, \ + { -ENOTEMPTY, "ENOTEMPTY" }, \ + { -EDQUOT, "EDQUOT" }, \ + { -ESTALE, "ESTALE" }, \ + { -EBADHANDLE, "EBADHANDLE" }, \ + { -EBADCOOKIE, "EBADCOOKIE" }, \ + { -ENOTSUPP, "ENOTSUPP" }, \ + { -ETOOSMALL, "ETOOSMALL" }, \ + { -EREMOTEIO, "EREMOTEIO" }, \ + { -EBADTYPE, "EBADTYPE" }, \ + { -EAGAIN, "EAGAIN" }, \ + { -ELOOP, "ELOOP" }, \ + { -EOPNOTSUPP, "EOPNOTSUPP" }, \ + { -EDEADLK, "EDEADLK" }, \ + /* RPC errors */ \ + { -ENOMEM, "ENOMEM" }, \ + { -EKEYEXPIRED, "EKEYEXPIRED" }, \ + { -ETIMEDOUT, "ETIMEDOUT" }, \ + { -ERESTARTSYS, "ERESTARTSYS" }, \ + { -ECONNREFUSED, "ECONNREFUSED" }, \ + { -ECONNRESET, "ECONNRESET" }, \ + { -ENETUNREACH, "ENETUNREACH" }, \ + { -EHOSTUNREACH, "EHOSTUNREACH" }, \ + { -EHOSTDOWN, "EHOSTDOWN" }, \ + { -EPIPE, "EPIPE" }, \ + { -EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ + /* NFSv4 native errors */ \ + { -NFS4ERR_ACCESS, "ACCESS" }, \ + { -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ + { -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ + { -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ + { -NFS4ERR_BADCHAR, "BADCHAR" }, \ + { -NFS4ERR_BADHANDLE, "BADHANDLE" }, \ + { -NFS4ERR_BADIOMODE, "BADIOMODE" }, \ + { -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ + { -NFS4ERR_BADLABEL, "BADLABEL" }, \ + { -NFS4ERR_BADNAME, "BADNAME" }, \ + { -NFS4ERR_BADOWNER, "BADOWNER" }, \ + { -NFS4ERR_BADSESSION, "BADSESSION" }, \ + { -NFS4ERR_BADSLOT, "BADSLOT" }, \ + { -NFS4ERR_BADTYPE, "BADTYPE" }, \ + { -NFS4ERR_BADXDR, "BADXDR" }, \ + { -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ + { -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ + { -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ + { -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ + { -NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ + { -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ + { -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ + { -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ + { 
-NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ + { -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ + "CONN_NOT_BOUND_TO_SESSION" }, \ + { -NFS4ERR_DEADLOCK, "DEADLOCK" }, \ + { -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ + { -NFS4ERR_DELAY, "DELAY" }, \ + { -NFS4ERR_DELEG_ALREADY_WANTED, \ + "DELEG_ALREADY_WANTED" }, \ + { -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ + { -NFS4ERR_DENIED, "DENIED" }, \ + { -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ + { -NFS4ERR_DQUOT, "DQUOT" }, \ + { -NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ + { -NFS4ERR_EXIST, "EXIST" }, \ + { -NFS4ERR_EXPIRED, "EXPIRED" }, \ + { -NFS4ERR_FBIG, "FBIG" }, \ + { -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ + { -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ + { -NFS4ERR_GRACE, "GRACE" }, \ + { -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ + { -NFS4ERR_INVAL, "INVAL" }, \ + { -NFS4ERR_IO, "IO" }, \ + { -NFS4ERR_ISDIR, "ISDIR" }, \ + { -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ + { -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ + { -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ + { -NFS4ERR_LOCKED, "LOCKED" }, \ + { -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ + { -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ + { -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ + { -NFS4ERR_MLINK, "MLINK" }, \ + { -NFS4ERR_MOVED, "MOVED" }, \ + { -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { -NFS4ERR_NOENT, "NOENT" }, \ + { -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ + { -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ + { -NFS4ERR_NOSPC, "NOSPC" }, \ + { -NFS4ERR_NOTDIR, "NOTDIR" }, \ + { -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ + { -NFS4ERR_NOTSUPP, "NOTSUPP" }, \ + { -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ + { -NFS4ERR_NOT_SAME, "NOT_SAME" }, \ + { -NFS4ERR_NO_GRACE, "NO_GRACE" }, \ + { -NFS4ERR_NXIO, "NXIO" }, \ + { -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ + { -NFS4ERR_OPENMODE, "OPENMODE" }, \ + { -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ + { -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ + { -NFS4ERR_PERM, "PERM" }, \ + { 
-NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ + { -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ + { -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ + { -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ + { -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ + { -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ + { -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ + { -NFS4ERR_REP_TOO_BIG_TO_CACHE, \ + "REP_TOO_BIG_TO_CACHE" }, \ + { -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ + { -NFS4ERR_RESOURCE, "RESOURCE" }, \ + { -NFS4ERR_RESTOREFH, "RESTOREFH" }, \ + { -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ + { -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ + { -NFS4ERR_ROFS, "ROFS" }, \ + { -NFS4ERR_SAME, "SAME" }, \ + { -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ + { -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ + { -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ + { -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ + { -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ + { -NFS4ERR_STALE, "STALE" }, \ + { -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ + { -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ + { -NFS4ERR_SYMLINK, "SYMLINK" }, \ + { -NFS4ERR_TOOSMALL, "TOOSMALL" }, \ + { -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ + { -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ + { -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ + { -NFS4ERR_WRONGSEC, "WRONGSEC" }, \ + { -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ + { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ + { -NFS4ERR_XDEV, "XDEV" }) + +DECLARE_EVENT_CLASS(nfs4_clientid_event, + TP_PROTO( + const struct nfs_client *clp, + int error + ), + + TP_ARGS(clp, error), + + TP_STRUCT__entry( + __string(dstaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)) + __field(int, error) + ), + + TP_fast_assign( + __entry->error = error; + __assign_str(dstaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + ), + + TP_printk( + "error=%d (%s) dstaddr=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + __get_str(dstaddr) + ) +); +#define 
DEFINE_NFS4_CLIENTID_EVENT(name) \ + DEFINE_EVENT(nfs4_clientid_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp, \ + int error \ + ), \ + TP_ARGS(clp, error)) +DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid_confirm); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew_async); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_CLIENTID_EVENT(nfs4_exchange_id); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_create_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_clientid); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); +#endif /* CONFIG_NFS_V4_1 */ + +#endif /* _TRACE_NFS4_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE nfs4trace +/* This part must be outside protection */ +#include <trace/define_trace.h> -- cgit v0.10.2 From 42113a75399353d87baf63956ab8f450958f1c8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 16:19:27 -0400 Subject: NFSv4: Add tracepoints for debugging file open Set up basic tracepoints for debugging NFSv4 file open/close Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5f228ed..d609198 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1442,6 +1442,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + trace_nfs4_open_reclaim(ctx, 0, err); if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) continue; if (err != -NFS4ERR_DELAY) @@ -1897,6 +1898,7 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + trace_nfs4_open_expired(ctx, 0, err); if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) continue; switch (err) { @@ -2199,6 +2201,7 @@ static struct nfs4_state 
*nfs4_do_open(struct inode *dir, do { status = _nfs4_do_open(dir, ctx, flags, sattr, label); res = ctx->state; + trace_nfs4_open_file(ctx, flags, status); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -2389,6 +2392,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; + trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9653feb..a558dc9 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -161,6 +161,19 @@ { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ { -NFS4ERR_XDEV, "XDEV" }) +#define show_open_flags(flags) \ + __print_flags(flags, "|", \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { O_DIRECT, "O_DIRECT" }) + +#define show_fmode_flags(mode) \ + __print_flags(mode, "|", \ + { ((__force unsigned long)FMODE_READ), "READ" }, \ + { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ + { ((__force unsigned long)FMODE_EXEC), "EXEC" }) + DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_PROTO( const struct nfs_client *clp, @@ -211,6 +224,118 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); #endif /* CONFIG_NFS_V4_1 */ +DECLARE_EVENT_CLASS(nfs4_open_event, + TP_PROTO( + const struct nfs_open_context *ctx, + int flags, + int error + ), + + TP_ARGS(ctx, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + const struct nfs4_state *state = ctx->state; + const struct inode *inode = NULL; + + __entry->error = 
error; + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __entry->dev = ctx->dentry->d_sb->s_dev; + if (!IS_ERR(state)) + inode = state->inode; + if (inode != NULL) { + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + } else { + __entry->fileid = 0; + __entry->fhandle = 0; + } + __entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode); + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "error=%d (%s) flags=%d (%s) fmode=%s " + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "name=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS4_OPEN_EVENT(name) \ + DEFINE_EVENT(nfs4_open_event, name, \ + TP_PROTO( \ + const struct nfs_open_context *ctx, \ + int flags, \ + int error \ + ), \ + TP_ARGS(ctx, flags, error)) +DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim); +DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired); +DEFINE_NFS4_OPEN_EVENT(nfs4_open_file); + +TRACE_EVENT(nfs4_close, + TP_PROTO( + const struct nfs4_state *state, + const struct nfs_closeargs *args, + const struct nfs_closeres *res, + int error + ), + + TP_ARGS(state, args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, fmode) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fmode = (__force unsigned int)state->state; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " + "fhandle=0x%08x", + 
__entry->error, + show_nfsv4_errors(__entry->error), + __entry->fmode ? show_fmode_flags(__entry->fmode) : + "closed", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From d1b748a5e71106481015e36ce327747b075bcbdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 16:35:20 -0400 Subject: NFSv4: Add tracepoints for debugging file locking Set up basic tracepoints for debugging NFSv4 file lock/unlock Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d609198..6d12c3e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4964,8 +4964,9 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * int err; do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_getlk(state, cmd, request), + err = _nfs4_proc_getlk(state, cmd, request); + trace_nfs4_get_lock(request, state, cmd, err); + err = nfs4_handle_exception(NFS_SERVER(state->inode), err, &exception); } while (exception.retry); return err; @@ -5163,6 +5164,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * rpc_put_task(task); out: request->fl_flags = fl_flags; + trace_nfs4_unlock(request, state, F_SETLK, status); return status; } @@ -5386,6 +5388,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); + trace_nfs4_lock_reclaim(request, state, F_SETLK, err); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -5408,6 +5411,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); + 
trace_nfs4_lock_expired(request, state, F_SETLK, err); switch (err) { default: goto out; @@ -5530,6 +5534,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * do { err = _nfs4_proc_setlk(state, cmd, request); + trace_nfs4_set_lock(request, state, cmd, err); if (err == -NFS4ERR_DENIED) err = -EAGAIN; err = nfs4_handle_exception(NFS_SERVER(state->inode), diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index a558dc9..3f124db 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -336,6 +336,81 @@ TRACE_EVENT(nfs4_close, ) ); +#define show_lock_cmd(type) \ + __print_symbolic((int)type, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }) +#define show_lock_type(type) \ + __print_symbolic((int)type, \ + { F_RDLCK, "RDLCK" }, \ + { F_WRLCK, "WRLCK" }, \ + { F_UNLCK, "UNLCK" }) + +DECLARE_EVENT_CLASS(nfs4_lock_event, + TP_PROTO( + const struct file_lock *request, + const struct nfs4_state *state, + int cmd, + int error + ), + + TP_ARGS(request, state, cmd, error), + + TP_STRUCT__entry( + __field(int, error) + __field(int, cmd) + __field(char, type) + __field(loff_t, start) + __field(loff_t, end) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->error = error; + __entry->cmd = cmd; + __entry->type = request->fl_type; + __entry->start = request->fl_start; + __entry->end = request->fl_end; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + ), + + TP_printk( + "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + show_lock_cmd(__entry->cmd), + show_lock_type(__entry->type), + (long long)__entry->start, + (long long)__entry->end, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + 
+#define DEFINE_NFS4_LOCK_EVENT(name) \ + DEFINE_EVENT(nfs4_lock_event, name, \ + TP_PROTO( \ + const struct file_lock *request, \ + const struct nfs4_state *state, \ + int cmd, \ + int error \ + ), \ + TP_ARGS(request, state, cmd, error)) +DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock); +DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock); +DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim); +DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired); +DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 078ea3dfe396b18ccf608c71a9585b6f7e519bb1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 16:45:55 -0400 Subject: NFSv4: Add tracepoints for debugging lookup/create operations Set up basic tracepoints for debugging NFSv4 lookup, unlink/remove, symlink, mkdir, mknod, fs_locations and secinfo. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d12c3e..3234758 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3026,6 +3026,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, int err; do { err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: err = -ENOENT; @@ -3261,8 +3262,9 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_remove(dir, name), + err = _nfs4_proc_remove(dir, name); + trace_nfs4_remove(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; @@ -3291,7 +3293,8 @@ static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlin static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { - struct nfs_removeres *res = task->tk_msg.rpc_resp; + struct nfs_unlinkdata *data = task->tk_calldata; + struct nfs_removeres *res = &data->res; if 
(!nfs4_sequence_done(task, &res->seq_res)) return 0; @@ -3533,9 +3536,9 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, label = nfs4_label_init_security(dir, dentry, sattr, &l); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, dentry, page, - len, sattr, label), + err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label); + trace_nfs4_symlink(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); @@ -3572,8 +3575,9 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mkdir(dir, dentry, sattr, label), + err = _nfs4_proc_mkdir(dir, dentry, sattr, label); + trace_nfs4_mkdir(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); nfs4_label_release_security(label); @@ -3680,8 +3684,9 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mknod(dir, dentry, sattr, label, rdev), + err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev); + trace_nfs4_mknod(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); @@ -5801,8 +5806,10 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_fs_locations(client, dir, name, fs_locations, page), + err = _nfs4_proc_fs_locations(client, dir, name, + fs_locations, page); + trace_nfs4_get_fs_locations(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; @@ -5841,8 +5848,9 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct 
nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_secinfo(dir, name, flavors), + err = _nfs4_proc_secinfo(dir, name, flavors); + trace_nfs4_secinfo(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 3f124db..2646a50 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -411,6 +411,56 @@ DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim); DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired); DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); +DECLARE_EVENT_CLASS(nfs4_lookup_event, + TP_PROTO( + const struct inode *dir, + const struct qstr *name, + int error + ), + + TP_ARGS(dir, name, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, dir) + __string(name, name->name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, name->name); + ), + + TP_printk( + "error=%d (%s) name=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS4_LOOKUP_EVENT(name) \ + DEFINE_EVENT(nfs4_lookup_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct qstr *name, \ + int error \ + ), \ + TP_ARGS(dir, name, error)) + +DEFINE_NFS4_LOOKUP_EVENT(nfs4_lookup); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_symlink); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_mkdir); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_mknod); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo); + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From c1578b769a644fe1ff3e8324fc404b18f3f01fbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 16:58:42 -0400 Subject: NFSv4: Add tracepoints for debugging 
inode manipulations Set up basic tracepoints for debugging NFSv4 setattr, access, readlink, readdir, get_acl set_acl get_security_label, and set_security_label. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3234758..daf0854 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2315,6 +2315,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, int err; do { err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); + trace_nfs4_setattr(inode, err); switch (err) { case -NFS4ERR_OPENMODE: if (!(sattr->ia_valid & ATTR_SIZE)) { @@ -3143,8 +3144,9 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_access(inode, entry), + err = _nfs4_proc_access(inode, entry); + trace_nfs4_access(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -3197,8 +3199,9 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_readlink(inode, page, pgbase, pglen), + err = _nfs4_proc_readlink(inode, page, pgbase, pglen); + trace_nfs4_readlink(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -3630,9 +3633,10 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), - _nfs4_proc_readdir(dentry, cred, cookie, - pages, count, plus), + err = _nfs4_proc_readdir(dentry, cred, cookie, + pages, count, plus); + trace_nfs4_readdir(dentry->d_inode, err); + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err, &exception); } while (exception.retry); return err; @@ -4333,6 
+4337,7 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl ssize_t ret; do { ret = __nfs4_get_acl_uncached(inode, buf, buflen); + trace_nfs4_get_acl(inode, ret); if (ret >= 0) break; ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); @@ -4412,8 +4417,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - __nfs4_proc_set_acl(inode, buf, buflen), + err = __nfs4_proc_set_acl(inode, buf, buflen); + trace_nfs4_set_acl(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -4466,8 +4472,9 @@ static int nfs4_get_security_label(struct inode *inode, void *buf, return -EOPNOTSUPP; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_get_security_label(inode, buf, buflen), + err = _nfs4_get_security_label(inode, buf, buflen); + trace_nfs4_get_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -4519,9 +4526,10 @@ static int nfs4_do_set_security_label(struct inode *inode, int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_do_set_security_label(inode, ilabel, - fattr, olabel), + err = _nfs4_do_set_security_label(inode, ilabel, + fattr, olabel); + trace_nfs4_set_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 2646a50..6bd65c2 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -461,6 +461,57 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove); DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations); DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo); +DECLARE_EVENT_CLASS(nfs4_inode_event, + TP_PROTO( + const struct inode *inode, + int error + ), + + TP_ARGS(inode, error), + + TP_STRUCT__entry( + 
__field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define DEFINE_NFS4_INODE_EVENT(name) \ + DEFINE_EVENT(nfs4_inode_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + int error \ + ), \ + TP_ARGS(inode, error)) + +DEFINE_NFS4_INODE_EVENT(nfs4_setattr); +DEFINE_NFS4_INODE_EVENT(nfs4_access); +DEFINE_NFS4_INODE_EVENT(nfs4_readlink); +DEFINE_NFS4_INODE_EVENT(nfs4_readdir); +DEFINE_NFS4_INODE_EVENT(nfs4_get_acl); +DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); +DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From fbc6f7c233ff0a7e98f5dc2837b08adf03aa9376 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2013 17:08:26 -0400 Subject: NFSv4: Add tracepoints for debugging rename Add tracepoints to debug renames. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index daf0854..4b3ba92 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3329,7 +3329,8 @@ static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renam static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { - struct nfs_renameres *res = task->tk_msg.rpc_resp; + struct nfs_renamedata *data = task->tk_calldata; + struct nfs_renameres *res = &data->res; if (!nfs4_sequence_done(task, &res->seq_res)) return 0; @@ -3375,9 +3376,10 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(old_dir), - _nfs4_proc_rename(old_dir, old_name, - new_dir, new_name), + err = _nfs4_proc_rename(old_dir, old_name, + new_dir, new_name); + trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err); + err = nfs4_handle_exception(NFS_SERVER(old_dir), err, &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6bd65c2..6388db8 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -461,6 +461,49 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove); DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations); DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo); +TRACE_EVENT(nfs4_rename, + TP_PROTO( + const struct inode *olddir, + const struct qstr *oldname, + const struct inode *newdir, + const struct qstr *newname, + int error + ), + + TP_ARGS(olddir, oldname, newdir, newname, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, olddir) + __string(oldname, oldname->name) + __field(u64, newdir) + __string(newname, newname->name) + ), + + TP_fast_assign( + __entry->dev = olddir->i_sb->s_dev; + __entry->olddir = NFS_FILEID(olddir); + __entry->newdir = NFS_FILEID(newdir); + __entry->error = error; + __assign_str(oldname, oldname->name); + __assign_str(newname, 
newname->name); + ), + + TP_printk( + "error=%d (%s) oldname=%02x:%02x:%llu/%s " + "newname=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->olddir, + __get_str(oldname), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->newdir, + __get_str(newname) + ) +); + DECLARE_EVENT_CLASS(nfs4_inode_event, TP_PROTO( const struct inode *inode, -- cgit v0.10.2 From ca8acf8d84029b395ade8d4562dae2108bb00c17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Aug 2013 10:36:56 -0400 Subject: NFSv4: Add tracepoints for debugging delegations Set up tracepoints to track when delegations are set, reclaimed, returned by the client, or recalled by the server. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e6ebc4c..c074930 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -15,6 +15,7 @@ #include "internal.h" #include "pnfs.h" #include "nfs4session.h" +#include "nfs4trace.h" #ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -93,6 +94,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, default: res = htonl(NFS4ERR_RESOURCE); } + trace_nfs4_recall_delegation(inode, -ntohl(res)); iput(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7ec4814..ef792f2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -20,6 +20,7 @@ #include "nfs4_fs.h" #include "delegation.h" #include "internal.h" +#include "nfs4trace.h" static void nfs_free_delegation(struct nfs_delegation *delegation) { @@ -160,6 +161,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, spin_unlock(&delegation->lock); put_rpccred(oldcred); rcu_read_unlock(); + trace_nfs4_reclaim_delegation(inode, res->delegation_type); } else { /* We appear to have raced with a delegation return. 
*/ spin_unlock(&delegation->lock); @@ -344,6 +346,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); + trace_nfs4_set_delegation(inode, res->delegation_type); out: spin_unlock(&clp->cl_lock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b3ba92..cbbaee2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4802,6 +4802,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) if (!nfs4_sequence_done(task, &data->res.seq_res)) return; + trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -4905,6 +4906,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 int err; do { err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + trace_nfs4_delegreturn(inode, err); switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6388db8..900f233 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -411,6 +411,76 @@ DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim); DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired); DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); +DECLARE_EVENT_CLASS(nfs4_set_delegation_event, + TP_PROTO( + const struct inode *inode, + fmode_t fmode + ), + + TP_ARGS(inode, fmode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, fmode) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fmode = (__force unsigned int)fmode; + ), + + TP_printk( + "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x", + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) 
+); +#define DEFINE_NFS4_SET_DELEGATION_EVENT(name) \ + DEFINE_EVENT(nfs4_set_delegation_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + fmode_t fmode \ + ), \ + TP_ARGS(inode, fmode)) +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); + +TRACE_EVENT(nfs4_delegreturn_exit, + TP_PROTO( + const struct nfs4_delegreturnargs *args, + const struct nfs4_delegreturnres *res, + int error + ), + + TP_ARGS(args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = res->server->s_dev; + __entry->fhandle = nfs_fhandle_hash(args->fhandle); + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) dev=%02x:%02x fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fhandle + ) +); + DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_PROTO( const struct inode *dir, @@ -554,6 +624,8 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ +DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); +DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); #endif /* _TRACE_NFS4_H */ -- cgit v0.10.2 From 1f2d30b5338f1adaac249707ae49f1c675049cfc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Aug 2013 11:34:01 -0400 Subject: NFSv4: Add tracepoints for debugging the idmapper Add tracepoints to help debug uid/gid mappings to username/group. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 65c7d91..e9b0c59 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -49,6 +49,7 @@ #include "internal.h" #include "netns.h" +#include "nfs4trace.h" #define NFS_UINT_MAXLEN 11 @@ -849,6 +850,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_ if (!uid_valid(*uid)) ret = -ERANGE; } + trace_nfs4_map_name_to_uid(name, namelen, id, ret); return ret; } @@ -865,6 +867,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size if (!gid_valid(*gid)) ret = -ERANGE; } + trace_nfs4_map_group_to_gid(name, namelen, id, ret); return ret; } @@ -879,6 +882,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap); if (ret < 0) ret = nfs_map_numeric_to_string(id, buf, buflen); + trace_nfs4_map_uid_to_name(buf, ret, id, ret); return ret; } int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen) @@ -892,5 +896,6 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap); if (ret < 0) ret = nfs_map_numeric_to_string(id, buf, buflen); + trace_nfs4_map_gid_to_group(buf, ret, id, ret); return ret; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 900f233..f58edcf 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -627,6 +627,52 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); +DECLARE_EVENT_CLASS(nfs4_idmap_event, + TP_PROTO( + const char *name, + int len, + u32 id, + int error + ), + + TP_ARGS(name, len, id, error), + + TP_STRUCT__entry( + __field(int, error) + __field(u32, id) + __dynamic_array(char, name, len > 0 ? len + 1 : 1) + ), + + TP_fast_assign( + if (len < 0) + len = 0; + __entry->error = error < 0 ? 
error : 0; + __entry->id = id; + memcpy(__get_dynamic_array(name), name, len); + ((char *)__get_dynamic_array(name))[len] = 0; + ), + + TP_printk( + "error=%d id=%u name=%s", + __entry->error, + __entry->id, + __get_str(name) + ) +); +#define DEFINE_NFS4_IDMAP_EVENT(name) \ + DEFINE_EVENT(nfs4_idmap_event, name, \ + TP_PROTO( \ + const char *name, \ + int len, \ + u32 id, \ + int error \ + ), \ + TP_ARGS(name, len, id, error)) +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_name_to_uid); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From b5f875a92591d06c57e786b3c916e21e290ad844 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Aug 2013 13:01:39 -0400 Subject: NFSv4: Add tracepoints for debugging getattr Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cbbaee2..bcfe3ad 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2697,6 +2697,7 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, int err; do { err = _nfs4_lookup_root(server, fhandle, info); + trace_nfs4_lookup_root(server, fhandle, info->fattr, err); switch (err) { case 0: case -NFS4ERR_WRONGSEC: @@ -2906,8 +2907,9 @@ static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_getattr(server, fhandle, fattr, label), + err = _nfs4_proc_getattr(server, fhandle, fattr, label); + trace_nfs4_getattr(server, fhandle, fattr, err); + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; @@ -3760,6 +3762,7 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); + trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { struct 
nfs_client *clp = server->nfs_client; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index f58edcf..99d1b1e 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -174,6 +174,24 @@ { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ { ((__force unsigned long)FMODE_EXEC), "EXEC" }) +#define show_nfs_fattr_flags(valid) \ + __print_flags((unsigned long)valid, "|", \ + { NFS_ATTR_FATTR_TYPE, "TYPE" }, \ + { NFS_ATTR_FATTR_MODE, "MODE" }, \ + { NFS_ATTR_FATTR_NLINK, "NLINK" }, \ + { NFS_ATTR_FATTR_OWNER, "OWNER" }, \ + { NFS_ATTR_FATTR_GROUP, "GROUP" }, \ + { NFS_ATTR_FATTR_RDEV, "RDEV" }, \ + { NFS_ATTR_FATTR_SIZE, "SIZE" }, \ + { NFS_ATTR_FATTR_FSID, "FSID" }, \ + { NFS_ATTR_FATTR_FILEID, "FILEID" }, \ + { NFS_ATTR_FATTR_ATIME, "ATIME" }, \ + { NFS_ATTR_FATTR_MTIME, "MTIME" }, \ + { NFS_ATTR_FATTR_CTIME, "CTIME" }, \ + { NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \ + { NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \ + { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }) + DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_PROTO( const struct nfs_client *clp, @@ -627,6 +645,57 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); +DECLARE_EVENT_CLASS(nfs4_getattr_event, + TP_PROTO( + const struct nfs_server *server, + const struct nfs_fh *fhandle, + const struct nfs_fattr *fattr, + int error + ), + + TP_ARGS(server, fhandle, fattr, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, valid) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = server->s_dev; + __entry->valid = fattr->valid; + __entry->fhandle = nfs_fhandle_hash(fhandle); + __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? 
fattr->fileid : 0; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "valid=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_nfs_fattr_flags(__entry->valid) + ) +); + +#define DEFINE_NFS4_GETATTR_EVENT(name) \ + DEFINE_EVENT(nfs4_getattr_event, name, \ + TP_PROTO( \ + const struct nfs_server *server, \ + const struct nfs_fh *fhandle, \ + const struct nfs_fattr *fattr, \ + int error \ + ), \ + TP_ARGS(server, fhandle, fattr, error)) +DEFINE_NFS4_GETATTR_EVENT(nfs4_getattr); +DEFINE_NFS4_GETATTR_EVENT(nfs4_lookup_root); +DEFINE_NFS4_GETATTR_EVENT(nfs4_fsinfo); + DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_PROTO( const char *name, -- cgit v0.10.2 From cc668ab30b8a5505c1651b073882c1a67c802a48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Aug 2013 15:31:28 -0400 Subject: NFSv4: Add tracepoints for debugging reads and writes Set up tracepoints to track read, write and commit, as well as pNFS reads and writes and commits to the data server. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 17ed87e..a70cb3a 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -39,6 +39,7 @@ #include "internal.h" #include "delegation.h" #include "nfs4filelayout.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -247,6 +248,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr = data->header; int err; + trace_nfs4_pnfs_read(data, task->tk_status); err = filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, hdr->lseg); @@ -363,6 +365,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr = data->header; int err; + trace_nfs4_pnfs_write(data, task->tk_status); err = filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, hdr->lseg); @@ -395,6 +398,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, { int err; + trace_nfs4_pnfs_commit_ds(data, task->tk_status); err = filelayout_async_handle_error(task, NULL, data->ds_clp, data->lseg); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bcfe3ad..214f9c4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3881,6 +3881,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->header->inode); + trace_nfs4_read(data, task->tk_status); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -3942,6 +3943,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data { struct inode *inode = data->header->inode; + trace_nfs4_write(data, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -4033,6 +4035,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct 
nfs_commit_data *da { struct inode *inode = data->inode; + trace_nfs4_commit(data, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 3f02183..f840974 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -8,3 +8,8 @@ #define CREATE_TRACE_POINTS #include "nfs4trace.h" +#ifdef CONFIG_NFS_V4_1 +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds); +#endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 99d1b1e..1ea6983 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -742,6 +742,160 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid); DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name); DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); +DECLARE_EVENT_CLASS(nfs4_read_event, + TP_PROTO( + const struct nfs_read_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->header->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); +#define DEFINE_NFS4_READ_EVENT(name) \ + DEFINE_EVENT(nfs4_read_event, name, \ + TP_PROTO( \ + const struct nfs_read_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) 
+DEFINE_NFS4_READ_EVENT(nfs4_read); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_write_event, + TP_PROTO( + const struct nfs_write_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->header->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); + +#define DEFINE_NFS4_WRITE_EVENT(name) \ + DEFINE_EVENT(nfs4_write_event, name, \ + TP_PROTO( \ + const struct nfs_write_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) +DEFINE_NFS4_WRITE_EVENT(nfs4_write); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_commit_event, + TP_PROTO( + const struct nfs_commit_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d 
(%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); +#define DEFINE_NFS4_COMMIT_EVENT(name) \ + DEFINE_EVENT(nfs4_commit_event, name, \ + TP_PROTO( \ + const struct nfs_commit_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) +DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); +#endif /* CONFIG_NFS_V4_1 */ + #endif /* _TRACE_NFS4_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3a3a79d..d75d938 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -33,6 +33,7 @@ #include "internal.h" #include "pnfs.h" #include "iostat.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -1526,6 +1527,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) { struct nfs_pgio_header *hdr = data->header; + trace_nfs4_pnfs_write(data, hdr->pnfs_error); if (!hdr->pnfs_error) { pnfs_set_layoutcommit(data); hdr->mds_ops->rpc_call_done(&data->task, data); @@ -1680,6 +1682,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data) { struct nfs_pgio_header *hdr = data->header; + trace_nfs4_pnfs_read(data, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(data); hdr->mds_ops->rpc_call_done(&data->task, data); -- cgit v0.10.2 From 1037e6eaa37a42cec877f103c091cfe5304f4450 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Aug 2013 16:36:51 -0400 Subject: NFSv4.1: Add tracepoints for debugging layoutget/return/commit Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 214f9c4..c8b5ff6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6860,6 +6860,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) status = 
nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; + trace_nfs4_layoutget(lgp->args.ctx, + &lgp->args.range, + &lgp->res.range, + status); /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); @@ -6945,6 +6949,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) if (IS_ERR(task)) return PTR_ERR(task); status = task->tk_status; + trace_nfs4_layoutreturn(lrp->args.inode, status); dprintk("<-- %s status=%d\n", __func__, status); rpc_put_task(task); return status; @@ -7131,6 +7136,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) if (status != 0) goto out; status = task->tk_status; + trace_nfs4_layoutcommit(data->args.inode, status); out: dprintk("%s: status %d\n", __func__, status); rpc_put_task(task); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 1ea6983..b2a2d8a 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -894,6 +894,61 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); + +#define show_pnfs_iomode(iomode) \ + __print_symbolic(iomode, \ + { IOMODE_READ, "READ" }, \ + { IOMODE_RW, "RW" }, \ + { IOMODE_ANY, "ANY" }) + +TRACE_EVENT(nfs4_layoutget, + TP_PROTO( + const struct nfs_open_context *ctx, + const struct pnfs_layout_range *args, + const struct pnfs_layout_range *res, + int error + ), + + TP_ARGS(ctx, args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u32, iomode) + __field(u64, offset) + __field(u64, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = ctx->dentry->d_inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->iomode = args->iomode; + __entry->offset = args->offset; + __entry->count 
= args->length; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "iomode=%s offset=%llu count=%llu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_pnfs_iomode(__entry->iomode), + (unsigned long long)__entry->offset, + (unsigned long long)__entry->count + ) +); + +DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit); +DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn); + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ -- cgit v0.10.2 From 2f92ae343e2358a4936c2470debfc4424b29eb3e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Aug 2013 17:58:28 -0400 Subject: NFSv4.1: Add tracepoints for debugging slot table operations Add tracepoints to nfs41_setup_sequence and nfs41_sequence_done to track session and slot table state changes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c074930..b25ed7e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -464,6 +464,7 @@ out: } else res->csr_status = status; + trace_nfs4_cb_sequence(args, res, status); dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, ntohl(status), ntohl(res->csr_status)); return status; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c8b5ff6..a745f7c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -508,6 +508,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * interrupted = true; } + trace_nfs4_sequence_done(session, res); /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: @@ -660,6 +661,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, * set to 1 if an rpc level failure occurs. 
*/ res->sr_status = 1; + trace_nfs4_setup_sequence(session, args); out_success: rpc_call_start(task); return 0; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 86a0669..e3e6ce3 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -117,6 +117,16 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) return 0; } +#ifdef CONFIG_CRC32 +/* + * nfs_session_id_hash - calculate the crc32 hash for the session id + * @session - pointer to session + */ +#define nfs_session_id_hash(sess_id) \ + (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) +#else +#define nfs_session_id_hash(session) (0) +#endif #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_client *clp) diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index f840974..d774335 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -4,6 +4,8 @@ #include #include "nfs4_fs.h" #include "internal.h" +#include "nfs4session.h" +#include "callback.h" #define CREATE_TRACE_POINTS #include "nfs4trace.h" diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index b2a2d8a..05b96a6 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -240,6 +240,145 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_clientid); DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); + +TRACE_EVENT(nfs4_setup_sequence, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_args *args + ), + TP_ARGS(session, args), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_used_slotid) + ), + + TP_fast_assign( + const struct nfs4_slot *sa_slot = args->sa_slot; + __entry->session = nfs_session_id_hash(&session->sess_id); + __entry->slot_nr = sa_slot->slot_nr; + __entry->seq_nr = sa_slot->seq_nr; + __entry->highest_used_slotid = + 
sa_slot->table->highest_used_slotid; + ), + TP_printk( + "session=0x%08x slot_nr=%u seq_nr=%u " + "highest_used_slotid=%u", + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_used_slotid + ) +); + +#define show_nfs4_sequence_status_flags(status) \ + __print_flags((unsigned long)status, "|", \ + { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \ + "CB_GSS_CONTEXTS_EXPIRING" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \ + "CB_GSS_CONTEXTS_EXPIRED" }, \ + { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \ + "EXPIRED_ALL_STATE_REVOKED" }, \ + { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \ + "EXPIRED_SOME_STATE_REVOKED" }, \ + { SEQ4_STATUS_ADMIN_STATE_REVOKED, \ + "ADMIN_STATE_REVOKED" }, \ + { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \ + "RECALLABLE_STATE_REVOKED" }, \ + { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ + { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \ + "RESTART_RECLAIM_NEEDED" }, \ + { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \ + "CB_PATH_DOWN_SESSION" }, \ + { SEQ4_STATUS_BACKCHANNEL_FAULT, \ + "BACKCHANNEL_FAULT" }) + +TRACE_EVENT(nfs4_sequence_done, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_res *res + ), + TP_ARGS(session, res), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, target_highest_slotid) + __field(unsigned int, status_flags) + __field(int, error) + ), + + TP_fast_assign( + const struct nfs4_slot *sr_slot = res->sr_slot; + __entry->session = nfs_session_id_hash(&session->sess_id); + __entry->slot_nr = sr_slot->slot_nr; + __entry->seq_nr = sr_slot->seq_nr; + __entry->highest_slotid = res->sr_highest_slotid; + __entry->target_highest_slotid = res->sr_target_highest_slotid; + __entry->status_flags = res->sr_status_flags; + __entry->error = res->sr_status; + ), + TP_printk( + "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u target_highest_slotid=%u " +
"status_flags=%u (%s)", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid, + __entry->target_highest_slotid, + __entry->status_flags, + show_nfs4_sequence_status_flags(__entry->status_flags) + ) +); + +struct cb_sequenceargs; +struct cb_sequenceres; + +TRACE_EVENT(nfs4_cb_sequence, + TP_PROTO( + const struct cb_sequenceargs *args, + const struct cb_sequenceres *res, + __be32 status + ), + TP_ARGS(args, res, status), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, cachethis) + __field(int, error) + ), + + TP_fast_assign( + __entry->session = nfs_session_id_hash(&args->csa_sessionid); + __entry->slot_nr = args->csa_slotid; + __entry->seq_nr = args->csa_sequenceid; + __entry->highest_slotid = args->csa_highestslotid; + __entry->cachethis = args->csa_cachethis; + __entry->error = -be32_to_cpu(status); + ), + + TP_printk( + "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid + ) +); #endif /* CONFIG_NFS_V4_1 */ DECLARE_EVENT_CLASS(nfs4_open_event, -- cgit v0.10.2 From 08cb47faa4687342e9cbde54ff0f15a768eb5632 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Aug 2013 21:59:40 -0400 Subject: NFSv4.1: Add tracepoints for debugging test_stateid events Add tracepoints to detect issues with the TEST_STATEID operation. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a745f7c..1671e1f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1950,6 +1950,7 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_stateid(server, stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); } else rcu_read_unlock(); @@ -1992,6 +1993,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) return -NFS4ERR_BAD_STATEID; status = nfs41_test_stateid(server, stateid, cred); + trace_nfs4_test_open_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ @@ -5472,6 +5474,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) status = nfs41_test_stateid(server, &lsp->ls_stateid, cred); + trace_nfs4_test_lock_stateid(state, lsp, status); if (status != NFS_OK) { /* Free the stateid unless the server * informs us the stateid is unrecognized. 
*/ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 05b96a6..849cf14 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -638,6 +638,55 @@ TRACE_EVENT(nfs4_delegreturn_exit, ) ); +#ifdef CONFIG_NFS_V4_1 +DECLARE_EVENT_CLASS(nfs4_test_stateid_event, + TP_PROTO( + const struct nfs4_state *state, + const struct nfs4_lock_state *lsp, + int error + ), + + TP_ARGS(state, lsp, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->error = error; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define DEFINE_NFS4_TEST_STATEID_EVENT(name) \ + DEFINE_EVENT(nfs4_test_stateid_event, name, \ + TP_PROTO( \ + const struct nfs4_state *state, \ + const struct nfs4_lock_state *lsp, \ + int error \ + ), \ + TP_ARGS(state, lsp, error)) +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_delegation_stateid); +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_open_stateid); +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_lock_stateid); +#endif /* CONFIG_NFS_V4_1 */ + DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_PROTO( const struct inode *dir, -- cgit v0.10.2 From 6686390bab6a0e049fa7040631aee08b35a55293 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 Aug 2013 16:52:47 +1000 Subject: NFS: remove incorrect "Lock reclaim failed!" warning. After reclaiming state that was lost, the NFS client tries to reclaim any locks, and then checks that each one has NFS_LOCK_INITIALIZED set (which means that the server has confirmed the lock). 
However if the client holds a delegation, nfs4_reclaim_locks() simply aborts (or more accurately it calls nfs_lock_reclaim() and that returns without doing anything). This is because when a delegation is held, the server doesn't need to know about locks. So if a delegation is held, NFS_LOCK_INITIALIZED is not expected, and its absence is certainly not an error. So don't print the warnings if NFS_DELEGATED_STATE is set. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6818964..25b7cf6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1415,14 +1415,16 @@ restart: if (status >= 0) { status = nfs4_reclaim_locks(state, ops); if (status >= 0) { - spin_lock(&state->state_lock); - list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) - pr_warn_ratelimited("NFS: " - "%s: Lock reclaim " - "failed!\n", __func__); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { + spin_lock(&state->state_lock); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) + pr_warn_ratelimited("NFS: " + "%s: Lock reclaim " + "failed!\n", __func__); + } + spin_unlock(&state->state_lock); } - spin_unlock(&state->state_lock); nfs4_put_open_state(state); spin_lock(&sp->so_lock); goto restart; -- cgit v0.10.2 From 2d9db75005effd6d4e0c8be4f74922e4f413fbe5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Aug 2013 09:17:33 -0400 Subject: NFS: Fix up two use-after-free issues with the new tracing code We don't want to pass the context argument to trace_nfs_atomic_open_exit() after it has been released.
Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4ce7f76..d8149e9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1399,7 +1399,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, nfs_file_set_open_context(file, ctx); out: - put_nfs_open_context(ctx); return err; } @@ -1460,9 +1459,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { - put_nfs_open_context(ctx); err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); + put_nfs_open_context(ctx); switch (err) { case -ENOENT: d_drop(dentry); @@ -1484,6 +1483,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); + put_nfs_open_context(ctx); out: return err; -- cgit v0.10.2 From 55909f21a1079c5f9751d7919f8c9411500e1667 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Aug 2013 11:48:15 -0400 Subject: SUNRPC: Deprecate rpc_client->cl_protname It just duplicates the cl_program->name, and is not used in any fast paths where the extra dereference will cause a hit. 
Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a70..e862f8c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -629,7 +629,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) might_sleep(); dprintk_rcu("RPC: shutting down %s client for %s\n", - clnt->cl_protname, + clnt->cl_program->name, rcu_dereference(clnt->cl_xprt)->servername); while (!list_empty(&clnt->cl_tasks)) { @@ -649,7 +649,7 @@ static void rpc_free_client(struct rpc_clnt *clnt) { dprintk_rcu("RPC: destroying %s client for %s\n", - clnt->cl_protname, + clnt->cl_program->name, rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); @@ -1299,7 +1299,7 @@ call_start(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, - clnt->cl_protname, clnt->cl_vers, + clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); @@ -1908,7 +1908,7 @@ call_status(struct rpc_task *task) default: if (clnt->cl_chatty) printk("%s: RPC call returned error %d\n", - clnt->cl_protname, -status); + clnt->cl_program->name, -status); rpc_exit(task, status); } } @@ -1939,7 +1939,7 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) { rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", - clnt->cl_protname, + clnt->cl_program->name, rcu_dereference(clnt->cl_xprt)->servername); rcu_read_unlock(); } @@ -1955,7 +1955,7 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) { rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, + clnt->cl_program->name, rcu_dereference(clnt->cl_xprt)->servername); rcu_read_unlock(); } @@ -1990,7 +1990,7 @@ call_decode(struct rpc_task *task) if (clnt->cl_chatty) { rcu_read_lock(); printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, + clnt->cl_program->name, 
rcu_dereference(clnt->cl_xprt)->servername); rcu_read_unlock(); } @@ -2015,7 +2015,7 @@ call_decode(struct rpc_task *task) goto out_retry; } dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", - clnt->cl_protname, task->tk_status); + clnt->cl_program->name, task->tk_status); task->tk_action = call_timeout; goto out_retry; } @@ -2287,7 +2287,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt, printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, - clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), task->tk_action, rpc_waitq); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 017aedc..b36bfb9 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -409,7 +409,7 @@ rpc_show_info(struct seq_file *m, void *v) rcu_read_lock(); seq_printf(m, "RPC server: %s\n", rcu_dereference(clnt->cl_xprt)->servername); - seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, + seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 21b75cb..5453049 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -188,7 +188,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS); seq_printf(seq, "p/v: %u/%u (%s)\n", - clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); + clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name); rcu_read_lock(); xprt = rcu_dereference(clnt->cl_xprt); -- cgit v0.10.2 From 1dada8e1f94e863a94f6622f0ddb5d1b01420150 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 19:58:45 -0400 
Subject: SUNRPC: Remove unused struct rpc_clnt field cl_protname Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index bfe11be..481f9c0 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -41,7 +41,6 @@ struct rpc_clnt { cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ - const char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ struct rpc_stat * cl_stats; /* per-program statistics */ struct rpc_iostats * cl_metrics; /* per-client statistics */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e862f8c..a327cc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -348,7 +348,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; - clnt->cl_protname = program->name; clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; -- cgit v0.10.2 From 41b6b4d0b88f80d04729a5286e838e972733db1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Aug 2013 13:03:50 -0400 Subject: SUNRPC: Cleanup rpc_setup_pipedir The directory name is _always_ clnt->cl_program->pipe_dir_name. 
Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a327cc7..79d4bc2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -123,10 +123,10 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) } static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, - struct rpc_clnt *clnt, - const char *dir_name) + struct rpc_clnt *clnt) { static uint32_t clntid; + const char *dir_name = clnt->cl_program->pipe_dir_name; char name[15]; struct dentry *dir, *dentry; @@ -153,23 +153,26 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, } static int -rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name, - struct super_block *pipefs_sb) +rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { struct dentry *dentry; + if (clnt->cl_program->pipe_dir_name == NULL) + goto out; clnt->cl_dentry = NULL; - if (dir_name == NULL) - return 0; - dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); + dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; +out: return 0; } -static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) +static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) { + if (clnt->cl_program->pipe_dir_name == NULL) + return 1; + if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) return 1; @@ -186,8 +189,7 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, switch (event) { case RPC_PIPEFS_MOUNT: - dentry = rpc_setup_pipedir_sb(sb, clnt, - clnt->cl_program->pipe_dir_name); + dentry = rpc_setup_pipedir_sb(sb, clnt); if (!dentry) return -ENOENT; if (IS_ERR(dentry)) @@ -230,8 +232,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) spin_lock(&sn->rpc_client_lock); list_for_each_entry(clnt, &sn->all_clients, cl_clients) { - if 
(clnt->cl_program->pipe_dir_name == NULL) - continue; if (rpc_clnt_skip_event(clnt, event)) continue; spin_unlock(&sn->rpc_client_lock); @@ -282,7 +282,6 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) static int rpc_client_register(const struct rpc_create_args *args, struct rpc_clnt *clnt) { - const struct rpc_program *program = args->program; struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; @@ -290,7 +289,7 @@ static int rpc_client_register(const struct rpc_create_args *args, pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { - err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + err = rpc_setup_pipedir(pipefs_sb, clnt); if (err) goto out; } -- cgit v0.10.2 From bd4a3eb15bb42296e61d0fd16f2c7f8cc171b681 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Aug 2013 14:02:24 -0400 Subject: RPCSEC_GSS: Clean up upcall message allocation Optimise away gss_encode_msg: we don't need to look up the pipe version a second time. Save the gss target name in struct gss_auth. It is a property of the auth cache itself, and doesn't really belong in the rpc_client. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc2f78d..b62812a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -84,6 +84,7 @@ struct gss_auth { * backwards-compatibility with older gssd's. */ struct rpc_pipe *pipe[2]; + const char *target_name; }; /* pipe_version >= 0 if and only if someone has a pipe open. 
*/ @@ -406,8 +407,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) } static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, - const char *service_name) + const char *service_name, + const char *target_name) { struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; @@ -417,8 +418,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, mech->gm_name, from_kuid(&init_user_ns, gss_msg->uid)); p += gss_msg->msg.len; - if (clnt->cl_principal) { - len = sprintf(p, "target=%s ", clnt->cl_principal); + if (target_name) { + len = sprintf(p, "target=%s ", target_name); p += len; gss_msg->msg.len += len; } @@ -439,19 +440,6 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN); } -static void gss_encode_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, - const char *service_name) -{ - struct net *net = rpc_net_ns(clnt); - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - if (sn->pipe_version == 0) - gss_encode_v0_msg(gss_msg); - else /* pipe_version == 1 */ - gss_encode_v1_msg(gss_msg, clnt, service_name); -} - static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, kuid_t uid, const char *service_name) @@ -474,7 +462,12 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, atomic_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; - gss_encode_msg(gss_msg, clnt, service_name); + /* The v0 and v1 upcall encodings are mutually exclusive */ + if (vers == 0) + gss_encode_v0_msg(gss_msg); + else + gss_encode_v1_msg(gss_msg, service_name, + gss_auth->target_name); return gss_msg; } @@ -883,6 +876,12 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; + gss_auth->target_name = NULL; + if (clnt->cl_principal) { + gss_auth->target_name = kstrdup(clnt->cl_principal, GFP_KERNEL); + if
(gss_auth->target_name == NULL) + goto err_free; + } gss_auth->client = clnt; err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); @@ -937,6 +936,7 @@ err_destroy_pipe_1: err_put_mech: gss_mech_put(gss_auth->mech); err_free: + kfree(gss_auth->target_name); kfree(gss_auth); out_dec: module_put(THIS_MODULE); @@ -950,6 +950,7 @@ gss_free(struct gss_auth *gss_auth) rpc_destroy_pipe_data(gss_auth->pipe[0]); rpc_destroy_pipe_data(gss_auth->pipe[1]); gss_mech_put(gss_auth->mech); + kfree(gss_auth->target_name); kfree(gss_auth); module_put(THIS_MODULE); -- cgit v0.10.2 From c2190661039b3817b4cc1cbfea620b3f7dbe5cd8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 19:23:04 -0400 Subject: SUNRPC: Replace clnt->cl_principal The clnt->cl_principal is being used exclusively to store the service target name for RPCSEC_GSS/krb5 callbacks. Replace it with something that is stored only in the RPCSEC_GSS-specific code. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1671e1f..09c7e3b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2717,10 +2717,13 @@ out: static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info, rpc_authflavor_t flavor) { + struct rpc_auth_create_args auth_args = { + .pseudoflavor = flavor, + }; struct rpc_auth *auth; int ret; - auth = rpcauth_create(flavor, server->client); + auth = rpcauth_create(&auth_args, server->client); if (IS_ERR(auth)) { ret = -EACCES; goto out; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 0dd00f4..a9ab577 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -87,6 +87,11 @@ struct rpc_auth { /* per-flavor data */ }; +struct rpc_auth_create_args { + rpc_authflavor_t pseudoflavor; + const char *target_name; +}; + /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ @@ -97,7 +102,7 @@ struct rpc_authops 
{ struct module *owner; rpc_authflavor_t au_flavor; /* flavor (RPC_AUTH_*) */ char * au_name; - struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t); + struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *); void (*destroy)(struct rpc_auth *); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); @@ -140,7 +145,8 @@ struct rpc_cred * rpc_lookup_cred(void); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); -struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); +struct rpc_auth * rpcauth_create(struct rpc_auth_create_args *, + struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 481f9c0..b93b4aa 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -60,7 +60,6 @@ struct rpc_clnt { struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; const struct rpc_program *cl_program; - char *cl_principal; /* target to authenticate to */ }; /* diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ed2fdd2..5071e43 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -250,11 +250,11 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size) EXPORT_SYMBOL_GPL(rpcauth_list_flavors); struct rpc_auth * -rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) +rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct rpc_auth *auth; const struct rpc_authops *ops; - u32 flavor = pseudoflavor_to_flavor(pseudoflavor); + u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor); auth = ERR_PTR(-EINVAL); if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -269,7 +269,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) goto out; } 
spin_unlock(&rpc_authflavor_lock); - auth = ops->create(clnt, pseudoflavor); + auth = ops->create(args, clnt); module_put(ops->owner); if (IS_ERR(auth)) return auth; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b62812a..672a67f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -864,8 +864,9 @@ static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, * parameters based on the input flavor (which must be a pseudoflavor) */ static struct rpc_auth * -gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { + rpc_authflavor_t flavor = args->pseudoflavor; struct gss_auth *gss_auth; struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ @@ -877,8 +878,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; gss_auth->target_name = NULL; - if (clnt->cl_principal) { - gss_auth->target_name = kstrdup(clnt->cl_principal, GFP_KERNEL); + if (args->target_name) { + gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL); if (gss_auth->target_name == NULL) goto err_free; } diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index a5c36c0..4664eb4 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -18,7 +18,7 @@ static struct rpc_auth null_auth; static struct rpc_cred null_cred; static struct rpc_auth * -nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { atomic_inc(&null_auth.au_count); return &null_auth; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index dc37021..e52d832 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -33,7 +33,7 @@ static struct rpc_auth unix_auth; static const struct rpc_credops unix_credops; static struct rpc_auth * -unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 
+unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 79d4bc2..7407f1d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -282,6 +282,10 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) static int rpc_client_register(const struct rpc_create_args *args, struct rpc_clnt *clnt) { + struct rpc_auth_create_args auth_args = { + .pseudoflavor = args->authflavor, + .target_name = args->client_name, + }; struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; @@ -298,7 +302,7 @@ static int rpc_client_register(const struct rpc_create_args *args, if (pipefs_sb) rpc_put_sb_net(net); - auth = rpcauth_create(args->authflavor, clnt); + auth = rpcauth_create(&auth_args, clnt); if (IS_ERR(auth)) { dprintk("RPC: Couldn't create auth handle (flavor %u)\n", args->authflavor); @@ -370,12 +374,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); - clnt->cl_principal = NULL; - if (args->client_name) { - clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL); - if (!clnt->cl_principal) - goto out_no_principal; - } atomic_set(&clnt->cl_count, 1); @@ -388,8 +386,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_path: - kfree(clnt->cl_principal); -out_no_principal: rpc_free_iostats(clnt->cl_metrics); out_no_stats: kfree(clnt); @@ -559,7 +555,6 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .prognumber = clnt->cl_prog, .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, - .client_name = clnt->cl_principal, }; return __rpc_clone_client(&args, clnt); } @@ -581,7 +576,6 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .prognumber = 
clnt->cl_prog, .version = clnt->cl_vers, .authflavor = flavor, - .client_name = clnt->cl_principal, }; return __rpc_clone_client(&args, clnt); } @@ -654,7 +648,6 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); - kfree(clnt->cl_principal); clnt->cl_metrics = NULL; xprt_put(rcu_dereference_raw(clnt->cl_xprt)); rpciod_down(); @@ -718,7 +711,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .prognumber = program->number, .version = vers, .authflavor = old->cl_auth->au_flavor, - .client_name = old->cl_principal, }; struct rpc_clnt *clnt; int err; -- cgit v0.10.2 From e726340ac9cf6bb5b3f92a064664e10cd2b748de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Aug 2013 14:42:29 -0400 Subject: RPCSEC_GSS: Further cleanups Don't pass the rpc_client as a parameter, when what we really want is the net namespace. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 672a67f..6323803 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -77,6 +77,7 @@ struct gss_auth { struct gss_api_mech *mech; enum rpc_gss_svc service; struct rpc_clnt *client; + struct net *net; /* * There are two upcall pipes; dentry[1], named "gssd", is used * for the new text-based upcall; dentry[0] is named after the @@ -295,7 +296,7 @@ static void put_pipe_version(struct net *net) static void gss_release_msg(struct gss_upcall_msg *gss_msg) { - struct net *net = rpc_net_ns(gss_msg->auth->client); + struct net *net = gss_msg->auth->net; if (!atomic_dec_and_test(&gss_msg->count)) return; put_pipe_version(net); @@ -441,7 +442,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, } static struct gss_upcall_msg * -gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, +gss_alloc_msg(struct gss_auth *gss_auth, kuid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; @@ 
-450,7 +451,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg == NULL) return ERR_PTR(-ENOMEM); - vers = get_pipe_version(rpc_net_ns(clnt)); + vers = get_pipe_version(gss_auth->net); if (vers < 0) { kfree(gss_msg); return ERR_PTR(vers); @@ -472,14 +473,14 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, } static struct gss_upcall_msg * -gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) +gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; kuid_t uid = cred->cr_uid; - gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); + gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) return gss_new; gss_msg = gss_add_msg(gss_new); @@ -520,7 +521,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u %s for uid %u\n", task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); - gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); + gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. 
*/ @@ -559,7 +560,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { - struct net *net = rpc_net_ns(gss_auth->client); + struct net *net = gss_auth->net; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; @@ -576,7 +577,7 @@ retry: timeout = 15 * HZ; if (!sn->gssd_running) timeout = HZ >> 2; - gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); + gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, sn->pipe_version >= 0, timeout); @@ -832,7 +833,9 @@ err_unlink_pipe_1: static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, struct rpc_auth *auth) { - struct net *net = rpc_net_ns(clnt); + struct gss_auth *gss_auth = container_of(auth, struct gss_auth, + rpc_auth); + struct net *net = gss_auth->net; struct super_block *sb; sb = rpc_get_sb_net(net); @@ -846,7 +849,9 @@ static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, struct rpc_auth *auth) { - struct net *net = rpc_net_ns(clnt); + struct gss_auth *gss_auth = container_of(auth, struct gss_auth, + rpc_auth); + struct net *net = gss_auth->net; struct super_block *sb; int err = 0; @@ -884,11 +889,12 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) goto err_free; } gss_auth->client = clnt; + gss_auth->net = get_net(rpc_net_ns(clnt)); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) { dprintk("RPC: Pseudoflavor %d not found!\n", flavor); - goto err_free; + goto err_put_net; } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) @@ -936,6 +942,8 @@ err_destroy_pipe_1: rpc_destroy_pipe_data(gss_auth->pipe[1]); err_put_mech: gss_mech_put(gss_auth->mech); +err_put_net: + put_net(gss_auth->net); err_free: 
kfree(gss_auth->target_name); kfree(gss_auth); @@ -951,6 +959,7 @@ gss_free(struct gss_auth *gss_auth) rpc_destroy_pipe_data(gss_auth->pipe[0]); rpc_destroy_pipe_data(gss_auth->pipe[1]); gss_mech_put(gss_auth->mech); + put_net(gss_auth->net); kfree(gss_auth->target_name); kfree(gss_auth); -- cgit v0.10.2 From 6b2fddd3e7f7cd437fb234407d7504fd22caf064 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 16:05:11 -0400 Subject: RPCSEC_GSS: Fix an Oopsable condition when creating/destroying pipefs objects If an error condition occurs on rpc_pipefs creation, or the user mounts rpc_pipefs and then unmounts it, then the dentries in struct gss_auth need to be reset to NULL so that a second call to gss_pipes_dentries_destroy doesn't try to free them again. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6323803..c0d36bb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -796,10 +796,14 @@ static void gss_pipes_dentries_destroy(struct rpc_auth *auth) struct gss_auth *gss_auth; gss_auth = container_of(auth, struct gss_auth, rpc_auth); - if (gss_auth->pipe[0]->dentry) + if (gss_auth->pipe[0]->dentry) { rpc_unlink(gss_auth->pipe[0]->dentry); - if (gss_auth->pipe[1]->dentry) + gss_auth->pipe[0]->dentry = NULL; + } + if (gss_auth->pipe[1]->dentry) { rpc_unlink(gss_auth->pipe[1]->dentry); + gss_auth->pipe[1]->dentry = NULL; + } } static int gss_pipes_dentries_create(struct rpc_auth *auth) @@ -807,26 +811,30 @@ static int gss_pipes_dentries_create(struct rpc_auth *auth) int err; struct gss_auth *gss_auth; struct rpc_clnt *clnt; + struct dentry *dentry; gss_auth = container_of(auth, struct gss_auth, rpc_auth); clnt = gss_auth->client; - gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, - "gssd", - clnt, gss_auth->pipe[1]); - if (IS_ERR(gss_auth->pipe[1]->dentry)) - return PTR_ERR(gss_auth->pipe[1]->dentry); - gss_auth->pipe[0]->dentry = 
rpc_mkpipe_dentry(clnt->cl_dentry, - gss_auth->mech->gm_name, - clnt, gss_auth->pipe[0]); - if (IS_ERR(gss_auth->pipe[0]->dentry)) { - err = PTR_ERR(gss_auth->pipe[0]->dentry); + dentry = rpc_mkpipe_dentry(clnt->cl_dentry, "gssd", + clnt, gss_auth->pipe[1]); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto err; + } + gss_auth->pipe[1]->dentry = dentry; + dentry = rpc_mkpipe_dentry(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, gss_auth->pipe[0]); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); goto err_unlink_pipe_1; } return 0; err_unlink_pipe_1: rpc_unlink(gss_auth->pipe[1]->dentry); + gss_auth->pipe[1]->dentry = NULL; +err: return err; } -- cgit v0.10.2 From d7631250b20119ea763d9ba461c3a9c5710cfa98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 17:26:51 -0400 Subject: NFSv4: Fix a potentially Oopsable condition in __nfs_idmap_unregister Ensure that __nfs_idmap_unregister can be called twice without consequences. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index e9b0c59..8b7e94a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -404,8 +404,10 @@ static struct key_type key_type_id_resolver_legacy = { static void __nfs_idmap_unregister(struct rpc_pipe *pipe) { - if (pipe->dentry) + if (pipe->dentry) { rpc_unlink(pipe->dentry); + pipe->dentry = NULL; + } } static int __nfs_idmap_register(struct dentry *dir, -- cgit v0.10.2 From 6739ffb754b47e6c0fa9d9e268bde828f6856528 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 15:38:11 -0400 Subject: SUNRPC: Add a framework to clean up management of rpc_pipefs directories The current system requires everyone to set up notifiers, manage directory locking, etc. What we really want to do is have the rpc_client create its directory, and then create all the entries. 
This patch will allow the RPCSEC_GSS and NFS code to register all the objects that they want to have appear in the directory, and then have the sunrpc code call them back to actually create/destroy their pipefs dentries when the rpc_client creates/destroys the parent. Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b93b4aa..0dccd01 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; + struct rpc_pipe_dir_head cl_pipedir_objects; struct dentry * cl_dentry; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index aa5b582..188e7fc 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -5,6 +5,26 @@ #include +struct rpc_pipe_dir_head { + struct list_head pdh_entries; + struct dentry *pdh_dentry; +}; + +struct rpc_pipe_dir_object_ops; +struct rpc_pipe_dir_object { + struct list_head pdo_head; + const struct rpc_pipe_dir_object_ops *pdo_ops; + + void *pdo_data; +}; + +struct rpc_pipe_dir_object_ops { + int (*create)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); + void (*destroy)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); +}; + struct rpc_pipe_msg { struct list_head list; void *data; @@ -74,7 +94,18 @@ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); -extern int rpc_remove_client_dir(struct dentry *); +extern int rpc_remove_client_dir(struct dentry *, struct rpc_clnt *); + +extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); +extern void rpc_init_pipe_dir_object(struct 
rpc_pipe_dir_object *pdo, + const struct rpc_pipe_dir_object_ops *pdo_ops, + void *pdo_data); +extern int rpc_add_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo); +extern void rpc_remove_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo); struct cache_detail; extern struct dentry *rpc_create_cache_dir(struct dentry *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7407f1d..01d2296 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -105,7 +105,7 @@ static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) if (clnt->cl_dentry) { if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); - rpc_remove_client_dir(clnt->cl_dentry); + rpc_remove_client_dir(clnt->cl_dentry, clnt); } clnt->cl_dentry = NULL; } @@ -355,6 +355,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); + rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects); err = -ENOMEM; if (clnt->cl_metrics == NULL) goto out_no_stats; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b36bfb9..cfeba77 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -884,6 +884,124 @@ rpc_unlink(struct dentry *dentry) } EXPORT_SYMBOL_GPL(rpc_unlink); +/** + * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head + * @pdh: pointer to struct rpc_pipe_dir_head + */ +void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh) +{ + INIT_LIST_HEAD(&pdh->pdh_entries); + pdh->pdh_dentry = NULL; +} +EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head); + +/** + * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object + * @pdo: pointer to struct rpc_pipe_dir_object + * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops + * @pdo_data: pointer to caller-defined data + */ +void 
rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, + const struct rpc_pipe_dir_object_ops *pdo_ops, + void *pdo_data) +{ + INIT_LIST_HEAD(&pdo->pdo_head); + pdo->pdo_ops = pdo_ops; + pdo->pdo_data = pdo_data; +} +EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object); + +static int +rpc_add_pipe_dir_object_locked(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo) +{ + int ret = 0; + + if (pdh->pdh_dentry) + ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo); + if (ret == 0) + list_add_tail(&pdo->pdo_head, &pdh->pdh_entries); + return ret; +} + +static void +rpc_remove_pipe_dir_object_locked(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo) +{ + if (pdh->pdh_dentry) + pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo); + list_del_init(&pdo->pdo_head); +} + +/** + * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory + * @net: pointer to struct net + * @pdh: pointer to struct rpc_pipe_dir_head + * @pdo: pointer to struct rpc_pipe_dir_object + * + */ +int +rpc_add_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo) +{ + int ret = 0; + + if (list_empty(&pdo->pdo_head)) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_lock(&sn->pipefs_sb_lock); + ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo); + mutex_unlock(&sn->pipefs_sb_lock); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object); + +/** + * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory + * @net: pointer to struct net + * @pdh: pointer to struct rpc_pipe_dir_head + * @pdo: pointer to struct rpc_pipe_dir_object + * + */ +void +rpc_remove_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo) +{ + if (!list_empty(&pdo->pdo_head)) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + mutex_lock(&sn->pipefs_sb_lock); + rpc_remove_pipe_dir_object_locked(net, pdh, pdo); + 
mutex_unlock(&sn->pipefs_sb_lock); + } +} +EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object); + +static void +rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) +{ + struct rpc_pipe_dir_object *pdo; + struct dentry *dir = pdh->pdh_dentry; + + list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) + pdo->pdo_ops->create(dir, pdo); +} + +static void +rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) +{ + struct rpc_pipe_dir_object *pdo; + struct dentry *dir = pdh->pdh_dentry; + + list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) + pdo->pdo_ops->destroy(dir, pdo); +} + enum { RPCAUTH_info, RPCAUTH_EOF @@ -924,16 +1042,28 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, const char *name, struct rpc_clnt *rpc_client) { - return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, + struct dentry *ret; + + ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, rpc_clntdir_populate, rpc_client); + if (!IS_ERR(ret)) { + rpc_client->cl_pipedir_objects.pdh_dentry = ret; + rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); + } + return ret; } /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() * @dentry: dentry for the pipe + * @rpc_client: rpc_client for the pipe */ -int rpc_remove_client_dir(struct dentry *dentry) +int rpc_remove_client_dir(struct dentry *dentry, struct rpc_clnt *rpc_client) { + if (rpc_client->cl_pipedir_objects.pdh_dentry) { + rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); + rpc_client->cl_pipedir_objects.pdh_dentry = NULL; + } return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } -- cgit v0.10.2 From 1917228435eebdf4e3267fa95cace694b2fb4efd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 16:44:42 -0400 Subject: RPCSEC_GSS: Switch auth_gss to use the new framework for pipefs dentries Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c0d36bb..d214aec 100644 
--- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -71,6 +71,13 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 +struct gss_pipe { + struct rpc_pipe_dir_object pdo; + struct rpc_pipe *pipe; + struct rpc_clnt *clnt; + const char *name; +}; + struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; @@ -84,7 +91,7 @@ struct gss_auth { * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ - struct rpc_pipe *pipe[2]; + struct gss_pipe *gss_pipe[2]; const char *target_name; }; @@ -456,7 +463,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, kfree(gss_msg); return ERR_PTR(vers); } - gss_msg->pipe = gss_auth->pipe[vers]; + gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -791,85 +798,83 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) } } -static void gss_pipes_dentries_destroy(struct rpc_auth *auth) +static void gss_pipe_dentry_destroy(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { - struct gss_auth *gss_auth; + struct gss_pipe *gss_pipe = pdo->pdo_data; + struct rpc_pipe *pipe = gss_pipe->pipe; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - if (gss_auth->pipe[0]->dentry) { - rpc_unlink(gss_auth->pipe[0]->dentry); - gss_auth->pipe[0]->dentry = NULL; - } - if (gss_auth->pipe[1]->dentry) { - rpc_unlink(gss_auth->pipe[1]->dentry); - gss_auth->pipe[1]->dentry = NULL; + if (pipe->dentry != NULL) { + rpc_unlink(pipe->dentry); + pipe->dentry = NULL; } } -static int gss_pipes_dentries_create(struct rpc_auth *auth) +static int gss_pipe_dentry_create(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { - int err; - struct gss_auth *gss_auth; - struct rpc_clnt *clnt; + struct gss_pipe *p = pdo->pdo_data; struct dentry *dentry; - gss_auth = 
container_of(auth, struct gss_auth, rpc_auth); - clnt = gss_auth->client; + dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + p->pipe->dentry = dentry; + return 0; +} + +static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = { + .create = gss_pipe_dentry_create, + .destroy = gss_pipe_dentry_destroy, +}; + +static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt, + const char *name, + const struct rpc_pipe_ops *upcall_ops) +{ + struct net *net = rpc_net_ns(clnt); + struct gss_pipe *p; + int err = -ENOMEM; - dentry = rpc_mkpipe_dentry(clnt->cl_dentry, "gssd", - clnt, gss_auth->pipe[1]); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) goto err; + p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(p->pipe)) { + err = PTR_ERR(p->pipe); + goto err_free_gss_pipe; } - gss_auth->pipe[1]->dentry = dentry; - dentry = rpc_mkpipe_dentry(clnt->cl_dentry, gss_auth->mech->gm_name, - clnt, gss_auth->pipe[0]); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto err_unlink_pipe_1; - } - return 0; - -err_unlink_pipe_1: - rpc_unlink(gss_auth->pipe[1]->dentry); - gss_auth->pipe[1]->dentry = NULL; + p->name = name; + p->clnt = clnt; + rpc_init_pipe_dir_object(&p->pdo, + &gss_pipe_dir_object_ops, + p); + err = rpc_add_pipe_dir_object(net, &clnt->cl_pipedir_objects, &p->pdo); + if (!err) + return p; + rpc_destroy_pipe_data(p->pipe); +err_free_gss_pipe: + kfree(p); err: - return err; + return ERR_PTR(err); } -static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, - struct rpc_auth *auth) +static void __gss_pipe_free(struct gss_pipe *p) { - struct gss_auth *gss_auth = container_of(auth, struct gss_auth, - rpc_auth); - struct net *net = gss_auth->net; - struct super_block *sb; + struct rpc_clnt *clnt = p->clnt; + struct net *net = rpc_net_ns(clnt); - sb = rpc_get_sb_net(net); - if (sb) { - if (clnt->cl_dentry) - 
gss_pipes_dentries_destroy(auth); - rpc_put_sb_net(net); - } + rpc_remove_pipe_dir_object(net, + &clnt->cl_pipedir_objects, + &p->pdo); + rpc_destroy_pipe_data(p->pipe); + kfree(p); } -static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, - struct rpc_auth *auth) +static void gss_pipe_free(struct gss_pipe *p) { - struct gss_auth *gss_auth = container_of(auth, struct gss_auth, - rpc_auth); - struct net *net = gss_auth->net; - struct super_block *sb; - int err = 0; - - sb = rpc_get_sb_net(net); - if (sb) { - if (clnt->cl_dentry) - err = gss_pipes_dentries_create(auth); - rpc_put_sb_net(net); - } - return err; + if (p != NULL) + __gss_pipe_free(p); } /* @@ -881,6 +886,7 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { rpc_authflavor_t flavor = args->pseudoflavor; struct gss_auth *gss_auth; + struct gss_pipe *gss_pipe; struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ @@ -915,39 +921,35 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) atomic_set(&auth->au_count, 1); kref_init(&gss_auth->kref); + err = rpcauth_init_credcache(auth); + if (err) + goto err_put_mech; /* * Note: if we created the old pipe first, then someone who * examined the directory at the right moment might conclude * that we supported only the old pipe. So we instead create * the new pipe first. 
*/ - gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->pipe[1])) { - err = PTR_ERR(gss_auth->pipe[1]); - goto err_put_mech; + gss_pipe = gss_pipe_alloc(clnt, "gssd", &gss_upcall_ops_v1); + if (IS_ERR(gss_pipe)) { + err = PTR_ERR(gss_pipe); + goto err_destroy_credcache; } + gss_auth->gss_pipe[1] = gss_pipe; - gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->pipe[0])) { - err = PTR_ERR(gss_auth->pipe[0]); + gss_pipe = gss_pipe_alloc(clnt, gss_auth->mech->gm_name, + &gss_upcall_ops_v0); + if (IS_ERR(gss_pipe)) { + err = PTR_ERR(gss_pipe); goto err_destroy_pipe_1; } - err = gss_pipes_dentries_create_net(clnt, auth); - if (err) - goto err_destroy_pipe_0; - err = rpcauth_init_credcache(auth); - if (err) - goto err_unlink_pipes; + gss_auth->gss_pipe[0] = gss_pipe; return auth; -err_unlink_pipes: - gss_pipes_dentries_destroy_net(clnt, auth); -err_destroy_pipe_0: - rpc_destroy_pipe_data(gss_auth->pipe[0]); err_destroy_pipe_1: - rpc_destroy_pipe_data(gss_auth->pipe[1]); + __gss_pipe_free(gss_auth->gss_pipe[1]); +err_destroy_credcache: + rpcauth_destroy_credcache(auth); err_put_mech: gss_mech_put(gss_auth->mech); err_put_net: @@ -963,9 +965,8 @@ out_dec: static void gss_free(struct gss_auth *gss_auth) { - gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); - rpc_destroy_pipe_data(gss_auth->pipe[0]); - rpc_destroy_pipe_data(gss_auth->pipe[1]); + gss_pipe_free(gss_auth->gss_pipe[0]); + gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); put_net(gss_auth->net); kfree(gss_auth->target_name); @@ -985,14 +986,18 @@ gss_free_callback(struct kref *kref) static void gss_destroy(struct rpc_auth *auth) { - struct gss_auth *gss_auth; + struct gss_auth *gss_auth = container_of(auth, + struct gss_auth, rpc_auth); dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + gss_pipe_free(gss_auth->gss_pipe[0]); + 
gss_auth->gss_pipe[0] = NULL; + gss_pipe_free(gss_auth->gss_pipe[1]); + gss_auth->gss_pipe[1] = NULL; rpcauth_destroy_credcache(auth); - gss_auth = container_of(auth, struct gss_auth, rpc_auth); kref_put(&gss_auth->kref, gss_free_callback); } @@ -1676,8 +1681,6 @@ static const struct rpc_authops authgss_ops = { .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, - .pipes_create = gss_pipes_dentries_create, - .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, .info2flavor = gss_mech_info2flavor, .flavor2info = gss_mech_flavor2info, -- cgit v0.10.2 From 5f42b016d7341871948a2b8cc8fb654691522d1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 16:47:20 -0400 Subject: SUNRPC: Remove the obsolete auth-only interface for pipefs dentry management Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a9ab577..6de26f2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -107,8 +107,6 @@ struct rpc_authops { struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); - int (*pipes_create)(struct rpc_auth *); - void (*pipes_destroy)(struct rpc_auth *); int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 01d2296..3c074de 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -102,11 +102,8 @@ static void rpc_unregister_client(struct rpc_clnt *clnt) static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { - if (clnt->cl_dentry) { - if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) - clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); + if (clnt->cl_dentry) rpc_remove_client_dir(clnt->cl_dentry, clnt); - } clnt->cl_dentry = NULL; } @@ -195,11 
+192,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; - if (clnt->cl_auth->au_ops->pipes_create) { - err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth); - if (err) - __rpc_clnt_remove_pipedir(clnt); - } break; case RPC_PIPEFS_UMOUNT: __rpc_clnt_remove_pipedir(clnt); -- cgit v0.10.2 From 2127d82af302be94066223311ca6ff317ee13ee0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 17:16:17 -0400 Subject: NFSv4: Convert idmapper to use the new framework for pipefs dentries Signed-off-by: Trond Myklebust diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 8b7e94a..567983d 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -64,6 +64,7 @@ struct idmap_legacy_upcalldata { }; struct idmap { + struct rpc_pipe_dir_object idmap_pdo; struct rpc_pipe *idmap_pipe; struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; @@ -402,18 +403,23 @@ static struct key_type key_type_id_resolver_legacy = { .request_key = nfs_idmap_legacy_upcall, }; -static void __nfs_idmap_unregister(struct rpc_pipe *pipe) +static void nfs_idmap_pipe_destroy(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { + struct idmap *idmap = pdo->pdo_data; + struct rpc_pipe *pipe = idmap->idmap_pipe; + if (pipe->dentry) { rpc_unlink(pipe->dentry); pipe->dentry = NULL; } } -static int __nfs_idmap_register(struct dentry *dir, - struct idmap *idmap, - struct rpc_pipe *pipe) +static int nfs_idmap_pipe_create(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { + struct idmap *idmap = pdo->pdo_data; + struct rpc_pipe *pipe = idmap->idmap_pipe; struct dentry *dentry; dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); @@ -423,36 +429,10 @@ static int __nfs_idmap_register(struct dentry *dir, return 0; } -static void nfs_idmap_unregister(struct nfs_client *clp, - struct rpc_pipe *pipe) -{ - struct net *net = clp->cl_net; - struct super_block *pipefs_sb; - - 
pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - __nfs_idmap_unregister(pipe); - rpc_put_sb_net(net); - } -} - -static int nfs_idmap_register(struct nfs_client *clp, - struct idmap *idmap, - struct rpc_pipe *pipe) -{ - struct net *net = clp->cl_net; - struct super_block *pipefs_sb; - int err = 0; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - if (clp->cl_rpcclient->cl_dentry) - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - idmap, pipe); - rpc_put_sb_net(net); - } - return err; -} +static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = { + .create = nfs_idmap_pipe_create, + .destroy = nfs_idmap_pipe_destroy, +}; int nfs_idmap_new(struct nfs_client *clp) @@ -465,23 +445,31 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; + rpc_init_pipe_dir_object(&idmap->idmap_pdo, + &nfs_idmap_pipe_dir_object_ops, + idmap); + pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); if (IS_ERR(pipe)) { error = PTR_ERR(pipe); - kfree(idmap); - return error; - } - error = nfs_idmap_register(clp, idmap, pipe); - if (error) { - rpc_destroy_pipe_data(pipe); - kfree(idmap); - return error; + goto err; } idmap->idmap_pipe = pipe; mutex_init(&idmap->idmap_mutex); + error = rpc_add_pipe_dir_object(clp->cl_net, + &clp->cl_rpcclient->cl_pipedir_objects, + &idmap->idmap_pdo); + if (error) + goto err_destroy_pipe; + clp->cl_idmap = idmap; return 0; +err_destroy_pipe: + rpc_destroy_pipe_data(idmap->idmap_pipe); +err: + kfree(idmap); + return error; } void @@ -491,130 +479,26 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - nfs_idmap_unregister(clp, idmap->idmap_pipe); - rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; + rpc_remove_pipe_dir_object(clp->cl_net, + &clp->cl_rpcclient->cl_pipedir_objects, + &idmap->idmap_pdo); + rpc_destroy_pipe_data(idmap->idmap_pipe); kfree(idmap); } -static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, - struct super_block *sb) -{ - int err = 0; - 
- switch (event) { - case RPC_PIPEFS_MOUNT: - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - clp->cl_idmap, - clp->cl_idmap->idmap_pipe); - break; - case RPC_PIPEFS_UMOUNT: - if (clp->cl_idmap->idmap_pipe) { - struct dentry *parent; - - parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; - __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); - /* - * Note: This is a dirty hack. SUNRPC hook has been - * called already but simple_rmdir() call for the - * directory returned with error because of idmap pipe - * inside. Thus now we have to remove this directory - * here. - */ - if (rpc_rmdir(parent)) - printk(KERN_ERR "NFS: %s: failed to remove " - "clnt dir!\n", __func__); - } - break; - default: - printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, - event); - return -ENOTSUPP; - } - return err; -} - -static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *cl_dentry; - struct nfs_client *clp; - int err; - -restart: - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - /* Wait for initialisation to finish */ - if (clp->cl_cons_state == NFS_CS_INITING) { - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - err = nfs_wait_client_init_complete(clp); - nfs_put_client(clp); - if (err) - return NULL; - goto restart; - } - /* Skip nfs_clients that failed to initialise */ - if (clp->cl_cons_state < 0) - continue; - smp_rmb(); - if (clp->rpc_ops != &nfs_v4_clientops) - continue; - cl_dentry = clp->cl_idmap->idmap_pipe->dentry; - if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || - ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; -} - -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) -{ - struct super_block *sb = ptr; - 
struct nfs_client *clp; - int error = 0; - - if (!try_module_get(THIS_MODULE)) - return 0; - - while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { - error = __rpc_pipefs_event(clp, event, sb); - nfs_put_client(clp); - if (error) - break; - } - module_put(THIS_MODULE); - return error; -} - -#define PIPEFS_NFS_PRIO 1 - -static struct notifier_block nfs_idmap_block = { - .notifier_call = rpc_pipefs_event, - .priority = SUNRPC_PIPEFS_NFS_PRIO, -}; - int nfs_idmap_init(void) { int ret; ret = nfs_idmap_init_keyring(); if (ret != 0) goto out; - ret = rpc_pipefs_notifier_register(&nfs_idmap_block); - if (ret != 0) - nfs_idmap_quit_keyring(); out: return ret; } void nfs_idmap_quit(void) { - rpc_pipefs_notifier_unregister(&nfs_idmap_block); nfs_idmap_quit_keyring(); } -- cgit v0.10.2 From c36dcfe1f7712b7c12df2d80359e638b9d246ce6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2013 17:44:26 -0400 Subject: SUNRPC: Remove the rpc_client->cl_dentry It is now redundant. Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0dccd01..76c0bf6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -57,7 +57,6 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; struct rpc_pipe_dir_head cl_pipedir_objects; - struct dentry * cl_dentry; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 188e7fc..b0cf181 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -94,7 +94,7 @@ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); -extern int rpc_remove_client_dir(struct dentry *, struct rpc_clnt *); +extern int 
rpc_remove_client_dir(struct rpc_clnt *); extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); extern void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3c074de..af3f0cf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -102,9 +102,7 @@ static void rpc_unregister_client(struct rpc_clnt *clnt) static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) { - if (clnt->cl_dentry) - rpc_remove_client_dir(clnt->cl_dentry, clnt); - clnt->cl_dentry = NULL; + rpc_remove_client_dir(clnt); } static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) @@ -154,14 +152,11 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { struct dentry *dentry; - if (clnt->cl_program->pipe_dir_name == NULL) - goto out; - clnt->cl_dentry = NULL; - dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - clnt->cl_dentry = dentry; -out: + if (clnt->cl_program->pipe_dir_name != NULL) { + dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + } return 0; } @@ -170,11 +165,18 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) if (clnt->cl_program->pipe_dir_name == NULL) return 1; - if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || - ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) - return 1; - if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0) - return 1; + switch (event) { + case RPC_PIPEFS_MOUNT: + if (clnt->cl_pipedir_objects.pdh_dentry != NULL) + return 1; + if (atomic_read(&clnt->cl_count) == 0) + return 1; + break; + case RPC_PIPEFS_UMOUNT: + if (clnt->cl_pipedir_objects.pdh_dentry == NULL) + return 1; + break; + } return 0; } @@ -191,7 +193,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, return -ENOENT; if (IS_ERR(dentry)) return PTR_ERR(dentry); - clnt->cl_dentry = dentry; break; case RPC_PIPEFS_UMOUNT: 
__rpc_clnt_remove_pipedir(clnt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index cfeba77..a35b2f4 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1055,15 +1055,16 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @dentry: dentry for the pipe * @rpc_client: rpc_client for the pipe */ -int rpc_remove_client_dir(struct dentry *dentry, struct rpc_clnt *rpc_client) +int rpc_remove_client_dir(struct rpc_clnt *rpc_client) { - if (rpc_client->cl_pipedir_objects.pdh_dentry) { - rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); - rpc_client->cl_pipedir_objects.pdh_dentry = NULL; - } + struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry; + + if (dentry == NULL) + return 0; + rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); + rpc_client->cl_pipedir_objects.pdh_dentry = NULL; return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } -- cgit v0.10.2 From 298fc3558b9c1f5324c5ec6d5c587ca9ae6cc826 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 27 Aug 2013 16:27:04 -0400 Subject: SUNRPC: Add a helper to allow sharing of rpc_pipefs directory objects Add support for looking up existing objects and creating new ones if there is no match. 
Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index b0cf181..a353e03 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -106,6 +106,12 @@ extern int rpc_add_pipe_dir_object(struct net *net, extern void rpc_remove_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo); +extern struct rpc_pipe_dir_object *rpc_find_or_alloc_pipe_dir_object( + struct net *net, + struct rpc_pipe_dir_head *pdh, + int (*match)(struct rpc_pipe_dir_object *, void *), + struct rpc_pipe_dir_object *(*alloc)(void *), + void *data); struct cache_detail; extern struct dentry *rpc_create_cache_dir(struct dentry *, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a35b2f4..f94567b 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -982,6 +982,41 @@ rpc_remove_pipe_dir_object(struct net *net, } EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object); +/** + * rpc_find_or_alloc_pipe_dir_object + * @net: pointer to struct net + * @pdh: pointer to struct rpc_pipe_dir_head + * @match: match struct rpc_pipe_dir_object to data + * @alloc: allocate a new struct rpc_pipe_dir_object + * @data: user defined data for match() and alloc() + * + */ +struct rpc_pipe_dir_object * +rpc_find_or_alloc_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + int (*match)(struct rpc_pipe_dir_object *, void *), + struct rpc_pipe_dir_object *(*alloc)(void *), + void *data) +{ + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + struct rpc_pipe_dir_object *pdo; + + mutex_lock(&sn->pipefs_sb_lock); + list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) { + if (!match(pdo, data)) + continue; + goto out; + } + pdo = alloc(data); + if (!pdo) + goto out; + rpc_add_pipe_dir_object_locked(net, pdh, pdo); +out: + mutex_unlock(&sn->pipefs_sb_lock); + return pdo; +} +EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object); + static void 
rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) { -- cgit v0.10.2 From 414a629598409497c05f2387c22c77dee143b4ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 27 Aug 2013 16:52:16 -0400 Subject: RPCSEC_GSS: Share rpc_pipes when an rpc_clnt owns multiple rpcsec auth caches Ensure that if an rpc_clnt owns more than one RPCSEC_GSS-based authentication mechanism, then those caches will share the same 'gssd' upcall pipe. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d214aec..5ec15bb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -76,6 +76,7 @@ struct gss_pipe { struct rpc_pipe *pipe; struct rpc_clnt *clnt; const char *name; + struct kref kref; }; struct gss_auth { @@ -832,7 +833,6 @@ static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt, const char *name, const struct rpc_pipe_ops *upcall_ops) { - struct net *net = rpc_net_ns(clnt); struct gss_pipe *p; int err = -ENOMEM; @@ -846,19 +846,71 @@ static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt, } p->name = name; p->clnt = clnt; + kref_init(&p->kref); rpc_init_pipe_dir_object(&p->pdo, &gss_pipe_dir_object_ops, p); - err = rpc_add_pipe_dir_object(net, &clnt->cl_pipedir_objects, &p->pdo); - if (!err) - return p; - rpc_destroy_pipe_data(p->pipe); + return p; err_free_gss_pipe: kfree(p); err: return ERR_PTR(err); } +struct gss_alloc_pdo { + struct rpc_clnt *clnt; + const char *name; + const struct rpc_pipe_ops *upcall_ops; +}; + +static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data) +{ + struct gss_pipe *gss_pipe; + struct gss_alloc_pdo *args = data; + + if (pdo->pdo_ops != &gss_pipe_dir_object_ops) + return 0; + gss_pipe = container_of(pdo, struct gss_pipe, pdo); + if (strcmp(gss_pipe->name, args->name) != 0) + return 0; + if (!kref_get_unless_zero(&gss_pipe->kref)) + return 0; + return 1; +} + +static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data) +{ 
+ struct gss_pipe *gss_pipe; + struct gss_alloc_pdo *args = data; + + gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops); + if (!IS_ERR(gss_pipe)) + return &gss_pipe->pdo; + return NULL; +} + +static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt, + const char *name, + const struct rpc_pipe_ops *upcall_ops) +{ + struct net *net = rpc_net_ns(clnt); + struct rpc_pipe_dir_object *pdo; + struct gss_alloc_pdo args = { + .clnt = clnt, + .name = name, + .upcall_ops = upcall_ops, + }; + + pdo = rpc_find_or_alloc_pipe_dir_object(net, + &clnt->cl_pipedir_objects, + gss_pipe_match_pdo, + gss_pipe_alloc_pdo, + &args); + if (pdo != NULL) + return container_of(pdo, struct gss_pipe, pdo); + return ERR_PTR(-ENOMEM); +} + static void __gss_pipe_free(struct gss_pipe *p) { struct rpc_clnt *clnt = p->clnt; @@ -871,10 +923,17 @@ static void __gss_pipe_free(struct gss_pipe *p) kfree(p); } +static void __gss_pipe_release(struct kref *kref) +{ + struct gss_pipe *p = container_of(kref, struct gss_pipe, kref); + + __gss_pipe_free(p); +} + static void gss_pipe_free(struct gss_pipe *p) { if (p != NULL) - __gss_pipe_free(p); + kref_put(&p->kref, __gss_pipe_release); } /* @@ -930,14 +989,14 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) * that we supported only the old pipe. So we instead create * the new pipe first. 
*/ - gss_pipe = gss_pipe_alloc(clnt, "gssd", &gss_upcall_ops_v1); + gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); goto err_destroy_credcache; } gss_auth->gss_pipe[1] = gss_pipe; - gss_pipe = gss_pipe_alloc(clnt, gss_auth->mech->gm_name, + gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name, &gss_upcall_ops_v0); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); @@ -947,7 +1006,7 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) return auth; err_destroy_pipe_1: - __gss_pipe_free(gss_auth->gss_pipe[1]); + gss_pipe_free(gss_auth->gss_pipe[1]); err_destroy_credcache: rpcauth_destroy_credcache(auth); err_put_mech: -- cgit v0.10.2 From eb6dc19d8e72ce3a957af5511d20c0db0a8bd007 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Aug 2013 15:26:25 -0400 Subject: RPCSEC_GSS: Share all credential caches on a per-transport basis Ensure that all struct rpc_clnt for any given socket/rdma channel share the same RPCSEC_GSS/krb5,krb5i,krb5p caches. 
Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5ec15bb..dc4b449 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "../netns.h" @@ -71,6 +72,9 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 +static DEFINE_HASHTABLE(gss_auth_hash_table, 16); +static DEFINE_SPINLOCK(gss_auth_hash_lock); + struct gss_pipe { struct rpc_pipe_dir_object pdo; struct rpc_pipe *pipe; @@ -81,6 +85,7 @@ struct gss_pipe { struct gss_auth { struct kref kref; + struct hlist_node hash; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; @@ -940,8 +945,8 @@ static void gss_pipe_free(struct gss_pipe *p) * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) */ -static struct rpc_auth * -gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) +static struct gss_auth * +gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { rpc_authflavor_t flavor = args->pseudoflavor; struct gss_auth *gss_auth; @@ -955,6 +960,7 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; + INIT_HLIST_NODE(&gss_auth->hash); gss_auth->target_name = NULL; if (args->target_name) { gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL); @@ -1004,7 +1010,7 @@ gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) } gss_auth->gss_pipe[0] = gss_pipe; - return auth; + return gss_auth; err_destroy_pipe_1: gss_pipe_free(gss_auth->gss_pipe[1]); err_destroy_credcache: @@ -1051,6 +1057,12 @@ gss_destroy(struct rpc_auth *auth) dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + if 
(hash_hashed(&gss_auth->hash)) { + spin_lock(&gss_auth_hash_lock); + hash_del(&gss_auth->hash); + spin_unlock(&gss_auth_hash_lock); + } + gss_pipe_free(gss_auth->gss_pipe[0]); gss_auth->gss_pipe[0] = NULL; gss_pipe_free(gss_auth->gss_pipe[1]); @@ -1060,6 +1072,80 @@ gss_destroy(struct rpc_auth *auth) kref_put(&gss_auth->kref, gss_free_callback); } +static struct gss_auth * +gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args, + struct rpc_clnt *clnt, + struct gss_auth *new) +{ + struct gss_auth *gss_auth; + unsigned long hashval = (unsigned long)clnt; + + spin_lock(&gss_auth_hash_lock); + hash_for_each_possible(gss_auth_hash_table, + gss_auth, + hash, + hashval) { + if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor) + continue; + if (gss_auth->target_name != args->target_name) { + if (gss_auth->target_name == NULL) + continue; + if (args->target_name == NULL) + continue; + if (strcmp(gss_auth->target_name, args->target_name)) + continue; + } + if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count)) + continue; + goto out; + } + if (new) + hash_add(gss_auth_hash_table, &new->hash, hashval); + gss_auth = new; +out: + spin_unlock(&gss_auth_hash_lock); + return gss_auth; +} + +static struct gss_auth * +gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) +{ + struct gss_auth *gss_auth; + struct gss_auth *new; + + gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL); + if (gss_auth != NULL) + goto out; + new = gss_create_new(args, clnt); + if (IS_ERR(new)) + return new; + gss_auth = gss_auth_find_or_add_hashed(args, clnt, new); + if (gss_auth != new) + gss_destroy(&new->rpc_auth); +out: + return gss_auth; +} + +static struct rpc_auth * +gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) +{ + struct gss_auth *gss_auth; + struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt); + + while (clnt != clnt->cl_parent) { + struct rpc_clnt *parent = clnt->cl_parent; + /* Find the original parent for this transport */ 
+ if (rcu_access_pointer(parent->cl_xprt) != xprt) + break; + clnt = parent; + } + + gss_auth = gss_create_hashed(args, clnt); + if (IS_ERR(gss_auth)) + return ERR_CAST(gss_auth); + return &gss_auth->rpc_auth; +} + /* * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call * to the server with the GSS control procedure field set to -- cgit v0.10.2 From 280ebcf97ce4b252c72af74ff3149ab5a5c9e680 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Sep 2013 13:28:04 -0400 Subject: SUNRPC: rpcauth_create needs to know about rpc_clnt clone status Ensure that we set rpc_clnt->cl_parent before calling rpc_client_register so that rpcauth_create can find any existing RPCSEC_GSS caches for this transport. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af3f0cf..8572d32 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -313,7 +313,9 @@ out: return err; } -static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) +static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, + struct rpc_xprt *xprt, + struct rpc_clnt *parent) { const struct rpc_program *program = args->program; const struct rpc_version *version; @@ -339,7 +341,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - clnt->cl_parent = clnt; + clnt->cl_parent = parent ? 
: clnt; rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; @@ -377,6 +379,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpc_client_register(args, clnt); if (err) goto out_no_path; + if (parent) + atomic_inc(&parent->cl_count); return clnt; out_no_path: @@ -467,7 +471,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - clnt = rpc_new_client(args, xprt); + clnt = rpc_new_client(args, xprt, NULL); if (IS_ERR(clnt)) return clnt; @@ -514,15 +518,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, goto out_err; args->servername = xprt->servername; - new = rpc_new_client(args, xprt); + new = rpc_new_client(args, xprt, clnt); if (IS_ERR(new)) { err = PTR_ERR(new); goto out_err; } - atomic_inc(&clnt->cl_count); - new->cl_parent = clnt; - /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_softrtry = clnt->cl_softrtry; -- cgit v0.10.2 From f1ff0c27fd9987c59d707cd1a6b6c1fc3ae0a250 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 14 Aug 2013 11:59:13 -0400 Subject: SUNRPC: don't map EKEYEXPIRED to EACCES in call_refreshresult The NFS layer needs to know when a key has expired. This change also returns -EKEYEXPIRED to the application, and the informative "Key has expired" error message is displayed. The user then knows that credential renewal is required. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8572d32..631085f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1408,9 +1408,9 @@ call_refreshresult(struct rpc_task *task) return; case -ETIMEDOUT: rpc_delay(task, 3*HZ); - case -EKEYEXPIRED: case -EAGAIN: status = -EACCES; + case -EKEYEXPIRED: if (!task->tk_cred_retry) break; task->tk_cred_retry--; -- cgit v0.10.2 From 4de6caa270afaa381dd3373e9e6d148b1090e0ec Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 14 Aug 2013 11:59:15 -0400 Subject: SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. 
To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6de26f2..790be14 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -24,12 +24,21 @@ struct rpcsec_gss_info; +/* auth_cred ac_flags bits */ +enum { + RPC_CRED_NO_CRKEY_TIMEOUT = 0, /* underlying cred has no key timeout */ + RPC_CRED_KEY_EXPIRE_SOON = 1, /* underlying cred key will expire soon */ + RPC_CRED_NOTIFY_TIMEOUT = 2, /* notify generic cred when underlying + key will expire soon */ +}; + /* Work around the lack of a VFS credential */ struct auth_cred { kuid_t uid; kgid_t gid; struct group_info *group_info; const char *principal; + unsigned long ac_flags; unsigned char machine_cred : 1; }; @@ -111,6 +120,8 @@ struct rpc_authops { rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, struct rpcsec_gss_info *); + int (*key_timeout)(struct rpc_auth *, + struct rpc_cred *); }; struct rpc_credops { @@ -127,6 +138,8 @@ struct rpc_credops { void *, __be32 *, void *); int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, void *, __be32 *, void *); + int (*crkey_timeout)(struct rpc_cred *); + bool (*crkey_to_expire)(struct rpc_cred *); }; extern const struct rpc_authops authunix_ops; @@ -166,6 +179,9 @@ int rpcauth_uptodatecred(struct rpc_task *); int
rpcauth_init_credcache(struct rpc_auth *); void rpcauth_destroy_credcache(struct rpc_auth *); void rpcauth_clear_credcache(struct rpc_cred_cache *); +int rpcauth_key_timeout_notify(struct rpc_auth *, + struct rpc_cred *); +bool rpcauth_cred_key_to_expire(struct rpc_cred *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 5071e43..4151590 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -343,6 +343,27 @@ out_nocache: EXPORT_SYMBOL_GPL(rpcauth_init_credcache); /* + * Setup a credential key lifetime timeout notification + */ +int +rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred) +{ + if (!cred->cr_auth->au_ops->key_timeout) + return 0; + return cred->cr_auth->au_ops->key_timeout(auth, cred); +} +EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify); + +bool +rpcauth_cred_key_to_expire(struct rpc_cred *cred) +{ + if (!cred->cr_ops->crkey_to_expire) + return false; + return cred->cr_ops->crkey_to_expire(cred); +} +EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); + +/* * Destroy a list of credentials */ static inline diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index b6badaf..f6d84be 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -89,6 +89,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) gcred->acred.uid = acred->uid; gcred->acred.gid = acred->gid; gcred->acred.group_info = acred->group_info; + gcred->acred.ac_flags = 0; if (gcred->acred.group_info != NULL) get_group_info(gcred->acred.group_info); gcred->acred.machine_cred = acred->machine_cred; @@ -182,11 +183,78 @@ void rpc_destroy_generic_auth(void) rpcauth_destroy_credcache(&generic_auth); } +/* + * Test the current time (now) against the underlying credential key expiry + * minus a timeout and setup notification.
+ * + * The normal case: + * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set + * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential + * rpc_credops crmatch routine to notify this generic cred when its key + * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0. + * + * The error case: + * If the underlying cred lookup fails, return -EACCES. + * + * The 'almost' error case: + * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within + * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_KEY_EXPIRE_SOON bit + * on the acred ac_flags and return 0. + */ +static int +generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) +{ + struct auth_cred *acred = &container_of(cred, struct generic_cred, + gc_base)->acred; + struct rpc_cred *tcred; + int ret = 0; + + + /* Fast track for non crkey_timeout (no key) underlying credentials */ + if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags)) + return 0; + + /* Fast track for the normal case */ + if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags)) + return 0; + + /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */ + tcred = auth->au_ops->lookup_cred(auth, acred, 0); + if (IS_ERR(tcred)) + return -EACCES; + + if (!tcred->cr_ops->crkey_timeout) { + set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags); + ret = 0; + goto out_put; + } + + /* Test for the almost error case */ + ret = tcred->cr_ops->crkey_timeout(tcred); + if (ret != 0) { + set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); + ret = 0; + } else { + /* In case underlying cred key has been reset */ + if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, + &acred->ac_flags)) + dprintk("RPC: UID %d Credential key reset\n", + tcred->cr_uid); + /* set up fasttrack for the normal case */ + set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); + } + +out_put: + put_rpccred(tcred); + return ret; +} + static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name =
"Generic", .lookup_cred = generic_lookup_cred, .crcreate = generic_create_cred, + .key_timeout = generic_key_timeout, }; static struct rpc_auth generic_auth = { @@ -194,9 +262,23 @@ static struct rpc_auth generic_auth = { .au_count = ATOMIC_INIT(0), }; +static bool generic_key_to_expire(struct rpc_cred *cred) +{ + struct auth_cred *acred = &container_of(cred, struct generic_cred, + gc_base)->acred; + bool ret; + + get_rpccred(cred); + ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); + put_rpccred(cred); + + return ret; +} + static const struct rpc_credops generic_credops = { .cr_name = "Generic cred", .crdestroy = generic_destroy_cred, .crbind = generic_bind_cred, .crmatch = generic_match, + .crkey_to_expire = generic_key_to_expire, }; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dc4b449..a9b1edc 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -63,6 +63,9 @@ static const struct rpc_credops gss_nullops; #define GSS_RETRY_EXPIRED 5 static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; +#define GSS_KEY_EXPIRE_TIMEO 240 +static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -1295,10 +1298,32 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) return err; } +/* + * Returns -EACCES if GSS context is NULL or will expire within the + * timeout (seconds) + */ +static int +gss_key_timeout(struct rpc_cred *rc) +{ + struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); + unsigned long now = jiffies; + unsigned long expire; + + if (gss_cred->gc_ctx == NULL) + return -EACCES; + + expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ); + + if (time_after(now, expire)) + return -EACCES; + return 0; +} + static int gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); + int
ret; if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; @@ -1311,11 +1336,26 @@ out: if (acred->principal != NULL) { if (gss_cred->gc_principal == NULL) return 0; - return strcmp(acred->principal, gss_cred->gc_principal) == 0; + ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; + goto check_expire; } if (gss_cred->gc_principal != NULL) return 0; - return uid_eq(rc->cr_uid, acred->uid); + ret = uid_eq(rc->cr_uid, acred->uid); + +check_expire: + if (ret == 0) + return ret; + + /* Notify acred users of GSS context expiration timeout */ + if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) && + (gss_key_timeout(rc) != 0)) { + /* test will now be done from generic cred */ + test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); + /* tell NFS layer that key will expire soon */ + set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); + } + return ret; } /* @@ -1842,6 +1882,7 @@ static const struct rpc_credops gss_credops = { .crvalidate = gss_validate, .crwrap_req = gss_wrap_req, .crunwrap_resp = gss_unwrap_resp, + .crkey_timeout = gss_key_timeout, }; static const struct rpc_credops gss_nullops = { @@ -1929,5 +1970,12 @@ module_param_named(expired_cred_retry_delay, MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " "the RPC engine retries an expired credential"); +module_param_named(key_expire_timeo, + gss_key_expire_timeo, + uint, 0644); +MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a " + "credential keys lifetime where the NFS layer cleans up " + "prior to key expiration"); + module_init(init_rpcsec_gss) module_exit(exit_rpcsec_gss) -- cgit v0.10.2 From dc24826bfca8d788d05f625208f06d90be5560b3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 14 Aug 2013 11:59:16 -0400 Subject: NFS avoid expired credential keys for buffered writes We must avoid buffering a WRITE that is using a credential key (e.g. a GSS context key) that is about to expire or has expired. 
We currently will paint ourselves into a corner by returning success to the application for such a buffered WRITE, only to discover that we do not have permission when we attempt to flush the WRITE (and potentially associated COMMIT) to disk. Use the RPC layer credential key timeout and expire routines which use a watermark, gss_key_expire_timeo. We test the key in nfs_file_write. If a WRITE is using a credential with a key that will expire within watermark seconds, flush the inode in nfs_write_end and send only NFS_FILE_SYNC WRITEs by adding nfs_ctx_key_to_expire to nfs_need_sync_write. Note that this results in single page NFS_FILE_SYNC WRITEs. Signed-off-by: Andy Adamson [Trond: removed a pr_warn_ratelimited() for now] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d6a9db0..1e6bfdb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -411,6 +411,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + struct nfs_open_context *ctx = nfs_file_open_context(file); int status; dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", @@ -446,6 +447,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, if (status < 0) return status; NFS_I(mapping->host)->write_io += copied; + + if (nfs_ctx_key_to_expire(ctx)) { + status = nfs_wb_all(mapping->host); + if (status < 0) + return status; + } + return copied; } @@ -642,7 +650,8 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) return 1; ctx = nfs_file_open_context(filp); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || + nfs_ctx_key_to_expire(ctx)) return 1; return 0; } @@ -656,6 +665,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); + result =
nfs_key_timeout_notify(iocb->ki_filp, inode); + if (result) + return result; + if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 50f7068..2415198 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -431,6 +431,8 @@ void nfs_request_remove_commit_list(struct nfs_page *req, void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq); +int nfs_key_timeout_notify(struct file *filp, struct inode *inode); +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d37e8ca..94eb450 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -876,6 +876,33 @@ int nfs_flush_incompatible(struct file *file, struct page *page) } /* + * Avoid buffered writes when a open context credential's key would + * expire soon. + * + * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL. + * + * Return 0 and set a credential flag which triggers the inode to flush + * and performs NFS_FILE_SYNC writes if the key will expired within + * RPC_KEY_EXPIRE_TIMEO. + */ +int +nfs_key_timeout_notify(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx = nfs_file_open_context(filp); + struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + + return rpcauth_key_timeout_notify(auth, ctx->cred); +} + +/* + * Test if the open context credential key is marked to expire soon. + */ +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx) +{ + return rpcauth_cred_key_to_expire(ctx->cred); +} + +/* * If the page cache is marked as unsafe or invalid, then we can't rely on * the PageUptodate() flag. In this case, we will need to turn off * write optimisations that depend on the page contents being correct. 
-- cgit v0.10.2 From 35fa5f7b35ca2076d594b2670a32d66dd3ae9eec Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 14 Aug 2013 11:59:17 -0400 Subject: SUNRPC refactor rpcauth_checkverf error returns Most of the time an error from the credops crvalidate function means the server has sent us a garbage verifier. The gss_validate function is the exception where there is an -EACCES case if the user GSS_context on the client has expired. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a9b1edc..30eb502 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1501,6 +1501,7 @@ gss_validate(struct rpc_task *task, __be32 *p) struct xdr_netobj mic; u32 flav,len; u32 maj_stat; + __be32 *ret = ERR_PTR(-EIO); dprintk("RPC: %5u %s\n", task->tk_pid, __func__); @@ -1516,6 +1517,7 @@ gss_validate(struct rpc_task *task, __be32 *p) mic.data = (u8 *)p; mic.len = len; + ret = ERR_PTR(-EACCES); maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); @@ -1533,8 +1535,9 @@ gss_validate(struct rpc_task *task, __be32 *p) return p + XDR_QUADLEN(len); out_bad: gss_put_ctx(ctx); - dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__); - return NULL; + dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__, + PTR_ERR(ret)); + return ret; } static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 4664eb4..f0ebe07 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -88,13 +88,13 @@ nul_validate(struct rpc_task *task, __be32 *p) flavor = ntohl(*p++); if (flavor != RPC_AUTH_NULL) { printk("RPC: bad verf flavor: %u\n", flavor); - return NULL; + return ERR_PTR(-EIO); } size = ntohl(*p++); if (size != 0) { printk("RPC: bad verf size: %u\n", size); - return NULL; + return 
ERR_PTR(-EIO); } return p; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index e52d832..d5d6923 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -192,13 +192,13 @@ unx_validate(struct rpc_task *task, __be32 *p) flavor != RPC_AUTH_UNIX && flavor != RPC_AUTH_SHORT) { printk("RPC: bad verf flavor: %u\n", flavor); - return NULL; + return ERR_PTR(-EIO); } size = ntohl(*p++); if (size > RPC_MAX_AUTH_SIZE) { printk("RPC: giant verf size: %u\n", size); - return NULL; + return ERR_PTR(-EIO); } task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 631085f..0cd5b6d5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2072,7 +2072,8 @@ rpc_verify_header(struct rpc_task *task) dprintk("RPC: %5u %s: XDR representation not a multiple of" " 4 bytes: 0x%x\n", task->tk_pid, __func__, task->tk_rqstp->rq_rcv_buf.len); - goto out_eio; + error = -EIO; + goto out_err; } if ((len -= 3) < 0) goto out_overflow; @@ -2081,6 +2082,7 @@ rpc_verify_header(struct rpc_task *task) if ((n = ntohl(*p++)) != RPC_REPLY) { dprintk("RPC: %5u %s: not an RPC reply: %x\n", task->tk_pid, __func__, n); + error = -EIO; goto out_garbage; } @@ -2099,7 +2101,8 @@ rpc_verify_header(struct rpc_task *task) dprintk("RPC: %5u %s: RPC call rejected, " "unknown error: %x\n", task->tk_pid, __func__, n); - goto out_eio; + error = -EIO; + goto out_err; } if (--len < 0) goto out_overflow; @@ -2144,9 +2147,11 @@ rpc_verify_header(struct rpc_task *task) task->tk_pid, __func__, n); goto out_err; } - if (!(p = rpcauth_checkverf(task, p))) { - dprintk("RPC: %5u %s: auth check failed\n", - task->tk_pid, __func__); + p = rpcauth_checkverf(task, p); + if (IS_ERR(p)) { + error = PTR_ERR(p); + dprintk("RPC: %5u %s: auth check failed with %d\n", + task->tk_pid, __func__, error); goto out_garbage; /* bad verifier, retry */ } len = p - (__be32 *)iov->iov_base - 1; @@ -2199,8 +2204,6 @@ out_garbage: out_retry: 
return ERR_PTR(-EAGAIN); } -out_eio: - error = -EIO; out_err: rpc_exit(task, error); dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, -- cgit v0.10.2 From a5250def7c4549a6a1cd8257900bef9c12ffc2fc Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 3 Sep 2013 15:18:49 -0400 Subject: NFSv4: use the mach cred for SECINFO w/ integrity Commit 5ec16a8500d339b0e7a0cc76b785d18daad354d4 introduced a regression that causes SECINFO to fail without actually sending an RPC if: 1) the nfs_client's rpc_client was using KRB5i/p (now tried by default) 2) the current user doesn't have valid kerberos credentials This situation is quite common - as of now a sec=sys mount would use krb5i for the nfs_client's rpc_client and a user would hardly be faulted for not having run kinit. The solution is to use the machine cred when trying to use an integrity protected auth flavor for SECINFO. Older servers may not support using the machine cred or an integrity protected auth flavor for SECINFO in every circumstance, so we fall back to using the user's cred and the filesystem's auth flavor in this case. We run into another problem when running against linux nfs servers - they return NFS4ERR_WRONGSEC when using integrity auth flavor (unless the mount is also that flavor) even though that is not a valid error for SECINFO*. Even though it's against spec, handle WRONGSEC errors on SECINFO by falling back to using the user cred and the filesystem's auth flavor. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 09c7e3b..85b1906 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -435,6 +435,20 @@ wait_on_recovery: return ret; } +/* + * Return 'true' if 'clp' is using an rpc_client that is integrity protected + * or 'false' otherwise.
+ */ +static bool _nfs4_is_integrity_protected(struct nfs_client *clp) +{ + rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor; + + if (flavor == RPC_AUTH_GSS_KRB5I || + flavor == RPC_AUTH_GSS_KRB5P) + return true; + + return false; +} static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) { @@ -5842,10 +5856,13 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, } /** - * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if - * possible) as per RFC3530bis and RFC5661 Security Considerations sections + * If 'use_integrity' is true and the state managment nfs_client + * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient + * and the machine credential as per RFC3530bis and RFC5661 Security + * Considerations sections. Otherwise, just use the user cred with the + * filesystem's rpc_client. */ -static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) +static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity) { int status; struct nfs4_secinfo_arg args = { @@ -5860,11 +5877,21 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_argp = &args, .rpc_resp = &res, }; - struct rpc_clnt *clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; + struct rpc_clnt *clnt = NFS_SERVER(dir)->client; + + if (use_integrity) { + clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; + msg.rpc_cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + } dprintk("NFS call secinfo %s\n", name->name); - status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, + &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); + + if (msg.rpc_cred) + put_rpccred(msg.rpc_cred); + return status; } @@ -5874,7 +5901,21 @@ int nfs4_proc_secinfo(struct inode 
*dir, const struct qstr *name, struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_secinfo(dir, name, flavors); + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(NFS_SERVER(dir)->nfs_client)) + err = _nfs4_proc_secinfo(dir, name, flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs4_proc_secinfo(dir, name, flavors, false); + trace_nfs4_secinfo(dir, name, err); err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); -- cgit v0.10.2 From ba6c05928dcafc7e0a0c8e4ee6a293ba47190fd4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Aug 2013 12:24:25 -0400 Subject: NFS: Ensure that rmdir() waits for sillyrenames to complete If an NFS client does mkdir("dir"); fd = open("dir/file"); unlink("dir/file"); close(fd); rmdir("dir"); then the asynchronous nature of the sillyrename operation means that we can end up getting EBUSY for the rmdir() in the above test. Fix that by ensuring that we wait for any in-progress sillyrenames before sending the rmdir() to the server. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d8149e9..187caa4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1694,12 +1694,19 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); trace_nfs_rmdir_enter(dir, dentry); - error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - /* Ensure the VFS deletes this inode */ - if (error == 0 && dentry->d_inode != NULL) - clear_nlink(dentry->d_inode); - else if (error == -ENOENT) - nfs_dentry_handle_enoent(dentry); + if (dentry->d_inode) { + nfs_wait_on_sillyrename(dentry); + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + /* Ensure the VFS deletes this inode */ + switch (error) { + case 0: + clear_nlink(dentry->d_inode); + break; + case -ENOENT: + nfs_dentry_handle_enoent(dentry); + } + } else + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); trace_nfs_rmdir_exit(dir, dentry, error); return error; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 2c1485d..bb939ed 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -207,6 +207,13 @@ out_free: return ret; } +void nfs_wait_on_sillyrename(struct dentry *dentry) +{ + struct nfs_inode *nfsi = NFS_I(dentry->d_inode); + + wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1); +} + void nfs_block_sillyrename(struct dentry *dentry) { struct nfs_inode *nfsi = NFS_I(dentry->d_inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7125cef..3ea4cde 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -524,6 +524,7 @@ static inline void nfs4_label_free(void *label) {} * linux/fs/nfs/unlink.c */ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); +extern void nfs_wait_on_sillyrename(struct dentry *dentry); extern void nfs_block_sillyrename(struct dentry *dentry); extern void nfs_unblock_sillyrename(struct dentry *dentry); extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); -- cgit v0.10.2 From 
e8d92382dd65a3dd77ff533b09815c3dc65fa9a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:47:51 -0400 Subject: NFS: When displaying session slot numbers, use "%u" consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up, since slot and sequence numbers are all unsigned anyway. Among other things, squelch compiler warnings: linux/fs/nfs/nfs4proc.c: In function ‘nfs4_setup_sequence’: linux/fs/nfs/nfs4proc.c:703:2: warning: signed and unsigned type in conditional expression [-Wsign-compare] and linux/fs/nfs/nfs4session.c: In function ‘nfs4_alloc_slot’: linux/fs/nfs/nfs4session.c:151:31: warning: signed and unsigned type in conditional expression [-Wsign-compare] Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b25ed7e..ae2e87b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -303,14 +303,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) { struct nfs4_slot *slot; - dprintk("%s enter. slotid %d seqid %d\n", + dprintk("%s enter. 
slotid %u seqid %u\n", __func__, args->csa_slotid, args->csa_sequenceid); if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); slot = tbl->slots + args->csa_slotid; - dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); + dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { @@ -320,7 +320,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { - dprintk("%s seqid %d is a replay\n", + dprintk("%s seqid %u is a replay\n", __func__, args->csa_sequenceid); /* Signal process_op to set this error on next op */ if (args->csa_cachethis == 0) @@ -521,7 +521,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %u\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_highest_slotid); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 85b1906..05742b8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -664,7 +664,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, args->sa_slot = slot; - dprintk("<-- %s slotid=%d seqid=%d\n", __func__, + dprintk("<-- %s slotid=%u seqid=%u\n", __func__, slot->slot_nr, slot->seq_nr); res->sr_slot = slot; @@ -704,9 +704,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, goto out; } - dprintk("--> %s clp %p session %p sr_slot %d\n", + dprintk("--> %s clp %p session %p sr_slot %u\n", __func__, session->clp, session, res->sr_slot ? 
- res->sr_slot->slot_nr : -1); + res->sr_slot->slot_nr : NFS4_NO_SLOT); ret = nfs41_setup_sequence(session, args, res, task); out: diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 202e363..0161ad2 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -76,7 +76,7 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) nfs4_slot_tbl_drain_complete(tbl); } } - dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + dprintk("%s: slotid %u highest_used_slotid %u\n", __func__, slotid, tbl->highest_used_slotid); } @@ -146,9 +146,9 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) ret->generation = tbl->generation; out: - dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, - !IS_ERR(ret) ? ret->slot_nr : -1); + !IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT); return ret; } @@ -191,7 +191,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, { int ret; - dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + dprintk("--> %s: max_reqs=%u, tbl->max_slots %u\n", __func__, max_reqs, tbl->max_slots); if (max_reqs > NFS4_MAX_SLOT_TABLE) @@ -205,7 +205,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + dprintk("%s: tbl=%p slots=%p max_slots=%u\n", __func__, tbl, tbl->slots, tbl->max_slots); out: dprintk("<-- %s: return %d\n", __func__, ret); -- cgit v0.10.2 From 2a3eb2b97b1268891b0251df537c32daac295503 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:00 -0400 Subject: NFS: Rename nfs41_call_sync_data as a common data structure Clean up: rename nfs41_call_sync_data for use as a data structure common to all NFSv4 minor versions. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05742b8..069cd82 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -463,6 +463,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp do_renew_lease(server->nfs_client, timestamp); } +struct nfs4_call_sync_data { + const struct nfs_server *seq_server; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; +}; + #if defined(CONFIG_NFS_V4_1) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) @@ -714,15 +720,9 @@ out: return ret; } -struct nfs41_call_sync_data { - const struct nfs_server *seq_server; - struct nfs4_sequence_args *seq_args; - struct nfs4_sequence_res *seq_res; -}; - static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { - struct nfs41_call_sync_data *data = calldata; + struct nfs4_call_sync_data *data = calldata; struct nfs4_session *session = nfs4_get_session(data->seq_server); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); @@ -732,7 +732,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) { - struct nfs41_call_sync_data *data = calldata; + struct nfs4_call_sync_data *data = calldata; nfs41_sequence_done(task, data->seq_res); } @@ -750,7 +750,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, { int ret; struct rpc_task *task; - struct nfs41_call_sync_data data = { + struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = args, .seq_res = res, -- cgit v0.10.2 From 5a580e0ae2c0228aef4d10d649aaf387bebd70b6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:09 -0400 Subject: NFS: Clean up nfs4_setup_sequence() Clean up: Both the NFSv4.0 and NFSv4.1 version of nfs4_setup_sequence() are used only in fs/nfs/nfs4proc.c. No need to keep global header declarations for either version. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d7bb59d..0feb238 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -247,9 +247,6 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } -extern int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); @@ -278,14 +275,6 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return NULL; } -static inline int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - rpc_call_start(task); - return 0; -} - static inline bool is_ds_only_client(struct nfs_client *clp) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 069cd82..f6be226 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -697,10 +697,10 @@ out_sleep: } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); -int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +static int nfs4_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(server); int ret = 0; @@ -783,6 +783,14 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) { } +static int nfs4_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + rpc_call_start(task); + return 0; +} static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) -- cgit 
v0.10.2 From a9c92d6b853547ceccb28594af3609ecaf35af6f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:18 -0400 Subject: NFS: Common versions of sequence helper functions NFSv4.0 will have need for this functionality when I add the ability to block NFSv4.0 traffic before migration recovery. I'm not really clear on why nfs4_set_sequence_privileged() gets a generic name, but nfs41_init_sequence() gets a minor version-specific name. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f6be226..7b702bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -469,6 +469,21 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; +static void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ + args->sa_slot = NULL; + args->sa_cache_this = cache_reply; + args->sa_privileged = 0; + + res->sr_slot = NULL; +} + +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ + args->sa_privileged = 1; +} + #if defined(CONFIG_NFS_V4_1) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) @@ -617,22 +632,6 @@ static int nfs4_sequence_done(struct rpc_task *task, return nfs41_sequence_done(task, res); } -static void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ - args->sa_slot = NULL; - args->sa_cache_this = 0; - args->sa_privileged = 0; - if (cache_reply) - args->sa_cache_this = 1; - res->sr_slot = NULL; -} - -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ - args->sa_privileged = 1; -} - int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -773,16 +772,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, } #else -static -void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ -} 
- -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ -} - static int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -817,7 +806,7 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs41_init_sequence(args, res, cache_reply); + nfs4_init_sequence(args, res, cache_reply); return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, args, res); } @@ -1769,7 +1758,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; @@ -2547,7 +2536,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -3312,7 +3301,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); nfs_fattr_init(res->dir_attr); } @@ -3346,7 +3335,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; res->server = server; - nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); } static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) @@ -3954,7 +3943,7 @@ static void nfs4_proc_read_setup(struct 
nfs_read_data *data, struct rpc_message data->timestamp = jiffies; data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) @@ -4038,7 +4027,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag data->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) @@ -4087,7 +4076,7 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } struct nfs4_renewdata { @@ -4901,7 +4890,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; @@ -5166,7 +5155,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -5402,7 +5391,7 @@ static int 
_nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -6266,7 +6255,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); nfs4_set_sequence_privileged(&args.la_seq_args); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -6572,7 +6561,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, nfs_put_client(clp); return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_init_sequence(&calldata->args, &calldata->res, 0); if (is_privileged) nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; @@ -6715,7 +6704,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, calldata->clp = clp; calldata->arg.one_fs = 0; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); nfs4_set_sequence_privileged(&calldata->arg.seq_args); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; @@ -6906,7 +6895,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; - nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ pnfs_get_layout_hdr(NFS_I(inode)->layout); @@ -7001,7 +6990,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) int status; dprintk("--> %s\n", __func__); - 
nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -7183,7 +7172,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -7302,7 +7291,7 @@ static int _nfs41_test_stateid(struct nfs_server *server, }; dprintk("NFS call test_stateid %p\n", stateid); - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_set_sequence_privileged(&args.seq_args); status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res); @@ -7407,7 +7396,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); if (privileged) nfs4_set_sequence_privileged(&data->args.seq_args); -- cgit v0.10.2 From 9915ea7e0a83c0a4b94d0c5e05a9bb690fce809f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:27 -0400 Subject: NFS: Add RPC callouts to start NFSv4.0 synchronous requests Refactor nfs4_call_sync_sequence() so it is used for NFSv4.0 now. The RPC callouts will house transport blocking logic similar to NFSv4.1 sessions. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0feb238..bf2184e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -49,6 +49,7 @@ struct nfs4_minor_version_ops { struct nfs_fsinfo *); int (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); + const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7b702bc..2fbf482 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -741,6 +741,41 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; +#else +static int nfs4_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + rpc_call_start(task); + return 0; +} + +static int nfs4_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + return 1; +} +#endif /* CONFIG_NFS_V4_1 */ + +static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_call_sync_data *data = calldata; + nfs4_setup_sequence(data->seq_server, + data->seq_args, data->seq_res, task); +} + +static void nfs40_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_call_sync_data *data = calldata; + nfs4_sequence_done(task, data->seq_res); +} + +static const struct rpc_call_ops nfs40_call_sync_ops = { + .rpc_call_prepare = nfs40_call_sync_prepare, + .rpc_call_done = nfs40_call_sync_done, +}; + static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -749,6 +784,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, { int ret; struct rpc_task *task; + struct nfs_client *clp = server->nfs_client; struct nfs4_call_sync_data data = { .seq_server = 
server, .seq_args = args, @@ -757,7 +793,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_task_setup task_setup = { .rpc_client = clnt, .rpc_message = msg, - .callback_ops = &nfs41_call_sync_ops, + .callback_ops = clp->cl_mvops->call_sync_ops, .callback_data = &data }; @@ -771,23 +807,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } -#else -static int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - rpc_call_start(task); - return 0; -} - -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) -{ - return 1; -} -#endif /* CONFIG_NFS_V4_1 */ - static int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, @@ -807,8 +826,7 @@ int nfs4_call_sync(struct rpc_clnt *clnt, int cache_reply) { nfs4_init_sequence(args, res, cache_reply); - return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, - args, res); + return nfs4_call_sync_sequence(clnt, server, msg, args, res); } static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) @@ -7529,6 +7547,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, + .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, @@ -7547,6 +7566,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, @@ -7566,6 +7586,7 @@ static const 
struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, -- cgit v0.10.2 From 220e09ccd36ab7836a6aa19295c314d7e04f7552 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:35 -0400 Subject: NFS: Remove unused call_sync minor version op Clean up. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index bf2184e..8de9b93 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -38,11 +38,6 @@ struct nfs4_minor_version_ops { u32 minor_version; unsigned init_caps; - int (*call_sync)(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res); bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2fbf482..3abd32c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -808,16 +808,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, } static -int _nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) -{ - return rpc_call_sync(clnt, msg, 0); -} - -static int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -7543,7 +7533,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, - .call_sync = _nfs4_call_sync, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, @@ -7562,7 +7551,6 @@ 
static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, - .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, @@ -7582,7 +7570,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, - .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, -- cgit v0.10.2 From 9d33059c1b76d77004753799479af6d359d83217 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:44 -0400 Subject: NFS: Enable slot table helpers for NFSv4.0 I'd like to re-use NFSv4.1's slot table machinery for NFSv4.0 transport blocking. Re-organize some of nfs4session.c so the slot table code is built even when NFS_V4_1 is disabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7238c4b..03192a6 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -23,11 +23,11 @@ obj-$(CONFIG_NFS_V4) += nfsv4.o CFLAGS_nfs4trace.o += -I$(src) nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o \ - nfs4trace.o + nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \ + dns_resolve.o nfs4trace.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 0161ad2..746675b 100644 --- a/fs/nfs/nfs4session.c 
+++ b/fs/nfs/nfs4session.c @@ -44,6 +44,17 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) } } +/** + * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete + * @tbl - controlling slot table + * + */ +void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl) +{ + if (nfs4_slot_tbl_draining(tbl)) + complete(&tbl->complete); +} + /* * nfs4_free_slot - free a slot and efficiently update slot table. * @@ -212,13 +223,6 @@ out: return ret; } -/* Destroy the slot table */ -static void nfs4_destroy_slot_tables(struct nfs4_session *session) -{ - nfs4_shrink_slot_table(&session->fc_slot_table, 0); - nfs4_shrink_slot_table(&session->bc_slot_table, 0); -} - static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) { struct nfs4_sequence_args *args = task->tk_msg.rpc_argp; @@ -383,6 +387,15 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); } +#if defined(CONFIG_NFS_V4_1) + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + nfs4_shrink_slot_table(&session->fc_slot_table, 0); + nfs4_shrink_slot_table(&session->bc_slot_table, 0); +} + /* * Initialize or reset the forechannel and backchannel tables */ @@ -513,4 +526,4 @@ int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) } EXPORT_SYMBOL_GPL(nfs4_init_ds_session); - +#endif /* defined(CONFIG_NFS_V4_1) */ diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index e3e6ce3..e069cf2 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -72,15 +72,24 @@ enum nfs4_session_state { NFS4_SESSION_INITING, }; -#if defined(CONFIG_NFS_V4_1) extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); - -extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, - u32 target_highest_slotid); +extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table 
*tbl); extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res); +bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot); +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); + +static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl) +{ + return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); +} + +#if defined(CONFIG_NFS_V4_1) +extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid); extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); @@ -89,17 +98,6 @@ extern void nfs4_destroy_session(struct nfs4_session *session); extern int nfs4_init_session(struct nfs_client *clp); extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); -extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); - -static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl) -{ - return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); -} - -bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, - struct nfs4_slot *slot); -void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); - /* * Determine if sessions are in use. 
*/ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 25b7cf6..f5b0434 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -260,15 +260,6 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } -/* - * Signal state manager thread if session fore channel is drained - */ -void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl) -{ - if (nfs4_slot_tbl_draining(tbl)) - complete(&tbl->complete); -} - static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); -- cgit v0.10.2 From 744aa5253010636bb56f062336e9cae3132a46fb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:48:53 -0400 Subject: NFS: Add global helper to set up a stand-alone nfs4_slot_table Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 746675b..be273c5 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -23,6 +23,14 @@ #define NFSDBG_FACILITY NFSDBG_STATE +static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue) +{ + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); + init_completion(&tbl->complete); +} + /* * nfs4_shrink_slot_table - free retired slots from the slot table */ @@ -223,6 +231,21 @@ out: return ret; } +/** + * nfs4_setup_slot_table - prepare a stand-alone slot table for use + * @tbl: slot table to set up + * @max_reqs: maximum number of requests allowed + * @queue: name to give RPC wait queue + * + * Returns zero on success, or a negative errno. 
+ */ +int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs, + const char *queue) +{ + nfs4_init_slot_table(tbl, queue); + return nfs4_realloc_slot_table(tbl, max_reqs, 0); +} + static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) { struct nfs4_sequence_args *args = task->tk_msg.rpc_argp; @@ -425,24 +448,13 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) { struct nfs4_session *session; - struct nfs4_slot_table *tbl; session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); if (!session) return NULL; - tbl = &session->fc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); - init_completion(&tbl->complete); - - tbl = &session->bc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); - init_completion(&tbl->complete); - + nfs4_init_slot_table(&session->fc_slot_table, "ForeChannel Slot table"); + nfs4_init_slot_table(&session->bc_slot_table, "BackChannel Slot table"); session->session_state = 1<<NFS4_SESSION_INITING; session->clp = clp; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index e069cf2..f223a76 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -72,6 +72,8 @@ enum nfs4_session_state { NFS4_SESSION_INITING, }; +extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, + unsigned int max_reqs, const char *queue); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v0.10.2 From eb2a1cd3c9df70ef84e91f2fc657fd19682346ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:02 -0400 Subject: NFS: Add global helper for releasing slot 
table resources The nfs4_destroy_slot_tables() function is renamed to avoid confusion with the new helper. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index be273c5..e0bf666 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -232,6 +232,16 @@ out: } /** + * nfs4_release_slot_table - release resources attached to a slot table + * @tbl: slot table to shut down + * + */ +void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +{ + nfs4_shrink_slot_table(tbl, 0); +} + +/** * nfs4_setup_slot_table - prepare a stand-alone slot table for use * @tbl: slot table to set up * @max_reqs: maximum number of requests allowed @@ -412,11 +422,10 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, #if defined(CONFIG_NFS_V4_1) -/* Destroy the slot table */ -static void nfs4_destroy_slot_tables(struct nfs4_session *session) +static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) { - nfs4_shrink_slot_table(&session->fc_slot_table, 0); - nfs4_shrink_slot_table(&session->bc_slot_table, 0); + nfs4_release_slot_table(&session->fc_slot_table); + nfs4_release_slot_table(&session->bc_slot_table); } /* @@ -441,7 +450,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); + nfs4_destroy_session_slot_tables(ses); return status; } @@ -477,7 +486,7 @@ void nfs4_destroy_session(struct nfs4_session *session) dprintk("%s Destroy backchannel for xprt %p\n", __func__, xprt); xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); - nfs4_destroy_slot_tables(session); + nfs4_destroy_session_slot_tables(session); kfree(session); } diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index f223a76..bfb0fe3 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -74,6 +74,7 @@ enum nfs4_session_state { extern int 
nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs, const char *queue); +extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v0.10.2 From abf79bb341bf52f75f295b850abdf5f78f584311 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:11 -0400 Subject: NFS: Add a slot table to struct nfs_client for NFSv4.0 transport blocking Anchor an nfs4_slot_table in the nfs_client for use with NFSv4.0 transport blocking. It is initialized only for NFSv4.0 nfs_client's. Introduce appropriate minor version ops to handle nfs_client initialization and shutdown requirements that differ for each minor version. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8de9b93..af2d5bf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -38,6 +38,8 @@ struct nfs4_minor_version_ops { u32 minor_version; unsigned init_caps; + int (*init_client)(struct nfs_client *); + void (*shutdown_client)(struct nfs_client *); bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, @@ -292,6 +294,10 @@ extern const u32 nfs4_pathconf_bitmap[3]; extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[3]; +void nfs40_shutdown_client(struct nfs_client *); +void nfs41_shutdown_client(struct nfs_client *); +int nfs40_init_client(struct nfs_client *); +int nfs41_init_client(struct nfs_client *); void nfs4_free_client(struct nfs_client *); struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 767a5e3..98c0104 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -41,7 +41,7 @@ static int 
nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) } #ifdef CONFIG_NFS_V4_1 -static void nfs4_shutdown_session(struct nfs_client *clp) +void nfs41_shutdown_client(struct nfs_client *clp) { if (nfs4_has_session(clp)) { nfs4_destroy_session(clp->cl_session); @@ -49,11 +49,15 @@ static void nfs4_shutdown_session(struct nfs_client *clp) } } -#else /* CONFIG_NFS_V4_1 */ -static void nfs4_shutdown_session(struct nfs_client *clp) +#endif /* CONFIG_NFS_V4_1 */ + +void nfs40_shutdown_client(struct nfs_client *clp) { + if (clp->cl_slot_tbl) { + nfs4_release_slot_table(clp->cl_slot_tbl); + kfree(clp->cl_slot_tbl); + } } -#endif /* CONFIG_NFS_V4_1 */ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) { @@ -97,7 +101,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - nfs4_shutdown_session(clp); + clp->cl_mvops->shutdown_client(clp); nfs4_destroy_callback(clp); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); @@ -144,34 +148,77 @@ static int nfs4_init_callback(struct nfs_client *clp) return 0; } +/** + * nfs40_init_client - nfs_client initialization tasks for NFSv4.0 + * @clp - nfs_client to initialize + * + * Returns zero on success, or a negative errno if some error occurred. + */ +int nfs40_init_client(struct nfs_client *clp) +{ + struct nfs4_slot_table *tbl; + int ret; + + tbl = kzalloc(sizeof(*tbl), GFP_NOFS); + if (tbl == NULL) + return -ENOMEM; + + ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, + "NFSv4.0 transport Slot table"); + if (ret) { + kfree(tbl); + return ret; + } + + clp->cl_slot_tbl = tbl; + return 0; +} + +#if defined(CONFIG_NFS_V4_1) + +/** + * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+ + * @clp - nfs_client to initialize + * + * Returns zero on success, or a negative errno if some error occurred. 
+ */ +int nfs41_init_client(struct nfs_client *clp) +{ + struct nfs4_session *session = NULL; + + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); + return 0; +} + +#endif /* CONFIG_NFS_V4_1 */ + /* * Initialize the minor version specific parts of an NFS4 client record */ static int nfs4_init_client_minor_version(struct nfs_client *clp) { -#if defined(CONFIG_NFS_V4_1) - if (clp->cl_mvops->minor_version) { - struct nfs4_session *session = NULL; - /* - * Create the session and mark it expired. - * When a SEQUENCE operation encounters the expired session - * it will do session recovery to initialize it. - */ - session = nfs4_alloc_session(clp); - if (!session) - return -ENOMEM; - - clp->cl_session = session; - /* - * The create session reply races with the server back - * channel probe. 
Mark the client NFS_CS_SESSION_INITING - * so that the client back channel can find the - * nfs_client struct - */ - nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); - } -#endif /* CONFIG_NFS_V4_1 */ + int ret; + ret = clp->cl_mvops->init_client(clp); + if (ret) + return ret; return nfs4_init_callback(clp); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3abd32c..3b5166c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7533,6 +7533,8 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, + .init_client = nfs40_init_client, + .shutdown_client = nfs40_shutdown_client, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, @@ -7551,6 +7553,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, @@ -7570,6 +7574,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d221243..fc83d3d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -78,6 +78,9 @@ struct nfs_client { u32 cl_cb_ident; /* v4.0 callback identifier */ const struct nfs4_minor_version_ops *cl_mvops; + /* NFSv4.0 transport blocking */ + struct nfs4_slot_table *cl_slot_tbl; + /* The sequence id to use for the next CREATE_SESSION */ u32 cl_seqid; /* The flags used for obtaining the clientid 
during EXCHANGE_ID */ -- cgit v0.10.2 From 3bd2384a77e7b277c1fd8dd4ebb071162e2d85b6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:19 -0400 Subject: NFS: NFSv4.0 transport blocking Plumb in a mechanism for plugging an NFSv4.0 mount, using the same infrastructure as NFSv4.1 sessions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3b5166c..e9e58cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -484,6 +484,67 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) args->sa_privileged = 1; } +static int nfs40_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; + struct nfs4_slot *slot; + + /* slot already allocated? */ + if (res->sr_slot != NULL) + goto out_start; + + spin_lock(&tbl->slot_tbl_lock); + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); + + args->sa_slot = slot; + res->sr_slot = slot; + +out_start: + rpc_call_start(task); + return 0; + +out_sleep: + if (args->sa_privileged) + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; +} + +static int nfs40_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + struct nfs4_slot *slot = res->sr_slot; + struct nfs4_slot_table *tbl; + + if (!RPC_WAS_SENT(task)) + goto out; + + tbl = slot->table; + spin_lock(&tbl->slot_tbl_lock); + if (!nfs41_wake_and_assign_slot(tbl, slot)) + nfs4_free_slot(tbl, slot); + spin_unlock(&tbl->slot_tbl_lock); + + res->sr_slot = NULL; +out: + return 1; +} + 
#if defined(CONFIG_NFS_V4_1) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) @@ -629,6 +690,8 @@ static int nfs4_sequence_done(struct rpc_task *task, { if (res->sr_slot == NULL) return 1; + if (!res->sr_slot->table->session) + return nfs40_sequence_done(task, res); return nfs41_sequence_done(task, res); } @@ -704,17 +767,15 @@ static int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) { - rpc_call_start(task); - goto out; - } + if (!session) + return nfs40_setup_sequence(server, args, res, task); dprintk("--> %s clp %p session %p sr_slot %u\n", __func__, session->clp, session, res->sr_slot ? res->sr_slot->slot_nr : NFS4_NO_SLOT); ret = nfs41_setup_sequence(session, args, res, task); -out: + dprintk("<-- %s status=%d\n", __func__, ret); return ret; } @@ -741,22 +802,23 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -#else +#else /* !CONFIG_NFS_V4_1 */ + static int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) { - rpc_call_start(task); - return 0; + return nfs40_setup_sequence(server, args, res, task); } static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { - return 1; + return nfs40_sequence_done(task, res); } -#endif /* CONFIG_NFS_V4_1 */ + +#endif /* !CONFIG_NFS_V4_1 */ static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4593728..cb5f946 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1858,11 +1858,10 @@ static void encode_sequence(struct xdr_stream *xdr, struct nfs4_slot *slot = args->sa_slot; __be32 *p; - if (slot == NULL) - return; - tp = slot->table; session = tp->session; + if (!session) + return; encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); @@ -2043,9 +2042,9 @@ 
static void encode_free_stateid(struct xdr_stream *xdr, static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) { #if defined(CONFIG_NFS_V4_1) - - if (args->sa_slot) - return args->sa_slot->table->session->clp->cl_mvops->minor_version; + struct nfs4_session *session = args->sa_slot->table->session; + if (session) + return session->clp->cl_mvops->minor_version; #endif /* CONFIG_NFS_V4_1 */ return 0; } @@ -5595,6 +5594,8 @@ static int decode_sequence(struct xdr_stream *xdr, if (res->sr_slot == NULL) return 0; + if (!res->sr_slot->table->session) + return 0; status = decode_op_hdr(xdr, OP_SEQUENCE); if (!status) -- cgit v0.10.2 From 160881e33d88129faeb08130865fdc0177ba1aa3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:29 -0400 Subject: NFS: Enable nfs4_setup_sequence() for DELEGRETURN When CONFIG_NFS_V4_1 is disabled, the calls to nfs4_setup_sequence() and nfs4_sequence_done() are compiled out for the DELEGRETURN operation. To allow NFSv4.0 transport blocking to work for DELEGRETURN, these call sites have to be present all the time. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9e58cc..6807e9b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4918,7 +4918,6 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } -#if defined(CONFIG_NFS_V4_1) static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) { struct nfs4_delegreturndata *d_data; @@ -4930,12 +4929,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) &d_data->res.seq_res, task); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs4_delegreturn_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs4_delegreturn_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; -- cgit v0.10.2 From fbd4bfd1d9d21664a1c2a6730e8bc1a577f1e171 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:38 -0400 Subject: NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER Ensure RELEASE_LOCKOWNER is not emitted while the transport is plugged. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6807e9b..5ad48f0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5725,8 +5725,23 @@ struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; + struct nfs4_sequence_args seq_args; + struct nfs4_sequence_res seq_res; }; +static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + nfs40_setup_sequence(data->server, + &data->seq_args, &data->seq_res, task); +} + +static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + nfs40_sequence_done(task, &data->seq_res); +} + static void nfs4_release_lockowner_release(void *calldata) { struct nfs_release_lockowner_data *data = calldata; @@ -5735,6 +5750,8 @@ static void nfs4_release_lockowner_release(void *calldata) } static const struct rpc_call_ops nfs4_release_lockowner_ops = { + .rpc_call_prepare = nfs4_release_lockowner_prepare, + .rpc_call_done = nfs4_release_lockowner_done, .rpc_release = nfs4_release_lockowner_release, }; @@ -5747,14 +5764,17 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st if (server->nfs_client->cl_mvops->minor_version != 0) return -EINVAL; + data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; + nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->args.lock_owner.id = lsp->ls_seqid.owner_id; data->args.lock_owner.s_dev = server->s_dev; + msg.rpc_argp = &data->args; rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; -- cgit v0.10.2 From be05c860d79bd2688486e78a3b7a298543f0c6f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:47 
-0400 Subject: NFS: Add nfs4_sequence calls for OPEN_CONFIRM Ensure OPEN_CONFIRM is not emitted while the transport is plugged. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ad48f0..0122919 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1610,10 +1610,20 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state return nfs4_handle_delegation_recall_error(server, state, stateid, err); } +static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, + &data->o_res.seq_res, task); +} + static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; + nfs40_sequence_done(task, &data->o_res.seq_res); + data->rpc_status = task->tk_status; if (data->rpc_status == 0) { nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); @@ -1642,6 +1652,7 @@ out_free: } static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_prepare = nfs4_open_confirm_prepare, .rpc_call_done = nfs4_open_confirm_done, .rpc_release = nfs4_open_confirm_release, }; @@ -1669,6 +1680,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; + nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; -- cgit v0.10.2 From 2cf8bca8b9980c58eab9f2f47586eb16a6e63300 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 Aug 2013 12:49:56 -0400 Subject: NFS: Update session draining barriers for NFSv4.0 transport blocking Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f5b0434..4e95666 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -215,32 +215,6 @@ out: return cred; } -#if defined(CONFIG_NFS_V4_1) - -static int 
nfs41_setup_state_renewal(struct nfs_client *clp) -{ - int status; - struct nfs_fsinfo fsinfo; - - if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { - nfs4_schedule_state_renewal(clp); - return 0; - } - - status = nfs4_proc_get_lease_time(clp, &fsinfo); - if (status == 0) { - /* Update lease time and schedule renewal */ - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = jiffies; - spin_unlock(&clp->cl_lock); - - nfs4_schedule_state_renewal(clp); - } - - return status; -} - static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) { if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { @@ -254,6 +228,11 @@ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; + if (clp->cl_slot_tbl) { + nfs4_end_drain_slot_table(clp->cl_slot_tbl); + return; + } + if (ses != NULL) { nfs4_end_drain_slot_table(&ses->bc_slot_table); nfs4_end_drain_slot_table(&ses->fc_slot_table); @@ -278,6 +257,9 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) struct nfs4_session *ses = clp->cl_session; int ret = 0; + if (clp->cl_slot_tbl) + return nfs4_drain_slot_tbl(clp->cl_slot_tbl); + /* back channel */ ret = nfs4_drain_slot_tbl(&ses->bc_slot_table); if (ret) @@ -286,6 +268,32 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_drain_slot_tbl(&ses->fc_slot_table); } +#if defined(CONFIG_NFS_V4_1) + +static int nfs41_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + + if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { + nfs4_schedule_state_renewal(clp); + return 0; + } + + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } + + return 
status; +} + static void nfs41_finish_session_reset(struct nfs_client *clp) { clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); @@ -2085,7 +2093,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) } #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -static void nfs4_end_drain_session(struct nfs_client *clp) { } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { -- cgit v0.10.2 From 1cec16abf23d644b0e261956cb2b37a668354f5a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Sep 2013 12:26:03 -0400 Subject: When CONFIG_NFS_V4_1 is not enabled, "make C=2" emits this warning: linux/fs/nfs/nfs4session.c:337:6: warning: symbol 'nfs41_set_target_slotid' was not declared. Should it be static? Move nfs41_set_target_slotid() and nfs41_update_target_slotid() back behind CONFIG_NFS_V4_1, since, in the final revision of this work, they are used only in NFSv4.1 and later. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index e0bf666..cf883c7 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -310,6 +310,8 @@ void nfs41_wake_slot_table(struct nfs4_slot_table *tbl) } } +#if defined(CONFIG_NFS_V4_1) + static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { @@ -420,8 +422,6 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); } -#if defined(CONFIG_NFS_V4_1) - static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) { nfs4_release_slot_table(&session->fc_slot_table); diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index bfb0fe3..2323061 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -78,9 +78,6 @@ extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct 
nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, - struct nfs4_slot *slot, - struct nfs4_sequence_res *res); bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); @@ -93,6 +90,9 @@ static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl) #if defined(CONFIG_NFS_V4_1) extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); +extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res); extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); -- cgit v0.10.2 From b6a85258d8223a87e7ecf25791e634617a159f79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Sep 2013 12:26:05 -0400 Subject: NFS: Fix warning introduced by NFSv4.0 transport blocking patches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_NFS_V4_1 is not enabled, gcc emits this warning: linux/fs/nfs/nfs4state.c:255:12: warning: ‘nfs4_begin_drain_session’ defined but not used [-Wunused-function] static int nfs4_begin_drain_session(struct nfs_client *clp) ^ Eventually NFSv4.0 migration recovery will invoke this function, but that has not yet been merged. Hide nfs4_begin_drain_session() behind CONFIG_NFS_V4_1 for now. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e95666..da608ee 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -239,6 +239,8 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } +#if defined(CONFIG_NFS_V4_1) + static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); @@ -268,8 +270,6 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_drain_slot_tbl(&ses->fc_slot_table); } -#if defined(CONFIG_NFS_V4_1) - static int nfs41_setup_state_renewal(struct nfs_client *clp) { int status; -- cgit v0.10.2 From 40b5ea0c25669cb99ba7f4836437a7ebaba91408 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Sep 2013 12:16:23 -0400 Subject: SUNRPC: Add tracepoints to help debug socket connection issues Add client side debugging to help trace socket connection/disconnection and unexpected state change issues. Signed-off-by: Trond Myklebust diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 43be87d..b74a8ac 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include DECLARE_EVENT_CLASS(rpc_task_status, @@ -172,6 +174,135 @@ DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, ); +#define rpc_show_socket_state(state) \ + __print_symbolic(state, \ + { SS_FREE, "FREE" }, \ + { SS_UNCONNECTED, "UNCONNECTED" }, \ + { SS_CONNECTING, "CONNECTING," }, \ + { SS_CONNECTED, "CONNECTED," }, \ + { SS_DISCONNECTING, "DISCONNECTING" }) + +#define rpc_show_sock_state(state) \ + __print_symbolic(state, \ + { TCP_ESTABLISHED, "ESTABLISHED" }, \ + { TCP_SYN_SENT, "SYN_SENT" }, \ + { TCP_SYN_RECV, "SYN_RECV" }, \ + { TCP_FIN_WAIT1, "FIN_WAIT1" }, \ + { TCP_FIN_WAIT2, "FIN_WAIT2" }, \ + { TCP_TIME_WAIT, "TIME_WAIT" }, \ + { TCP_CLOSE, "CLOSE" }, \ + { TCP_CLOSE_WAIT, "CLOSE_WAIT" }, \ + { TCP_LAST_ACK, "LAST_ACK" }, \ + 
{ TCP_LISTEN, "LISTEN" }, \ + { TCP_CLOSING, "CLOSING" }) + +DECLARE_EVENT_CLASS(xs_socket_event, + + TP_PROTO( + struct rpc_xprt *xprt, + struct socket *socket + ), + + TP_ARGS(xprt, socket), + + TP_STRUCT__entry( + __field(unsigned int, socket_state) + __field(unsigned int, sock_state) + __field(unsigned long long, ino) + __string(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + struct inode *inode = SOCK_INODE(socket); + __entry->socket_state = socket->state; + __entry->sock_state = socket->sk->sk_state; + __entry->ino = (unsigned long long)inode->i_ino; + __assign_str(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk( + "socket:[%llu] dstaddr=%s/%s " + "state=%u (%s) sk_state=%u (%s)", + __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->socket_state, + rpc_show_socket_state(__entry->socket_state), + __entry->sock_state, + rpc_show_sock_state(__entry->sock_state) + ) +); +#define DEFINE_RPC_SOCKET_EVENT(name) \ + DEFINE_EVENT(xs_socket_event, name, \ + TP_PROTO( \ + struct rpc_xprt *xprt, \ + struct socket *socket \ + ), \ + TP_ARGS(xprt, socket)) + +DECLARE_EVENT_CLASS(xs_socket_event_done, + + TP_PROTO( + struct rpc_xprt *xprt, + struct socket *socket, + int error + ), + + TP_ARGS(xprt, socket, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, socket_state) + __field(unsigned int, sock_state) + __field(unsigned long long, ino) + __string(dstaddr, + xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + struct inode *inode = SOCK_INODE(socket); + __entry->socket_state = socket->state; + __entry->sock_state = socket->sk->sk_state; + __entry->ino = (unsigned long long)inode->i_ino; + __entry->error = error; + __assign_str(dstaddr, + 
xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(dstport, + xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk( + "error=%d socket:[%llu] dstaddr=%s/%s " + "state=%u (%s) sk_state=%u (%s)", + __entry->error, + __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->socket_state, + rpc_show_socket_state(__entry->socket_state), + __entry->sock_state, + rpc_show_sock_state(__entry->sock_state) + ) +); +#define DEFINE_RPC_SOCKET_EVENT_DONE(name) \ + DEFINE_EVENT(xs_socket_event_done, name, \ + TP_PROTO( \ + struct rpc_xprt *xprt, \ + struct socket *socket, \ + int error \ + ), \ + TP_ARGS(xprt, socket, error)) + +DEFINE_RPC_SOCKET_EVENT(rpc_socket_state_change); +DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_connect); +DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); +DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); +DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); + #endif /* _TRACE_SUNRPC_H */ #include diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddf0602..b98bce5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -47,6 +47,8 @@ #include #include +#include + #include "sunrpc.h" static void xs_close(struct rpc_xprt *xprt); @@ -665,8 +667,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; - if (sock != NULL) + if (sock != NULL) { kernel_sock_shutdown(sock, SHUT_WR); + trace_rpc_socket_shutdown(xprt, sock); + } } /** @@ -811,6 +815,7 @@ static void xs_reset_transport(struct sock_xprt *transport) sk->sk_no_check = 0; + trace_rpc_socket_close(&transport->xprt, sock); sock_release(sock); } @@ -1492,6 +1497,7 @@ static void xs_tcp_state_change(struct sock *sk) sock_flag(sk, SOCK_ZAPPED), sk->sk_shutdown); + trace_rpc_socket_state_change(xprt, sk->sk_socket); switch (sk->sk_state) { case TCP_ESTABLISHED: spin_lock(&xprt->transport_lock); @@ -1896,6 +1902,7 @@ static int xs_local_setup_socket(struct 
sock_xprt *transport) xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); status = xs_local_finish_connecting(xprt, sock); + trace_rpc_socket_connect(xprt, sock, status); switch (status) { case 0: dprintk("RPC: xprt %p connected to %s\n", @@ -2039,6 +2046,7 @@ static void xs_udp_setup_socket(struct work_struct *work) xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); + trace_rpc_socket_connect(xprt, sock, 0); status = 0; out: xprt_clear_connecting(xprt); @@ -2064,6 +2072,8 @@ static void xs_abort_connection(struct sock_xprt *transport) memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); + trace_rpc_socket_reset_connection(&transport->xprt, + transport->sock, result); if (!result) xs_sock_reset_connection_flags(&transport->xprt); dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -2194,6 +2204,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); + trace_rpc_socket_connect(xprt, sock, status); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); -- cgit v0.10.2 From ef1820f9be27b6ad158f433ab38002ab8131db4d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 4 Sep 2013 17:04:49 +1000 Subject: NFSv4: Don't try to recover NFSv4 locks when they are lost. When an NFSv4 client loses contact with the server it can lose any locks that it holds. Currently when it reconnects to the server it simply tries to reclaim those locks. This might succeed even though some other client has held and released a lock in the mean time. So the first client might think the file is unchanged, but it isn't. This isn't good. If, when recovery happens, the locks cannot be claimed because some other client still holds the lock, then we get a message in the kernel logs, but the client can still write. 
So two clients can both think they have a lock and can both write at the same time. This is equally not good. There was a patch a while ago http://comments.gmane.org/gmane.linux.nfs/41917 which tried to address some of this, but it didn't seem to go anywhere. That patch would also send a signal to the process. That might be useful but for now this patch just causes writes to fail. For NFSv4 (unlike v2/v3) there is a strong link between the lock and the write request so we can fairly easily fail any IO if the lock is gone. While some applications might not expect this, it is still safer than allowing the write to succeed. Because this is a fairly big change in behaviour a module parameter, "recover_locks", is introduced which defaults to true (the current behaviour) but can be set to "false" to tell the client not to try to recover things that were lost. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1db588a..90cb10d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -826,9 +826,10 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { rpc_call_start(task); + return 0; } static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -847,9 +848,10 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { rpc_call_start(task); + return 0; } static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git
a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index af2d5bf..6411831 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -133,6 +133,7 @@ struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 0 +#define NFS_LOCK_LOST 1 unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0122919..1eb694e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4028,15 +4028,19 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) - return; - nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_READ); + return 0; + if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_READ) == -EIO) + return -EIO; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + return -EIO; + return 0; } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -4112,15 +4116,19 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } -static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) - return; - nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - 
data->args.lock_context, FMODE_WRITE); + return 0; + if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_WRITE) == -EIO) + return -EIO; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + return -EIO; + return 0; } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5515,6 +5523,12 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request return err; } +bool recover_locks = true; +module_param(recover_locks, bool, 0644); +MODULE_PARM_DESC(recover_locks, + "If the server reports that a lock might be lost, " + "try to recovery it risking corruption."); + static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); @@ -5526,6 +5540,10 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request err = nfs4_set_lock_state(state, request); if (err != 0) return err; + if (!recover_locks) { + set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags); + return 0; + } do { if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da608ee..cc14cbb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -969,7 +969,9 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { + if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) + ret = -EIO; + else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = 0; smp_rmb(); @@ -1009,11 +1011,17 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fmode_t 
fmode, const struct nfs_lockowner *lockowner) { - int ret = 0; + int ret = nfs4_copy_lock_stateid(dst, state, lockowner); + if (ret == -EIO) + /* A lost lock - don't even consider delegations */ + goto out; if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) goto out; - ret = nfs4_copy_lock_stateid(dst, state, lockowner); if (ret != -ENOENT) + /* nfs4_copy_delegation_stateid() didn't over-write + * dst, so it still has the lock stateid which we now + * choose to use. + */ goto out; ret = nfs4_copy_open_stateid(dst, state); out: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c041c41..a8f57c7 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -623,9 +623,10 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { rpc_call_start(task); + return 0; } static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -644,9 +645,10 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { rpc_call_start(task); + return 0; } static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 70a26c6..31db5c3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -513,9 +513,10 @@ static void nfs_readpage_release_common(void *calldata) void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) - 
rpc_exit(task, -EIO); + int err; + err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); + if (err) + rpc_exit(task, err); } static const struct rpc_call_ops nfs_read_common_ops = { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 94eb450..379450c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1294,9 +1294,10 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) - rpc_exit(task, -EIO); + int err; + err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); + if (err) + rpc_exit(task, err); } void nfs_commit_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ddc3e32..7c3956d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1419,12 +1419,12 @@ struct nfs_rpc_ops { void (*read_setup) (struct nfs_read_data *, struct rpc_message *); void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); - void (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); + int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); - void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); + int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); -- cgit v0.10.2 From 
f6de7a39c181dfb8a2c534661a53c73afb3081cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Sep 2013 10:08:54 -0400 Subject: NFSv4: Document the recover_lost_locks kernel parameter Rename the new 'recover_locks' kernel parameter to 'recover_lost_locks' and change the default to 'false'. Document why in Documentation/kernel-parameters.txt Move the 'recover_lost_locks' kernel parameter to fs/nfs/super.c to make it easy to backport to kernels prior to 3.6.x, which don't have a separate NFSv4 module. Signed-off-by: Trond Myklebust diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 15356ac..30584b1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1847,6 +1847,18 @@ bytes respectively. Such letter suffixes can also be entirely omitted. will be sent. The default is to send the implementation identification information. + + nfs.recover_lost_locks = + [NFSv4] Attempt to recover locks that were lost due + to a lease timeout on the server. Please note that + doing this risks data corruption, since there are + no guarantees that the file will remain unchanged + after the locks are lost. + If you want to enable the kernel legacy behaviour of + attempting to recover these locks, then set this + parameter to '1'. + The default parameter value of '0' causes the kernel + not to attempt recovery of lost locks. 
nfsd.nfs4_disable_idmapping= [NFSv4] When set to the default of '1', the NFSv4 diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6411831..277407d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -372,6 +372,7 @@ struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short send_implementation_id; +extern bool recover_lost_locks; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1eb694e..535011a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5523,12 +5523,6 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request return err; } -bool recover_locks = true; -module_param(recover_locks, bool, 0644); -MODULE_PARM_DESC(recover_locks, - "If the server reports that a lock might be lost, " - "try to recovery it risking corruption."); - static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); @@ -5540,7 +5534,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request err = nfs4_set_lock_state(state, request); if (err != 0) return err; - if (!recover_locks) { + if (!recover_lost_locks) { set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags); return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f2071d2..6ad9053 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2758,6 +2758,7 @@ bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; +bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); EXPORT_SYMBOL_GPL(nfs_callback_tcpport); @@ -2766,6 +2767,7 @@ EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); 
EXPORT_SYMBOL_GPL(max_session_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); +EXPORT_SYMBOL_GPL(recover_lost_locks); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -2803,4 +2805,10 @@ MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); +module_param(recover_lost_locks, bool, 0644); +MODULE_PARM_DESC(recover_lost_locks, + "If the server reports that a lock might be lost, " + "try to recover it risking data corruption."); + + #endif /* CONFIG_NFS_V4 */ -- cgit v0.10.2 From 8d1018c7745e5ab861cd6cfd3dc4fa425b90575d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Sep 2013 14:45:13 -0400 Subject: SUNRPC: Ensure rpc_task->tk_pid is available for tracepoints Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d87035..8ebb7c0 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -79,7 +79,7 @@ struct rpc_task { unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ #endif unsigned char tk_priority : 2,/* Task priority */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 93a7a4e..ff3cc4b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(void *word) return 0; } -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; -- cgit v0.10.2 From 2f048db4680ae19da13df15d352ac02748781ecb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Sep 2013 21:51:44 -0400 Subject: SUNRPC: Add an identifier for struct rpc_clnt Add an identifier in order to aid debugging. 
Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 76c0bf6..6740801 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -33,6 +33,7 @@ struct rpc_inode; */ struct rpc_clnt { atomic_t cl_count; /* Number of references */ + unsigned int cl_clid; /* client id */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0cd5b6d5..0a79069 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -313,6 +313,24 @@ out: return err; } +static DEFINE_IDA(rpc_clids); + +static int rpc_alloc_clid(struct rpc_clnt *clnt) +{ + int clid; + + clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL); + if (clid < 0) + return clid; + clnt->cl_clid = clid; + return 0; +} + +static void rpc_free_clid(struct rpc_clnt *clnt) +{ + ida_simple_remove(&rpc_clids, clnt->cl_clid); +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt, struct rpc_clnt *parent) @@ -343,6 +361,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, goto out_err; clnt->cl_parent = parent ? 
: clnt; + err = rpc_alloc_clid(clnt); + if (err) + goto out_no_clid; + rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; @@ -386,6 +408,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, out_no_path: rpc_free_iostats(clnt->cl_metrics); out_no_stats: + rpc_free_clid(clnt); +out_no_clid: kfree(clnt); out_err: rpciod_down(); @@ -646,6 +670,7 @@ rpc_free_client(struct rpc_clnt *clnt) clnt->cl_metrics = NULL; xprt_put(rcu_dereference_raw(clnt->cl_xprt)); rpciod_down(); + rpc_free_clid(clnt); kfree(clnt); } -- cgit v0.10.2 From 92cb6c5be8134db6f7c38f25f6afd13e444cebaf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Sep 2013 22:09:50 -0400 Subject: SUNRPC: Replace pointer values with task->tk_pid and rpc_clnt->cl_clid Instead of the pointer values, use the task and client identifier values for tracing purposes. Signed-off-by: Trond Myklebust diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b74a8ac..d51d16c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -17,18 +17,20 @@ DECLARE_EVENT_CLASS(rpc_task_status, TP_ARGS(task), TP_STRUCT__entry( - __field(const struct rpc_task *, task) - __field(const struct rpc_clnt *, clnt) + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(int, status) ), TP_fast_assign( - __entry->task = task; - __entry->clnt = task->tk_client; + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; __entry->status = task->tk_status; ), - TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) + TP_printk("task:%u@%u, status %d", + __entry->task_id, __entry->client_id, + __entry->status) ); DEFINE_EVENT(rpc_task_status, rpc_call_status, @@ -49,18 +51,20 @@ TRACE_EVENT(rpc_connect_status, TP_ARGS(task, status), TP_STRUCT__entry( - __field(const struct rpc_task *, task) - __field(const struct rpc_clnt *, clnt) + 
__field(unsigned int, task_id) + __field(unsigned int, client_id) __field(int, status) ), TP_fast_assign( - __entry->task = task; - __entry->clnt = task->tk_client; + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; __entry->status = status; ), - TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) + TP_printk("task:%u@%u, status %d", + __entry->task_id, __entry->client_id, + __entry->status) ); DECLARE_EVENT_CLASS(rpc_task_running, @@ -70,8 +74,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, TP_ARGS(clnt, task, action), TP_STRUCT__entry( - __field(const struct rpc_clnt *, clnt) - __field(const struct rpc_task *, task) + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(const void *, action) __field(unsigned long, runstate) __field(int, status) @@ -79,17 +83,16 @@ DECLARE_EVENT_CLASS(rpc_task_running, ), TP_fast_assign( - __entry->clnt = clnt; - __entry->task = task; + __entry->client_id = clnt->cl_clid; + __entry->task_id = task->tk_pid; __entry->action = action; __entry->runstate = task->tk_runstate; __entry->status = task->tk_status; __entry->flags = task->tk_flags; ), - TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf", - __entry->task, - __entry->clnt, + TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d action=%pf", + __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, __entry->status, @@ -128,8 +131,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, TP_ARGS(clnt, task, q), TP_STRUCT__entry( - __field(const struct rpc_clnt *, clnt) - __field(const struct rpc_task *, task) + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(unsigned long, timeout) __field(unsigned long, runstate) __field(int, status) @@ -138,8 +141,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, ), TP_fast_assign( - __entry->clnt = clnt; - __entry->task = task; + __entry->client_id = clnt->cl_clid; + __entry->task_id = task->tk_pid; __entry->timeout = 
task->tk_timeout; __entry->runstate = task->tk_runstate; __entry->status = task->tk_status; @@ -147,9 +150,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __assign_str(q_name, rpc_qname(q)); ), - TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", - __entry->task, - __entry->clnt, + TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", + __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, __entry->status, -- cgit v0.10.2 From 2031cd1af143f47dacacbb35efbef22f2fd079e6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:32 -0400 Subject: nfs4.1: Minimal SP4_MACH_CRED implementation This is a minimal client side implementation of SP4_MACH_CRED. It will attempt to negotiate SP4_MACH_CRED iff the EXCHANGE_ID is using krb5i or krb5p auth. SP4_MACH_CRED will be used if the server supports the minimal operations: BIND_CONN_TO_SESSION EXCHANGE_ID CREATE_SESSION DESTROY_SESSION DESTROY_CLIENTID This patch only includes the EXCHANGE_ID negotiation code because the client will already use the machine cred for these operations. If the server doesn't support SP4_MACH_CRED or doesn't support the minimal operations, the exchange id will be resent with SP4_NONE. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 535011a..ab6ee1d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6117,16 +6117,87 @@ out: } /* - * nfs4_proc_exchange_id() + * Minimum set of SP4_MACH_CRED operations from RFC 5661 + */ +static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { + .how = SP4_MACH_CRED, + .enforce.u.words = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + } +}; + +/* + * Select the state protection mode for client `clp' given the server results + * from exchange_id in `sp'. * - * Returns zero, a negative errno, or a negative NFS4ERR status code. + * Returns 0 on success, negative errno otherwise. + */ +static int nfs4_sp4_select_mode(struct nfs_client *clp, + struct nfs41_state_protection *sp) +{ + static const u32 supported_enforce[NFS4_OP_MAP_NUM_WORDS] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + }; + unsigned int i; + + if (sp->how == SP4_MACH_CRED) { + /* Print state protect result */ + dfprintk(MOUNT, "Server SP4_MACH_CRED support:\n"); + for (i = 0; i <= LAST_NFS4_OP; i++) { + if (test_bit(i, sp->enforce.u.longs)) + dfprintk(MOUNT, " enforce op %d\n", i); + if (test_bit(i, sp->allow.u.longs)) + dfprintk(MOUNT, " allow op %d\n", i); + } + + /* make sure nothing is on enforce list that isn't supported */ + for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) { + if (sp->enforce.u.words[i] & ~supported_enforce[i]) { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + return -EINVAL; + } + } + + /* + * Minimal mode - state operations are allowed to use machine + * credential. 
Note this already happens by default, so the + * client doesn't have to do anything more than the negotiation. + * + * NOTE: we don't care if EXCHANGE_ID is in the list - + * we're already using the machine cred for exchange_id + * and will never use a different cred. + */ + if (test_bit(OP_BIND_CONN_TO_SESSION, sp->enforce.u.longs) && + test_bit(OP_CREATE_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) { + dfprintk(MOUNT, "sp4_mach_cred:\n"); + dfprintk(MOUNT, " minimal mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags); + } else { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + return -EINVAL; + } + } + + return 0; +} + +/* + * _nfs4_proc_exchange_id() * - * Since the clientid has expired, all compounds using sessions - * associated with the stale clientid will be returning - * NFS4ERR_BADSESSION in the sequence operation, and will therefore - * be in some phase of session reset. + * Wrapper for EXCHANGE_ID operation. */ -int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, + u32 sp4_how) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { @@ -6173,11 +6244,30 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out_server_scope; } + switch (sp4_how) { + case SP4_NONE: + args.state_protect.how = SP4_NONE; + break; + + case SP4_MACH_CRED: + args.state_protect = nfs4_sp4_mach_cred_request; + break; + + default: + /* unsupported! 
*/ + WARN_ON_ONCE(1); + status = -EINVAL; + goto out_server_scope; + } + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_exchange_id(clp, status); if (status == 0) status = nfs4_check_cl_exchange_flags(res.flags); + if (status == 0) + status = nfs4_sp4_select_mode(clp, &res.state_protect); + if (status == 0) { clp->cl_clientid = res.clientid; clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); @@ -6224,6 +6314,35 @@ out: return status; } +/* + * nfs4_proc_exchange_id() + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. + * + * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used. + */ +int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +{ + rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor; + int status; + + /* try SP4_MACH_CRED if krb5i/p */ + if (authflavor == RPC_AUTH_GSS_KRB5I || + authflavor == RPC_AUTH_GSS_KRB5P) { + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); + if (!status) + return 0; + } + + /* try SP4_NONE */ + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); +} + static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, struct rpc_cred *cred) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cb5f946..fbdad9e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -294,7 +294,9 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ - 0 /* SP4_NONE (for now) */ + \ + /* max is SP4_MACH_CRED (for now) */ + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ 1 /* implementation id array of size 1 */ + \ 1 /* nii_domain */ + \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ @@ -306,7 +308,9 @@ static int 
nfs4_stat_to_errno(int); 1 /* eir_sequenceid */ + \ 1 /* eir_flags */ + \ 1 /* spr_how */ + \ - 0 /* SP4_NONE (for now) */ + \ + /* max is SP4_MACH_CRED (for now) */ + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ 2 /* eir_server_owner.so_minor_id */ + \ /* eir_server_owner.so_major_id<> */ \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ @@ -1726,6 +1730,14 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr, *p = 0; /* use_conn_in_rdma_mode = False */ } +static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) +{ + unsigned int i; + encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS); + for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) + encode_uint32(xdr, op_map->u.words[i]); +} + static void encode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_args *args, struct compound_hdr *hdr) @@ -1739,9 +1751,20 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_string(xdr, args->id_len, args->id); - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(args->flags); - *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ + encode_uint32(xdr, args->flags); + encode_uint32(xdr, args->state_protect.how); + + switch (args->state_protect.how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + encode_op_map(xdr, &args->state_protect.enforce); + encode_op_map(xdr, &args->state_protect.allow); + break; + default: + WARN_ON_ONCE(1); + break; + } if (send_implementation_id && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && @@ -1752,7 +1775,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, utsname()->version, utsname()->machine); if (len > 0) { - *p = cpu_to_be32(1); /* implementation id array length=1 */ + encode_uint32(xdr, 1); /* implementation id array length=1 */ encode_string(xdr, sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, @@ -1763,7 +1786,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, p = xdr_encode_hyper(p, 0); *p = cpu_to_be32(0); } else - *p = cpu_to_be32(0); /* 
implementation id array length=0 */ + encode_uint32(xdr, 0); /* implementation id array length=0 */ } static void encode_create_session(struct xdr_stream *xdr, @@ -5374,6 +5397,23 @@ static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_re return decode_secinfo_common(xdr, res); } +static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) +{ + __be32 *p; + uint32_t bitmap_words; + unsigned int i; + + p = xdr_inline_decode(xdr, 4); + bitmap_words = be32_to_cpup(p++); + if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) + return -EIO; + p = xdr_inline_decode(xdr, 4 * bitmap_words); + for (i = 0; i < bitmap_words; i++) + op_map->u.words[i] = be32_to_cpup(p++); + + return 0; +} + static int decode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_res *res) { @@ -5397,10 +5437,22 @@ static int decode_exchange_id(struct xdr_stream *xdr, res->seqid = be32_to_cpup(p++); res->flags = be32_to_cpup(p++); - /* We ask for SP4_NONE */ - dummy = be32_to_cpup(p); - if (dummy != SP4_NONE) + res->state_protect.how = be32_to_cpup(p); + switch (res->state_protect.how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + status = decode_op_map(xdr, &res->state_protect.enforce); + if (status) + return status; + status = decode_op_map(xdr, &res->state_protect.allow); + if (status) + return status; + break; + default: + WARN_ON_ONCE(1); return -EIO; + } /* server_owner4.so_minor_id */ p = xdr_inline_decode(xdr, 8); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fc83d3d..9f7b919 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -90,6 +90,10 @@ struct nfs_client { struct nfs41_server_owner *cl_serverowner; struct nfs41_server_scope *cl_serverscope; struct nfs41_impl_id *cl_implid; + /* nfs 4.1+ state protection modes: */ + unsigned long cl_sp4_flags; +#define NFS_SP4_MACH_CRED_MINIMAL 1 /* Minimal sp4_mach_cred - state ops + * must use machine cred */ #endif /* CONFIG_NFS_V4 */ #ifdef 
CONFIG_NFS_FSCACHE diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7c3956d..01fd84b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1107,6 +1107,23 @@ struct pnfs_ds_commit_info { struct pnfs_commit_bucket *buckets; }; +#define NFS4_OP_MAP_NUM_LONGS \ + DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long)) +#define NFS4_OP_MAP_NUM_WORDS \ + (NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32)) +struct nfs4_op_map { + union { + unsigned long longs[NFS4_OP_MAP_NUM_LONGS]; + u32 words[NFS4_OP_MAP_NUM_WORDS]; + } u; +}; + +struct nfs41_state_protection { + u32 how; + struct nfs4_op_map enforce; + struct nfs4_op_map allow; +}; + #define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; @@ -1114,6 +1131,7 @@ struct nfs41_exchange_id_args { unsigned int id_len; char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; + struct nfs41_state_protection state_protect; }; struct nfs41_server_owner { @@ -1146,6 +1164,7 @@ struct nfs41_exchange_id_res { struct nfs41_server_owner *server_owner; struct nfs41_server_scope *server_scope; struct nfs41_impl_id *impl_id; + struct nfs41_state_protection state_protect; }; struct nfs41_create_session_args { -- cgit v0.10.2 From ab4c2361358ebdf4933f6bfd1dc98d7a14b55769 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:33 -0400 Subject: nfs4.1: Add state protection handler Add nfs4_state_protect - the function responsible for switching to the machine credential and the correct rpc client when SP4_MACH_CRED is in use. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 277407d..3559e89 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -268,6 +268,35 @@ is_ds_client(struct nfs_client *clp) { return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS; } + +/* + * Function responsible for determining if an rpc_message should use the + * machine cred under SP4_MACH_CRED and if so switching the credential and + * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p). + * Should be called before rpc_call_sync/rpc_call_async. + */ +static inline void +nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, + struct rpc_clnt **clntp, struct rpc_message *msg) +{ + struct rpc_cred *newcred = NULL; + rpc_authflavor_t flavor; + + if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { + spin_lock(&clp->cl_lock); + if (clp->cl_machine_cred != NULL) + newcred = get_rpccred(clp->cl_machine_cred); + spin_unlock(&clp->cl_lock); + if (msg->rpc_cred) + put_rpccred(msg->rpc_cred); + msg->rpc_cred = newcred; + + flavor = clp->cl_rpcclient->cl_auth->au_flavor; + WARN_ON(flavor != RPC_AUTH_GSS_KRB5I && + flavor != RPC_AUTH_GSS_KRB5P); + *clntp = clp->cl_rpcclient; + } +} #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) { @@ -285,6 +314,12 @@ is_ds_client(struct nfs_client *clp) { return false; } + +static inline void +nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, + struct rpc_clnt **clntp, struct rpc_message *msg) +{ +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -- cgit v0.10.2 From fa940720ceff2d7b24b4898c51a440ab59f39a4d Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:34 -0400 Subject: nfs4.1: Add SP4_MACH_CRED cleanup support CLOSE and LOCKU can use the machine credential. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab6ee1d..cba486a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2615,6 +2615,9 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) }; int status = -ENOMEM; + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, + &task_setup_data.rpc_client, &msg); + calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; @@ -5230,6 +5233,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .flags = RPC_TASK_ASYNC, }; + nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, + NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); + /* Ensure this is an unlock - when canceling a lock, the * canceled lock is passed in, and it won't be an unlock. */ @@ -6117,7 +6123,8 @@ out: } /* - * Minimum set of SP4_MACH_CRED operations from RFC 5661 + * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map + * and operations we'd like to see to enable certain features in the allow map */ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { .how = SP4_MACH_CRED, @@ -6127,6 +6134,10 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { 1 << (OP_CREATE_SESSION - 32) | 1 << (OP_DESTROY_SESSION - 32) | 1 << (OP_DESTROY_CLIENTID - 32) + }, + .allow.u.words = { + [0] = 1 << (OP_CLOSE) | + 1 << (OP_LOCKU) } }; @@ -6186,6 +6197,12 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); return -EINVAL; } + + if (test_bit(OP_CLOSE, sp->allow.u.longs) && + test_bit(OP_LOCKU, sp->allow.u.longs)) { + dfprintk(MOUNT, " cleanup mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); + } } return 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 9f7b919..c0ba338 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ 
-94,6 +94,7 @@ struct nfs_client { unsigned long cl_sp4_flags; #define NFS_SP4_MACH_CRED_MINIMAL 1 /* Minimal sp4_mach_cred - state ops * must use machine cred */ +#define NFS_SP4_MACH_CRED_CLEANUP 2 /* CLOSE and LOCKU */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v0.10.2 From 8b5bee2e1b60a611b15b1862f55e60cc9f668324 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:35 -0400 Subject: nfs4.1: Add SP4_MACH_CRED secinfo support SECINFO and SECINFO_NONAME can use the machine credential. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cba486a..5c917c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5998,6 +5998,10 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct } dprintk("NFS call secinfo %s\n", name->name); + + nfs4_state_protect(NFS_SERVER(dir)->nfs_client, + NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); + status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); @@ -6137,7 +6141,9 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { }, .allow.u.words = { [0] = 1 << (OP_CLOSE) | - 1 << (OP_LOCKU) + 1 << (OP_LOCKU), + [1] = 1 << (OP_SECINFO - 32) | + 1 << (OP_SECINFO_NO_NAME - 32) } }; @@ -6203,6 +6209,12 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, dfprintk(MOUNT, " cleanup mode enabled\n"); set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); } + + if (test_bit(OP_SECINFO, sp->allow.u.longs) && + test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { + dfprintk(MOUNT, " secinfo mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags); + } } return 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c0ba338..800da11 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -95,6 +95,7 @@ struct nfs_client { #define 
NFS_SP4_MACH_CRED_MINIMAL 1 /* Minimal sp4_mach_cred - state ops * must use machine cred */ #define NFS_SP4_MACH_CRED_CLEANUP 2 /* CLOSE and LOCKU */ +#define NFS_SP4_MACH_CRED_SECINFO 3 /* SECINFO and SECINFO_NO_NAME */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v0.10.2 From 3787d5063c52b0c38003e6293f24839508604070 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:36 -0400 Subject: nfs4.1: Add SP4_MACH_CRED stateid support TEST_STATEID and FREE_STATEID can use the machine credential. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5c917c9..4818a38 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6143,7 +6143,9 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { [0] = 1 << (OP_CLOSE) | 1 << (OP_LOCKU), [1] = 1 << (OP_SECINFO - 32) | - 1 << (OP_SECINFO_NO_NAME - 32) + 1 << (OP_SECINFO_NO_NAME - 32) | + 1 << (OP_TEST_STATEID - 32) | + 1 << (OP_FREE_STATEID - 32) } }; @@ -6215,6 +6217,12 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, dfprintk(MOUNT, " secinfo mode enabled\n"); set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags); } + + if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) && + test_bit(OP_FREE_STATEID, sp->allow.u.longs)) { + dfprintk(MOUNT, " stateid mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags); + } } return 0; @@ -7547,11 +7555,15 @@ static int _nfs41_test_stateid(struct nfs_server *server, .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_clnt *rpc_client = server->client; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &rpc_client, &msg); dprintk("NFS call test_stateid %p\n", stateid); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(server->client, server, &msg, + status = nfs4_call_sync_sequence(rpc_client, server, &msg, &args.seq_args, 
&res.seq_res); if (status != NFS_OK) { dprintk("NFS reply test_stateid: failed, %d\n", status); @@ -7643,6 +7655,9 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, }; struct nfs_free_stateid_data *data; + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &task_setup.rpc_client, &msg); + dprintk("NFS call free_stateid %p\n", stateid); data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 800da11..1795865 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -96,6 +96,7 @@ struct nfs_client { * must use machine cred */ #define NFS_SP4_MACH_CRED_CLEANUP 2 /* CLOSE and LOCKU */ #define NFS_SP4_MACH_CRED_SECINFO 3 /* SECINFO and SECINFO_NO_NAME */ +#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v0.10.2 From 8c21c62c4452f4e66c3dac9b3f6b74474fad3e08 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:37 -0400 Subject: nfs4.1: Add SP4_MACH_CRED write and commit support WRITE and COMMIT can use the machine credential. If WRITE is supported and COMMIT is not, make all (mach cred) writes FILE_SYNC4. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3559e89..d2db3ce 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -269,15 +269,9 @@ is_ds_client(struct nfs_client *clp) return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS; } -/* - * Function responsible for determining if an rpc_message should use the - * machine cred under SP4_MACH_CRED and if so switching the credential and - * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p). - * Should be called before rpc_call_sync/rpc_call_async. 
- */ -static inline void -nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, - struct rpc_clnt **clntp, struct rpc_message *msg) +static inline bool +_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, + struct rpc_clnt **clntp, struct rpc_message *msg) { struct rpc_cred *newcred = NULL; rpc_authflavor_t flavor; @@ -295,7 +289,37 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, WARN_ON(flavor != RPC_AUTH_GSS_KRB5I && flavor != RPC_AUTH_GSS_KRB5P); *clntp = clp->cl_rpcclient; + + return true; } + return false; +} + +/* + * Function responsible for determining if an rpc_message should use the + * machine cred under SP4_MACH_CRED and if so switching the credential and + * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p). + * Should be called before rpc_call_sync/rpc_call_async. + */ +static inline void +nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, + struct rpc_clnt **clntp, struct rpc_message *msg) +{ + _nfs4_state_protect(clp, sp4_mode, clntp, msg); +} + +/* + * Special wrapper to nfs4_state_protect for write. + * If WRITE can use machine cred but COMMIT cannot, make sure all writes + * that use machine cred use NFS_FILE_SYNC. 
+ */ +static inline void +nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, + struct rpc_message *msg, struct nfs_write_data *wdata) +{ + if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && + !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) + wdata->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -320,6 +344,12 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, struct rpc_clnt **clntp, struct rpc_message *msg) { } + +static inline void +nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, + struct rpc_message *msg, struct nfs_write_data *wdata) +{ +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; @@ -455,6 +485,8 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) #define nfs4_close_state(a, b) do { } while (0) #define nfs4_close_sync(a, b) do { } while (0) +#define nfs4_state_protect(a, b, c, d) do { } while (0) +#define nfs4_state_protect_write(a, b, c, d) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4818a38..e3cdfe3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6223,6 +6223,16 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, dfprintk(MOUNT, " stateid mode enabled\n"); set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags); } + + if (test_bit(OP_WRITE, sp->allow.u.longs)) { + dfprintk(MOUNT, " write mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags); + } + + if (test_bit(OP_COMMIT, sp->allow.u.longs)) { + dfprintk(MOUNT, " commit mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags); + } } return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 379450c..40979e8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1022,6 +1022,9 @@ int 
nfs_initiate_write(struct rpc_clnt *clnt, data->args.count, (unsigned long long)data->args.offset); + nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, + &task_setup_data.rpc_client, &msg, data); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { ret = PTR_ERR(task); @@ -1488,6 +1491,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1795865..e8ff178 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -97,6 +97,8 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_CLEANUP 2 /* CLOSE and LOCKU */ #define NFS_SP4_MACH_CRED_SECINFO 3 /* SECINFO and SECINFO_NO_NAME */ #define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ +#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ +#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v0.10.2 From 8897538e97ed80570f6e3befc41ca0288b776799 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 13 Aug 2013 16:37:38 -0400 Subject: nfs4: Map NFS4ERR_WRONG_CRED to EPERM Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e3cdfe3..cb56102 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -152,6 +152,7 @@ static int nfs4_map_errors(int err) case -NFS4ERR_RECALLCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: + case -NFS4ERR_WRONG_CRED: return -EPERM; case -NFS4ERR_BADOWNER: case -NFS4ERR_BADNAME: -- cgit v0.10.2 From 0f1d26055068bbc66751d1974ecc6f0398b3ac67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Sep 2013 15:52:51 -0400 Subject: NFS: Don't check lock owner 
compatibility in writes unless file is locked If we're doing buffered writes, and there is no file locking involved, then we don't have to worry about whether or not the lock owner information is identical. By relaxing this check, we ensure that fork()ed child processes can write to a page without having to first sync dirty data that was written by the parent to disk. Reported-by: Quentin Barnes Signed-off-by: Trond Myklebust Tested-by: Quentin Barnes diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 40979e8..ac1dc33 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -863,7 +863,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || req->wb_context != ctx; - if (l_ctx) { + if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } -- cgit v0.10.2 From 4109bb7496640aa97a12904527ba8e3a19b7ce7a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Sep 2013 11:09:38 -0400 Subject: NFS: Don't check lock owner compatibility unless file is locked (part 2) When coalescing requests into a single READ or WRITE RPC call, and there is no file locking involved, we don't have to refuse coalescing for requests where the lock owner information doesn't match.
Reported-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 29cfb7a..2ffebf2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -328,6 +328,19 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_pageio_init); +static bool nfs_match_open_context(const struct nfs_open_context *ctx1, + const struct nfs_open_context *ctx2) +{ + return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; +} + +static bool nfs_match_lock_context(const struct nfs_lock_context *l1, + const struct nfs_lock_context *l2) +{ + return l1->lockowner.l_owner == l2->lockowner.l_owner + && l1->lockowner.l_pid == l2->lockowner.l_pid; +} + /** * nfs_can_coalesce_requests - test two requests for compatibility * @prev: pointer to nfs_page @@ -343,13 +356,10 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_page *req, struct nfs_pageio_descriptor *pgio) { - if (req->wb_context->cred != prev->wb_context->cred) - return false; - if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner) - return false; - if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid) + if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; - if (req->wb_context->state != prev->wb_context->state) + if (req->wb_context->dentry->d_inode->i_flock != NULL && + !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return false; if (req->wb_pgbase != 0) return false; -- cgit v0.10.2 From 0e20162ed1e9a639fc61d62c71327169fb1a1970 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 6 Sep 2013 14:14:00 -0400 Subject: NFSv4.1 Use MDS auth flavor for data server connection Commit 4edaa308 "NFS: Use "krb5i" to establish NFSv4 state whenever possible" uses the nfs_client cl_rpcclient for all state management operations, and will use krb5i or auth_sys with no regard to the mount command authflavor choice. 
The MDS, as any NFSv4.1 mount point, uses the nfs_server rpc client for all non-state management operations with a different nfs_server for each fsid encountered traversing the mount point, each with a potentially different auth flavor. pNFS data servers are not mounted in the normal sense as there is no associated nfs_server structure. Data servers can also export multiple fsids, each with a potentially different auth flavor. Data servers need to use the same authflavor as the MDS server rpc client for non-state management operations. Populate a list of rpc clients with the MDS server rpc client auth flavor for the DS to use. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2415198..23ec6e8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -186,6 +186,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans); +extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, + struct inode *); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 98c0104..f798925 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -41,9 +41,124 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) } #ifdef CONFIG_NFS_V4_1 +/** + * Per auth flavor data server rpc clients + */ +struct nfs4_ds_server { + struct list_head list; /* ds_clp->cl_ds_clients */ + struct rpc_clnt *rpc_clnt; +}; + +/** + * Common lookup case for DS I/O + */ +static struct nfs4_ds_server * +nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor) +{ + struct nfs4_ds_server *dss; + + rcu_read_lock(); + list_for_each_entry_rcu(dss, &ds_clp->cl_ds_clients, list) { + if (dss->rpc_clnt->cl_auth->au_flavor != flavor) + continue; + goto out; + } + dss = NULL; +out: + rcu_read_unlock(); 
+ return dss; +} + +static struct nfs4_ds_server * +nfs4_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor, + struct nfs4_ds_server *new) +{ + struct nfs4_ds_server *dss; + + spin_lock(&ds_clp->cl_lock); + list_for_each_entry(dss, &ds_clp->cl_ds_clients, list) { + if (dss->rpc_clnt->cl_auth->au_flavor != flavor) + continue; + goto out; + } + if (new) + list_add_rcu(&new->list, &ds_clp->cl_ds_clients); + dss = new; +out: + spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */ + return dss; +} + +static struct nfs4_ds_server * +nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor) +{ + struct nfs4_ds_server *dss; + + dss = kmalloc(sizeof(*dss), GFP_NOFS); + if (dss == NULL) + return ERR_PTR(-ENOMEM); + + dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor); + if (IS_ERR(dss->rpc_clnt)) { + int err = PTR_ERR(dss->rpc_clnt); + kfree (dss); + return ERR_PTR(err); + } + INIT_LIST_HEAD(&dss->list); + + return dss; +} + +static void +nfs4_free_ds_server(struct nfs4_ds_server *dss) +{ + rpc_release_client(dss->rpc_clnt); + kfree(dss); +} + +/** +* Find or create a DS rpc client with the MDS server rpc client auth flavor +* in the nfs_client cl_ds_clients list.
+*/ +struct rpc_clnt * +nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode) +{ + struct nfs4_ds_server *dss, *new; + rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor; + + dss = nfs4_find_ds_client(ds_clp, flavor); + if (dss != NULL) + goto out; + new = nfs4_alloc_ds_server(ds_clp, flavor); + if (IS_ERR(new)) + return ERR_CAST(new); + dss = nfs4_add_ds_client(ds_clp, flavor, new); + if (dss != new) + nfs4_free_ds_server(new); +out: + return dss->rpc_clnt; +} +EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client); + +static void +nfs4_shutdown_ds_clients(struct nfs_client *clp) +{ + struct nfs4_ds_server *dss; + LIST_HEAD(shutdown_list); + + while (!list_empty(&clp->cl_ds_clients)) { + dss = list_entry(clp->cl_ds_clients.next, + struct nfs4_ds_server, list); + list_del(&dss->list); + rpc_shutdown_client(dss->rpc_clnt); + kfree (dss); + } +} + void nfs41_shutdown_client(struct nfs_client *clp) { if (nfs4_has_session(clp)) { + nfs4_shutdown_ds_clients(clp); nfs4_destroy_session(clp->cl_session); nfs4_destroy_clientid(clp); } @@ -77,6 +192,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + INIT_LIST_HEAD(&clp->cl_ds_clients); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_minorversion = cl_init->minorversion; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index a70cb3a..b86464b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -528,6 +528,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -542,6 +543,11 @@ filelayout_read_pagelist(struct nfs_read_data *data) ds = nfs4_fl_prepare_ds(lseg, idx); if 
(!ds) return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + dprintk("%s USE DS: %s cl_count %d\n", __func__, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); @@ -556,7 +562,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_read(ds_clnt, data, &filelayout_read_call_ops, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -568,6 +574,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -578,6 +585,11 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); @@ -595,7 +607,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_write(ds_clnt, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; @@ -1105,16 +1117,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) { struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; u32 idx; struct nfs_fh *fh; idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, 
idx); - if (!ds) { - prepare_to_resend_writes(data); - filelayout_commit_release(data); - return -EAGAIN; - } + if (!ds) + goto out_err; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode); + if (IS_ERR(ds_clnt)) + goto out_err; + dprintk("%s ino %lu, how %d cl_count %d\n", __func__, data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; @@ -1123,9 +1138,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, + return nfs_initiate_commit(ds_clnt, data, &filelayout_commit_call_ops, how, RPC_TASK_SOFTCONN); +out_err: + prepare_to_resend_writes(data); + filelayout_commit_release(data); + return -EAGAIN; } static int diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e8ff178..b8cedce 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -56,6 +56,7 @@ struct nfs_client { struct rpc_cred *cl_machine_cred; #if IS_ENABLED(CONFIG_NFS_V4) + struct list_head cl_ds_clients; /* auth flavor data servers */ u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; /* Clientid verifier */ unsigned long cl_state; -- cgit v0.10.2 From 74c988116231129320925a665e7420e513faaabe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 13:26:57 -0400 Subject: NFS: Clean up the auth flavour array mess What is the point of having a 'auth_flavor_len' field, if it is always set to 1, and can't be used to determine if the user has selected an auth flavour? This cleanup goes back to using auth_flavor_len for its original intended purpose, and gets rid of the ad-hoc replacements. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 5dbe2d2..4ad837c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -253,8 +253,10 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - if (data->auth_flavors[0] == RPC_AUTH_MAXFLAVOR) + if (data->auth_flavor_len < 1) { data->auth_flavors[0] = RPC_AUTH_UNIX; + data->auth_flavor_len = 1; + } export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6ad9053..5d16ee3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -923,7 +923,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->nfs_server.port = NFS_UNSPEC_PORT; data->nfs_server.protocol = XPRT_TRANSPORT_TCP; data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR; - data->auth_flavor_len = 1; + data->auth_flavor_len = 0; data->minorversion = 0; data->need_mount = true; data->net = current->nsproxy->net_ns; @@ -1018,6 +1018,13 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) } } +static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data, + rpc_authflavor_t pseudoflavor) +{ + data->auth_flavors[0] = pseudoflavor; + data->auth_flavor_len = 1; +} + /* * Parse the value of the 'sec=' option. */ @@ -1729,7 +1736,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. 
*/ - if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) { + if (args->auth_flavor_len > 0) { status = nfs_verify_authflavor(args, authlist, authlist_len); dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); if (status) @@ -1760,7 +1767,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - args->auth_flavors[0] = flavor; + nfs_set_auth_parsed_mount_data(args, flavor); server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (!IS_ERR(server)) return server; @@ -1776,7 +1783,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - args->auth_flavors[0] = RPC_AUTH_UNIX; + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); } @@ -1893,6 +1900,7 @@ static int nfs23_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; if (data == NULL) goto out_no_data; @@ -1908,6 +1916,8 @@ static int nfs23_validate_mount_data(void *options, goto out_no_v3; data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + /* Turn off security negotiation */ + extra_flags |= NFS_MOUNT_SECFLAVOUR; case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; @@ -1935,7 +1945,7 @@ static int nfs23_validate_mount_data(void *options, * can deal with. 
*/ args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= NFS_MOUNT_LEGACY_INTERFACE; + args->flags |= extra_flags; args->rsize = data->rsize; args->wsize = data->wsize; args->timeo = data->timeo; @@ -1959,9 +1969,10 @@ static int nfs23_validate_mount_data(void *options, args->namlen = data->namlen; args->bsize = data->bsize; - args->auth_flavors[0] = RPC_AUTH_UNIX; if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->auth_flavors[0] = data->pseudoflavor; + nfs_set_auth_parsed_mount_data(args, data->pseudoflavor); + else + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); if (!args->nfs_server.hostname) goto out_nomem; @@ -2176,7 +2187,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->rsize = nfss->rsize; data->wsize = nfss->wsize; data->retrans = nfss->client->cl_timeout->to_retries; - data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; + nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor); data->acregmin = nfss->acregmin / HZ; data->acregmax = nfss->acregmax / HZ; data->acdirmin = nfss->acdirmin / HZ; @@ -2675,15 +2686,17 @@ static int nfs4_validate_mount_data(void *options, goto out_no_address; args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); - args->auth_flavors[0] = RPC_AUTH_UNIX; if (data->auth_flavourlen) { + rpc_authflavor_t pseudoflavor; if (data->auth_flavourlen > 1) goto out_inval_auth; - if (copy_from_user(&args->auth_flavors[0], + if (copy_from_user(&pseudoflavor, data->auth_flavours, - sizeof(args->auth_flavors[0]))) + sizeof(pseudoflavor))) return -EFAULT; - } + nfs_set_auth_parsed_mount_data(args, pseudoflavor); + } else + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) -- cgit v0.10.2 From 19e7b8d240702297d837891694f463435910f718 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 14:09:52 -0400 Subject: NFS: Clean up nfs_parse_security_flavors() Signed-off-by: Trond Myklebust 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5d16ee3..b2dd6da 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1032,49 +1032,50 @@ static int nfs_parse_security_flavors(char *value, struct nfs_parsed_mount_data *mnt) { substring_t args[MAX_OPT_ARGS]; + rpc_authflavor_t pseudoflavor; dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); switch (match_token(value, nfs_secflavor_tokens, args)) { case Opt_sec_none: - mnt->auth_flavors[0] = RPC_AUTH_NULL; + pseudoflavor = RPC_AUTH_NULL; break; case Opt_sec_sys: - mnt->auth_flavors[0] = RPC_AUTH_UNIX; + pseudoflavor = RPC_AUTH_UNIX; break; case Opt_sec_krb5: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + pseudoflavor = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + pseudoflavor = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + pseudoflavor = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + pseudoflavor = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + pseudoflavor = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + pseudoflavor = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + pseudoflavor = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + pseudoflavor = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + pseudoflavor = RPC_AUTH_GSS_SPKMP; break; default: return 0; } mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; + nfs_set_auth_parsed_mount_data(mnt, pseudoflavor); return 1; } -- cgit v0.10.2 From 5e6b19901b763f502e62b5fd4f583778df983ce7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 12:58:57 -0400 Subject: NFSv4: Fix security auto-negotiation NFSv4 security auto-negotiation has been broken since commit 
4580a92d44e2b21c2254fa5fef0f1bfb43c82318 (NFS: Use server-recommended security flavor by default (NFSv3)) because nfs4_try_mount() will automatically select AUTH_SYS if it sees no auth flavours. Signed-off-by: Trond Myklebust Cc: Chuck Lever diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 23ec6e8..d388302c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -358,7 +358,7 @@ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); -extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); +extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif struct nfs_pgio_completion_ops; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d2db3ce..f520a11 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -221,7 +221,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); -extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index f798925..cc80085 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -885,7 +885,7 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) } static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, bool auth_probe) { 
struct nfs_fattr *fattr; int error; @@ -917,7 +917,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); + error = nfs4_get_rootfh(server, mntfh, auth_probe); if (error < 0) goto out; @@ -949,6 +949,7 @@ out: static int nfs4_init_server(struct nfs_server *server, const struct nfs_parsed_mount_data *data) { + rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; struct rpc_timeout timeparms; int error; @@ -961,13 +962,16 @@ static int nfs4_init_server(struct nfs_server *server, server->flags = data->flags; server->options = data->options; + if (data->auth_flavor_len >= 1) + pseudoflavor = data->auth_flavors[0]; + /* Get a client record */ error = nfs4_set_client(server, data->nfs_server.hostname, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - data->auth_flavors[0], + pseudoflavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -987,7 +991,7 @@ static int nfs4_init_server(struct nfs_server *server, server->port = data->nfs_server.port; - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor); error: /* Done */ @@ -1005,6 +1009,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { struct nfs_server *server; + bool auth_probe; int error; dprintk("--> nfs4_create_server()\n"); @@ -1013,12 +1018,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); + auth_probe = mount_info->parsed->auth_flavor_len < 1; + /* set up the general RPC client */ error = nfs4_init_server(server, mount_info->parsed); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mount_info->mntfh); + error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe); if (error < 0) goto error; @@ 
-1071,7 +1078,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh); + error = nfs4_server_common_setup(server, mntfh, false); if (error < 0) goto error; diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 549462e..c0b3a16 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -9,7 +9,7 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) +int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { struct nfs_fsinfo fsinfo; int ret = -ENOMEM; @@ -21,7 +21,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) goto out; /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe); if (ret < 0) { dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cb56102..68551ea 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2884,18 +2884,27 @@ static int nfs4_do_find_root_sec(struct nfs_server *server, * @server: initialized nfs_server handle * @fhandle: we fill in the pseudo-fs root file handle * @info: we fill in an FSINFO struct + * @auth_probe: probe the auth flavours * * Returns zero on success, or a negative errno. */ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info, + bool auth_probe) { int status; - status = nfs4_lookup_root(server, fhandle, info); - if ((status == -NFS4ERR_WRONGSEC) && - !(server->flags & NFS_MOUNT_SECFLAVOUR)) + switch (auth_probe) { + case false: + status = nfs4_lookup_root(server, fhandle, info); + if (status != -NFS4ERR_WRONGSEC) + break; + /* Did user force a 'sec=' mount option? 
*/ + if (server->flags & NFS_MOUNT_SECFLAVOUR) + break; + default: status = nfs4_do_find_root_sec(server, fhandle, info); + } if (status == 0) status = nfs4_server_capabilities(server, fhandle); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 4ad837c..e26acdd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -253,10 +253,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - if (data->auth_flavor_len < 1) { - data->auth_flavors[0] = RPC_AUTH_UNIX; - data->auth_flavor_len = 1; - } export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, -- cgit v0.10.2 From 41d058c3ba7bd16c3a91b9ec4d89fb6e7d4b4316 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 17:49:05 -0400 Subject: NFSv4: Disallow security negotiation for lookups when 'sec=' is specified Ensure that nfs4_proc_lookup_common respects the NFS_MOUNT_SECFLAVOUR flag. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68551ea..122b934 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3154,7 +3154,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, err = -EPERM; if (client != *clnt) goto out; - + /* No security negotiation if the user specified 'sec=' */ + if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR) + goto out; client = nfs4_create_sec_client(client, dir, name); if (IS_ERR(client)) return PTR_ERR(client); -- cgit v0.10.2 From 47040da3c7524facd542f37ffeadedac4f228601 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 16:01:07 -0400 Subject: NFSv4: Allow security autonegotiation for submounts In cases where the parent super block was not mounted with a 'sec=' line, allow autonegotiation of security for the submounts. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cc80085..a860ab5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1078,7 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh, false); + error = nfs4_server_common_setup(server, mntfh, + !(parent_server->flags & NFS_MOUNT_SECFLAVOUR)); if (error < 0) goto error; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index cdb0b41..2288cd3 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -369,21 +370,33 @@ out: struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) { + rpc_authflavor_t flavor = server->client->cl_auth->au_flavor; struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct qstr *name = &dentry->d_name; struct rpc_clnt *client; struct vfsmount *mnt; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr); + client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); dput(parent); if (IS_ERR(client)) return ERR_CAST(client); - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { mnt = nfs_do_refmount(client, dentry); - else - mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor); + goto out; + } + if (client->cl_auth->au_flavor != flavor) + flavor = client->cl_auth->au_flavor; + else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) { + rpc_authflavor_t new = nfs4_negotiate_security(dir, name); + if ((int)new >= 0) + flavor = new; + } + mnt = nfs_do_submount(dentry, fh, fattr, flavor); +out: rpc_shutdown_client(client); return mnt; } -- cgit v0.10.2 From 
0aea92bf67321fc600b6c61627e0fd46e8889a49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Sep 2013 18:28:10 -0400 Subject: NFS: nfs_compare_super shouldn't check the auth flavour unless 'sec=' was set Also don't worry about obsolete mount flags... Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b2dd6da..50bc31d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2295,6 +2295,18 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) nfs_initialise_sb(sb); } +#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ + | NFS_MOUNT_SECURE \ + | NFS_MOUNT_TCP \ + | NFS_MOUNT_VER3 \ + | NFS_MOUNT_KERBEROS \ + | NFS_MOUNT_NONLM \ + | NFS_MOUNT_BROKEN_SUID \ + | NFS_MOUNT_STRICTLOCK \ + | NFS_MOUNT_UNSHARED \ + | NFS_MOUNT_NORESVPORT \ + | NFS_MOUNT_LEGACY_INTERFACE) + static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) { const struct nfs_server *a = s->s_fs_info; @@ -2305,7 +2317,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->nfs_client != b->nfs_client) goto Ebusy; - if (a->flags != b->flags) + if ((a->flags ^ b->flags) & NFS_MOUNT_CMP_FLAGMASK) goto Ebusy; if (a->wsize != b->wsize) goto Ebusy; @@ -2319,7 +2331,8 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + if (b->flags & NFS_MOUNT_SECFLAVOUR && + clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; Ebusy: -- cgit v0.10.2 From b1b3e136948a2bf4915326acb0d825d7d180753f Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 4 Sep 2013 12:13:19 -0400 Subject: NFSv4: use mach cred for SECINFO_NO_NAME w/ integrity Commit 97431204ea005ec8070ac94bc3251e836daa7ca7 introduced a regression that causes SECINFO_NO_NAME to fail without sending an RPC if: 1) the 
nfs_client's rpc_client is using krb5i/p (now tried by default) 2) the current user doesn't have valid kerberos credentials This situation is quite common - as of now a sec=sys mount would use krb5i for the nfs_client's rpc_client and a user would hardly be faulted for not having run kinit. The solution is to use the machine cred when trying to use an integrity protected auth flavor for SECINFO_NO_NAME. Older servers may not support using the machine cred or an integrity protected auth flavor for SECINFO_NO_NAME in every circumstance, so we fall back to using the user's cred and the filesystem's auth flavor in this case. We run into another problem when running against linux nfs servers - they return NFS4ERR_WRONGSEC when using integrity auth flavor (unless the mount is also that flavor) even though that is not a valid error for SECINFO*. Even though it's against spec, handle WRONGSEC errors on SECINFO_NO_NAME by falling back to using the user cred and the filesystem's auth flavor. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 122b934..e121291 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7483,7 +7483,8 @@ out: */ static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) + struct nfs_fsinfo *info, + struct nfs4_secinfo_flavors *flavors, bool use_integrity) { struct nfs41_secinfo_no_name_args args = { .style = SECINFO_STYLE_CURRENT_FH, @@ -7496,8 +7497,23 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - return nfs4_call_sync(server->nfs_client->cl_rpcclient, server, &msg, - &args.seq_args, &res.seq_res, 0); + struct rpc_clnt *clnt = server->client; + int status; + + if (use_integrity) { + clnt = server->nfs_client->cl_rpcclient; + msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client); + } + + dprintk("--> 
%s\n", __func__); + status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, + &res.seq_res, 0); + dprintk("<-- %s status=%d\n", __func__, status); + + if (msg.rpc_cred) + put_rpccred(msg.rpc_cred); + + return status; } static int @@ -7507,7 +7523,24 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs4_exception exception = { }; int err; do { - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + /* first try using integrity protection */ + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(server->nfs_client)) + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + flavors, false); + switch (err) { case 0: case -NFS4ERR_WRONGSEC: -- cgit v0.10.2