From eb48241bb4484e56171fcb5beb39f530a2cd484e Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Fri, 3 May 2013 18:08:24 +0800 Subject: nfsd: get rid of the unused functions in vfs The fh_lock_parent(), nfsd_truncate(), nfsd_notify_change() and nfsd_sync_dir() fuctions are neither implemented nor used, just remove them. Signed-off-by: Zhao Hongjiang Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5b58941..8d2b40d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -39,7 +39,6 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); /* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -92,17 +91,13 @@ __be32 nfsd_remove(struct svc_rqst *, struct svc_fh *, char *, int); __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); -- cgit v0.10.2 From 4f540e29dc20b87d460559bce184d2238237f48b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 7 May 2013 11:57:52 -0400 Subject: nfsd4: store correct client minorversion for >=4.2 This code assumes that any client using exchange_id is using NFSv4.1, but with the introduction of 4.2 that will no longer true. This main effect of this is that client callbacks will use the same minorversion as that used on the exchange_id. Note that clients are forbidden from mixing 4.1 and 4.2 compounds. (See rfc 5661, section 2.7, #13: "A client MUST NOT attempt to use a stateid, filehandle, or similar returned object from the COMPOUND procedure with minor version X for another COMPOUND procedure with minor version Y, where X != Y.") However, we do not currently attempt to enforce this except in the case of mixing zero minor version with non-zero minor versions. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 316ec84..91ead0e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1709,7 +1709,7 @@ out_new: status = nfserr_jukebox; goto out; } - new->cl_minorversion = 1; + new->cl_minorversion = cstate->minorversion; gen_clid(new, nn); add_to_unconfirmed(new); -- cgit v0.10.2 From 0d422afb892e3f993cf934b76a2c2ef839c446e0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 May 2013 18:05:41 -0400 Subject: security: cap_inode_getsecctx returning garbage We shouldn't be returning success from this function without also filling in the return values ctx and ctxlen. Note currently this doesn't appear to cause bugs since the only inode_getsecctx caller I can find is fs/sysfs/inode.c, which only calls this if security_inode_setsecurity succeeds. Assuming security_inode_setsecurity is set to cap_inode_setsecurity whenever inode_getsecctx is set to cap_inode_getsecctx, this function can never actually called. So I noticed this only because the server labeled NFS patches add a real caller. Acked-by: Serge E. Hallyn Signed-off-by: J. Bruce Fields diff --git a/security/capability.c b/security/capability.c index 1728d4e..83efc90 100644 --- a/security/capability.c +++ b/security/capability.c @@ -843,7 +843,7 @@ static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return 0; + return -EOPNOTSUPP; } #ifdef CONFIG_KEYS static int cap_key_alloc(struct key *key, const struct cred *cred, -- cgit v0.10.2 From 4bdc33ed5bd9fbaa243bda6fdccb22674aed6305 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Thu, 2 May 2013 13:18:57 -0400 Subject: NFSDv4.2: Add NFS v4.2 support to the NFS server This enables NFSv4.2 support for the server. To enable this code do the following: echo "+4.2" >/proc/fs/nfsd/versions after the nfsd kernel module is loaded. On its own this does nothing except allow the server to respond to compounds with minorversion set to 2. All the new NFSv4.2 features are optional, so this is perfectly legal. Signed-off-by: Steve Dickson Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0..9aeacdd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1567,6 +1567,7 @@ struct nfsd4_minorversion_ops { static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, + [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, }; static __be32 diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index c0d9317..15e7e1d 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,7 +24,7 @@ /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 +#define NFSD_SUPPORTED_MINOR_VERSION 2 /* * Maximum blocksizes supported by daemon under various circumstances. */ -- cgit v0.10.2 From 18032ca062e621e15683cb61c066ef3dc5414a7b Mon Sep 17 00:00:00 2001 From: David Quigley Date: Thu, 2 May 2013 13:19:10 -0400 Subject: NFSD: Server implementation of MAC Labeling Implement labeled NFS on the server: encoding and decoding, and writing and reading, of file labels. Enabled with CONFIG_NFSD_V4_SECURITY_LABEL. Signed-off-by: Matthew N. Dodd Signed-off-by: Miguel Rodel Felipe Signed-off-by: Phua Eu Gene Signed-off-by: Khin Mi Mi Aung Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 430b687..dc8f1ef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -81,6 +81,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers." + config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a2..1a1ff24 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -42,6 +42,36 @@ #include "current_stateid.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif + #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, use the returned bitmask * to indicate which attributes we used to store the @@ -637,6 +670,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -916,6 +952,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9aeacdd..dfca512 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,11 @@ #include "cache.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include +#endif + + #define NFSDDBG_FACILITY NFSDDBG_XDR /* @@ -242,7 +247,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl) + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; @@ -380,6 +386,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; } } + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + READ32(dummy32); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + READ32(dummy32); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) @@ -576,7 +608,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl); + &create->cr_acl, &create->cr_label); if (status) goto out; @@ -827,7 +859,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -841,7 +873,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -1063,7 +1095,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl); + &setattr->sa_acl, &setattr->sa_label); } static __be32 @@ -1954,6 +1986,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +{ + __be32 *p = *pp; + + if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) + return nfserr_resource; + + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + if ((*buflen -= 8) < 0) + return nfserr_resource; + + WRITE32(0); /* lfs */ + WRITE32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + *buflen -= (XDR_QUADLEN(len) << 2) + 4; + + *pp = p; + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, struct dentry *dentry, __be32 **pp, int *buflen) +{ return 0; } +#endif + static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { /* As per referral draft: */ @@ -2013,6 +2075,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { @@ -2066,6 +2131,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; + } + } +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ + if (bmval2) { if ((buflen -= 16) < 0) goto out_resource; @@ -2094,6 +2174,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { if ((buflen -= 12) < 0) goto out_resource; @@ -2401,6 +2483,12 @@ out_acl: get_parent_attributes(exp, &stat); WRITE64(stat.ino); } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + status = nfsd4_encode_security_label(rqstp, context, + contextlen, &p, &buflen); + if (status) + goto out; + } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); @@ -2413,6 +2501,8 @@ out_acl: status = nfs_ok; out: + if (context) + security_release_secctx(context, contextlen); kfree(acl); if (fhp == &tempfh) fh_put(&tempfh); @@ -3177,16 +3267,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { __be32 *p; - RESERVE_SPACE(12); + RESERVE_SPACE(16); if (nfserr) { - WRITE32(2); + WRITE32(3); + WRITE32(0); WRITE32(0); WRITE32(0); } else { - WRITE32(2); + WRITE32(3); WRITE32(setattr->sa_bmval[0]); WRITE32(setattr->sa_bmval[1]); + WRITE32(setattr->sa_bmval[2]); } ADJUST_ARGS(); return nfserr; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 15e7e1d..2bbd94e 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -328,6 +328,13 @@ void nfsd_lockd_shutdown(void); #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 @@ -342,8 +349,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) static inline u32 nfsd_suppattrs2(u32 minorversion) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ @@ -356,7 +366,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601..1e757fa 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + __be32 error; + int host_error; + struct dentry *dentry; + + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 8d2b40d..a4be2e3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -55,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *); __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, struct nfs4_acl *); int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3b271d2..b3ed644 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -40,6 +40,7 @@ #include "state.h" #include "nfsd.h" +#define NFSD4_MAX_SEC_LABEL_LEN 2048 #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -118,6 +119,7 @@ struct nfsd4_create { struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -246,6 +248,7 @@ struct nfsd4_open { struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; + struct xdr_netobj op_label; }; #define op_iattr iattr @@ -330,6 +333,7 @@ struct nfsd4_setattr { u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; }; struct nfsd4_setclientid { -- cgit v0.10.2 From 2fccbd9cc0fdca649b01f1e2d96e5ef85256341a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 24 Sep 2012 15:53:29 -0400 Subject: sunrpc: server back channel needs no rpcbind method XPRT_BOUND is set on server backchannel xprts by xs_setup_bc_tcp() (using xprt_set_bound()), and is never cleared, so ->rpcbind() will never need to be called. Reported-by: "Myklebust, Trond" Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd5034..5d6b0da 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2534,7 +2534,6 @@ static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .alloc_slot = xprt_alloc_slot, - .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, -- cgit v0.10.2 From ba4e55bb67894136489b27372166416cd70b0756 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 May 2013 10:27:52 -0400 Subject: nfsd4: fix compile in !CONFIG_NFSD_V4_SECURITY_LABEL case Reported-by: kbuild test robot Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index dfca512..170ea7e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2012,7 +2012,7 @@ nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be } #else static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, struct dentry *dentry, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) { return 0; } #endif @@ -2501,8 +2501,10 @@ out_acl: status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (context) security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); if (fhp == &tempfh) fh_put(&tempfh); -- cgit v0.10.2 From b6040f9706c4c81cc50b50855ed70840f022bebb Mon Sep 17 00:00:00 2001 From: chaoting fan Date: Thu, 28 Mar 2013 22:19:45 +0800 Subject: sunrpc: the cache_detail in cache_is_valid is unused any more The cache_detail(*detail) in function cache_is_valid is not used any more. Signed-off-by: fanchaoting Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c8..3b3f14f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -201,7 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h); } -static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) +static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; @@ -227,7 +227,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h int rv; write_lock(&detail->hash_lock); - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv != -EAGAIN) { write_unlock(&detail->hash_lock); return rv; @@ -260,7 +260,7 @@ int cache_check(struct cache_detail *detail, long refresh_age, age; /* First decide return status as best we can */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); @@ -293,7 +293,7 @@ int cache_check(struct cache_detail *detail, * Request was not deferred; handle it as best * we can ourselves: */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } -- cgit v0.10.2 From 1a9357f443d64aa41e9b0dc414953663a6fcca19 Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Fri, 17 May 2013 17:33:00 -0400 Subject: nfsd: avoid undefined signed overflow In C, signed integer overflow results in undefined behavior, but unsigned overflow wraps around. So do the subtraction first, then cast to signed. Reported-by: Joakim Tjernlund Signed-off-by: Jim Rees Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 91ead0e..72f0c4e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3427,7 +3427,7 @@ grace_disallows_io(struct net *net, struct inode *inode) /* Returns true iff a is later than b: */ static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return (s32)a->si_generation - (s32)b->si_generation > 0; + return (s32)(a->si_generation - b->si_generation) > 0; } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) -- cgit v0.10.2 From 442340639194762df7e61e8aabae44a18896eca1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 May 2013 16:53:40 -0400 Subject: svcrpc: introduce init_svc_cred Common helper to zero out fields of the svc_cred. Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index ff374ab..95c9566 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -25,6 +25,13 @@ struct svc_cred { char *cr_principal; /* for gss */ }; +static inline void init_svc_cred(struct svc_cred *cred) +{ + cred->cr_group_info = NULL; + cred->cr_principal = NULL; + cred->cr_gss_mech = NULL; +} + static inline void free_svc_cred(struct svc_cred *cred) { if (cred->cr_group_info) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 29b4ba9..8d7860e 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) new->handle.data = tmp->handle.data; tmp->handle.data = NULL; new->mechctx = NULL; - new->cred.cr_group_info = NULL; - new->cred.cr_principal = NULL; + init_svc_cred(&new->cred); } static void @@ -392,9 +391,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; new->cred.cr_principal = tmp->cred.cr_principal; - tmp->cred.cr_principal = NULL; + init_svc_cred(&tmp->cred); } static struct cache_head * -- cgit v0.10.2 From 0dc1531aca7fd1440918bd55844a054e9c29acad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 May 2013 16:07:13 -0400 Subject: svcrpc: store gss mech in svc_cred Store a pointer to the gss mechanism used in the rq_cred and cl_cred. This will make it easier to enforce SP4_MACH_CRED, which needs to compare the mechanism used on the exchange_id with that used on protected operations. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 72f0c4e..109b4340 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1188,6 +1188,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); return 0; } diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 161463e..1f911cc 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -151,6 +151,8 @@ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); /* Fill in an array with a list of supported pseudoflavors */ int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); +struct gss_api_mech * gss_mech_get(struct gss_api_mech *); + /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 95c9566..8d71d65 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ struct svc_cred { struct group_info *cr_group_info; u32 cr_flavor; /* pseudoflavor */ char *cr_principal; /* for gss */ + struct gss_api_mech *cr_gss_mech; }; static inline void init_svc_cred(struct svc_cred *cred) @@ -37,6 +39,8 @@ static inline void free_svc_cred(struct svc_cred *cred) if (cred->cr_group_info) put_group_info(cred->cr_group_info); kfree(cred->cr_principal); + gss_mech_put(cred->cr_gss_mech); + init_svc_cred(cred); } struct svc_rqst; /* forward decl */ diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index defa9d3..27ce262 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm) } EXPORT_SYMBOL_GPL(gss_mech_unregister); -static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) +struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } +EXPORT_SYMBOL(gss_mech_get); static struct gss_api_mech * _gss_mech_get_by_name(const char *name) @@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) } return 0; } +EXPORT_SYMBOL(gss_pseudoflavor_to_service); char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) @@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm) if (gm) module_put(gm->gm_owner); } +EXPORT_SYMBOL(gss_mech_put); /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 8d7860e..0265bb3 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -391,7 +391,6 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - new->cred.cr_principal = tmp->cred.cr_principal; init_svc_cred(&tmp->cred); } @@ -485,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - gm = gss_mech_get_by_name(buf); + gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf); status = -EOPNOTSUPP; if (!gm) goto out; @@ -515,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); -- cgit v0.10.2 From 57266a6e916e2522ea61758a3ee5576b60156791 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 13 Apr 2013 14:27:29 -0400 Subject: nfsd4: implement minimal SP4_MACH_CRED Do a minimal SP4_MACH_CRED implementation suggested by Trond, ignoring the client-provided spo_must_* arrays and just enforcing credential checks for the minimum required operations. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 109b4340..2383d24 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1265,6 +1265,31 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); +} + static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; @@ -1642,16 +1667,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; - /* Currently only support SP4_NONE */ switch (exid->spa_how) { + case SP4_MACH_CRED: + if (!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: return nfserr_encr_alg_unsupp; - case SP4_MACH_CRED: - return nfserr_serverfault; /* no excuse :-/ */ } /* Cases below refer to rfc 5661 section 18.35.4: */ @@ -1666,6 +1691,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_inval; goto out; } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } if (!creds_match) { /* case 9 */ status = nfserr_perm; goto out; @@ -1713,6 +1742,7 @@ out_new: goto out; } new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -1877,6 +1907,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, WARN_ON_ONCE(conf && unconf); if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { @@ -1893,6 +1926,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out_free_conn; } + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { @@ -1989,6 +2025,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_badsession; if (!session) goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -2031,6 +2070,9 @@ nfsd4_destroy_session(struct svc_rqst *r, status = nfserr_badsession; if (!ses) goto out_client_lock; + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) + goto out_client_lock; status = mark_session_dead_locked(ses); if (status) goto out_client_lock; @@ -2061,26 +2103,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s return NULL; } -static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; + __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); - if (c) { - spin_unlock(&clp->cl_lock); - free_conn(new); - return; - } + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); - return; + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; } static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) @@ -2172,8 +2219,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out_put_session; - nfsd4_sequence_check_conn(conn, session); + status = nfsd4_sequence_check_conn(conn, session); conn = NULL; + if (status) + goto out_put_session; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; @@ -2235,7 +2284,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_stale_clientid; goto out; } - + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } expire_client(clp); out: nfs4_unlock_state(); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 170ea7e..3126210 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3321,6 +3321,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w return nfserr; } +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) @@ -3359,6 +3367,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* state_protect4_r. Currently only support SP4_NONE */ BUG_ON(exid->spa_how != SP4_NONE); WRITE32(exid->spa_how); + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce bitmap: */ + WRITE32(2); + WRITE32(nfs4_minimal_spo_must_enforce[0]); + WRITE32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + WRITE32(0); + break; + default: + WARN_ON_ONCE(1); + } /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 274e2a1..424d8f5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -246,6 +246,7 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ -- cgit v0.10.2 From b78724b70599f66a91c6d6c897a81f4f87f549f4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 May 2013 17:34:39 -0400 Subject: nfsd4: fail attempts to request gss on the backchannel We don't support gss on the backchannel. We should state that fact up front rather than just letting things continue and later making the client try to figure out why the backchannel isn't working. Trond suggested instead returning NFS4ERR_NOENT. I think it would be tricky for the client to distinguish between the case "I don't support gss on the backchannel" and "I can't find that in my cache, please create another context and try that instead", and I'd prefer something that currently doesn't have any other meaning for this operation, hence the (somewhat arbitrary) NFS4ERR_ENCR_ALG_UNSUPP. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2383d24..c4f6339 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1872,6 +1872,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) return nfs_ok; } +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) +{ + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. + * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1887,6 +1905,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; @@ -1996,7 +2017,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state { struct nfsd4_session *session = cstate->session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; spin_lock(&nn->client_lock); session->se_cb_prog = bc->bc_cb_program; session->se_cb_sec = bc->bc_cb_sec; -- cgit v0.10.2 From 57569a707082a337e4a61a657521d79cac3528bf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 17 May 2013 16:25:32 -0400 Subject: nfsd4: allow client to send no cb_sec flavors In testing I notice that some of the pynfs tests forget to send any cb_sec flavors, and that we haven't necessarily errored out in that case before. I'll fix pynfs, but am also inclined to default to trying AUTH_NONE in that case in case this is something clients actually do. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3126210..171fe5e4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -460,7 +460,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); READ32(nr_secflavs); - cbs->flavor = (u32)(-1); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); READ32(dummy); -- cgit v0.10.2 From 9a0590aec3ca5e86a64b87e004244abc4dd1faf9 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Wed, 15 May 2013 14:51:49 -0400 Subject: NFSD: Don't give out read delegations on creates When an exclusive create is done with the mode bits set (aka open(testfile, O_CREAT | O_EXCL, 0777)) this causes a OPEN op followed by a SETATTR op. When a read delegation is given in the OPEN, it causes the SETATTR to delay with EAGAIN until the delegation is recalled. This patch caused exclusive creates to give out a write delegation (which turn into no delegation) which allows the SETATTR seamlessly succeed. Signed-off-by: Steve Dickson [bfields: do this for any CREATE, not just exclusive; comment] Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c4f6339..44dcea9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3113,8 +3113,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, goto out; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) flag = NFS4_OPEN_DELEGATE_WRITE; + else if (open->op_create == NFS4_OPEN_CREATE) + flag = NFS4_OPEN_DELEGATE_WRITE; else flag = NFS4_OPEN_DELEGATE_READ; break; -- cgit v0.10.2 From 99c415156c49571d0f045a8cb56e0bc24225b9d9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 21 May 2013 16:21:25 -0400 Subject: nfsd4: clean up nfs4_open_delegation The nfs4_open_delegation logic is unecessarily baroque. Also stop pretending we support write delegations in several places. Some day we will support write delegations, but when that happens adding back in these flag parameters will be the easy part. For now they're just confusing. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 44dcea9..fcc0610 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -364,19 +364,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - /* - * Major work on the lease subsystem (for example, to support - * calbacks on stat) will be required before we can support - * write delegations properly. - */ - if (type != NFS4_OPEN_DELEGATE_READ) - return NULL; if (fp->fi_had_conflict) return NULL; if (num_delegations > max_delegations) @@ -397,7 +390,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_recall_lru); get_nfs4_file(fp); dp->dl_file = fp; - dp->dl_type = type; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); @@ -3020,13 +3013,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } -static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, flag); + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); @@ -3044,12 +3037,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) return 0; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +static int nfs4_set_delegation(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; if (!fp->fi_lease) - return nfs4_setlease(dp, flag); + return nfs4_setlease(dp); spin_lock(&recall_lock); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); @@ -3085,6 +3078,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. */ static void nfs4_open_delegation(struct net *net, struct svc_fh *fh, @@ -3093,26 +3089,26 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status = 0, flag = 0; + int status = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ if (locks_in_grace(net)) - goto out; + goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) - goto out; + goto out_no_deleg; /* * Also, if the file was opened for write or * create, there's a good chance the client's @@ -3121,20 +3117,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, * write delegations): */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else if (open->op_create == NFS4_OPEN_CREATE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } - - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp, flag); + status = nfs4_set_delegation(dp); if (status) goto out_free; @@ -3142,24 +3135,21 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); -out: - open->op_delegate_type = flag; - if (flag == NFS4_OPEN_DELEGATE_NONE) { - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - - /* 4.1 client asking for a delegation? */ - if (open->op_deleg_want) - nfsd4_open_deleg_none_ext(open, status); - } + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; } static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, -- cgit v0.10.2 From 247500820ebd02ad87525db5d9b199e5b66f6636 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Jun 2013 11:48:11 -0400 Subject: nfsd4: fix decoding of compounds across page boundaries A freebsd NFSv4.0 client was getting rare IO errors expanding a tarball. A network trace showed the server returning BAD_XDR on the final getattr of a getattr+write+getattr compound. The final getattr started on a page boundary. I believe the Linux client ignores errors on the post-write getattr, and that that's why we haven't seen this before. Cc: stable@vger.kernel.org Reported-by: Rick Macklem Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 171fe5e4..e34f5eb 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -167,8 +167,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; -- cgit v0.10.2 From 590b743143eae8db40abdfd1ab20bc51ee0ee5db Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Jun 2013 17:06:29 -0400 Subject: nfsd4: minor read_buf cleanup The code to step to the next page seems reasonably self-contained. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e34f5eb..c102d25 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -139,6 +139,19 @@ xdr_error: \ } \ } while (0) +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -166,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. */ memcpy(p, argp->p, avail); - /* step to next page */ - argp->pagelist++; - argp->p = page_address(argp->pagelist[0]); - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; -- cgit v0.10.2 From cf3aa02cb4a0c5af5557dd47f15a08a7df33182a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 11:09:06 -0400 Subject: svcrpc: fix handling of too-short rpc's If we detect that an rpc is too short, we abort and close the connection. Except, there's a bug here: we're leaving sk_datalen nonzero without leaving any pages in the sk_pages array. The most likely result of the inconsistency is a subsequent crash in svc_tcp_clear_pages. Also demote the BUG_ON in svc_tcp_clear_pages to a WARN. Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df..df74919 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svc_sock_reclen(svsk) < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; -- cgit v0.10.2 From 1f691b07c5dc51b2055834f58c0f351defd97f27 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 10:55:40 -0400 Subject: svcrpc: don't error out on small tcp fragment Though clients we care about mostly don't do this, it is possible for rpc requests to be sent in multiple fragments. Here we have a sanity check to ensure that the final received rpc isn't too small--except that the number we're actually checking is the length of just the final fragment, not of the whole rpc. So a perfectly legal rpc that's unluckily fragmented could cause the server to close the connection here. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index df74919..305374d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) { + if (svsk->sk_datalen < 8) { svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ } -- cgit v0.10.2 From 89f6c3362cb5a6bce96dbe6aa15b4749c2262b21 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 19 Jun 2013 15:47:37 -0400 Subject: nfsd4: delegation-based open reclaims should bypass permissions We saw a v4.0 client's create fail as follows: - open create succeeds and gets a read delegation - client attempts to set mode on new file, gets DELAY while server recalls delegation. - client attempts a CLAIM_DELEGATE_CUR open using the delegation, gets error because of new file mode. This probably can't happen on a recent kernel since we're no longer giving out delegations on create opens. Nevertheless, it's a bug--reclaim opens should bypass permission checks. Reported-by: Steve Dickson Reported-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 1a1ff24..a7cee86 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -296,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru nfsd4_set_open_owner_reply_cache(cstate, open, resfh); accmode = NFSD_MAY_NOP; - if (open->op_created) + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) accmode |= NFSD_MAY_OWNER_OVERRIDE; status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); -- cgit v0.10.2 From 0a262ffb75275e48caa17e80b96504354e94f6c2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 17 Jun 2013 17:29:40 -0400 Subject: nfsd4: do not throw away 4.1 lock state on last unlock This reverts commit eb2099f31b0f090684a64ef8df44a30ff7c45fc2 "nfsd4: release lockowners on last unlock in 4.1 case". Trond identified language in rfc 5661 section 8.2.4 which forbids this behavior: Stateids associated with byte-range locks are an exception. They remain valid even if a LOCKU frees all remaining locks, so long as the open file with which they are associated remains open, unless the client frees the stateids via the FREE_STATEID operation. And bakeathon 2013 testing found a 4.1 freebsd client was getting an incorrect BAD_STATEID return from a FREE_STATEID in the above situation and then failing. The spec language honestly was probably a mistake but at this point with implementations already following it we're probably stuck with that. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fcc0610..a7d8a11 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4514,7 +4514,6 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { - struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4547,10 +4546,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto out; } - lo = lockowner(stp->st_stateowner); locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lo; + file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); file_lock->fl_pid = current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -4569,11 +4567,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { - WARN_ON_ONCE(cstate->replay_owner); - release_lockowner(lo); - } - out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) -- cgit v0.10.2 From d08d32e6e5c0fee127d3b20d70f5b59d8af0a261 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Jun 2013 11:05:32 -0400 Subject: nfsd4: return delegation immediately if lease fails This case shouldn't happen--the administrator shouldn't really allow other applications access to the export until clients have had the chance to reclaim their state--but if it does then we should set the "return this lease immediately" bit on the reply. That still leaves some small races, but it's the best the protocol allows us to do in the case a lease is ripped out from under us.... Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a7d8a11..d44a4bf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3143,8 +3143,10 @@ out_free: out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; + } /* 4.1 client asking for a delegation? */ if (open->op_deleg_want) -- cgit v0.10.2 From f9e1aedc6c79f18bb56caa3735b94217c4ec1e0c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: remove races with queuing an upcall. We currently queue an upcall after setting CACHE_PENDING, and dequeue after clearing CACHE_PENDING. So a request should only be present when CACHE_PENDING is set. However we don't combine the test and the enqueue/dequeue in a protected region, so it is possible (if unlikely) for a race to result in a request being queued without CACHE_PENDING set, or a request to be absent despite CACHE_PENDING. So: include a test for CACHE_PENDING inside the regions of enqueue and dequeue where queue_lock is held, and abort the operation if the value is not as expected. Also remove the early 'return' from cache_dequeue() to ensure that it always removes all entries: As there is no locking between setting CACHE_PENDING and calling sunrpc_cache_pipe_upcall it is not inconceivable for some other thread to clear CACHE_PENDING and then someone else to set it and call sunrpc_cache_pipe_upcall, both before the original threads completed the call. With this, it perfectly safe and correct to: - call cache_dequeue() if and only if we have just cleared CACHE_PENDING - call sunrpc_cache_pipe_upcall() (via cache_make_upcall) if and only if we have just set CACHE_PENDING. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: Bodo Stroesser Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 3b3f14f..8e964ae 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1036,23 +1036,32 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq; + struct cache_queue *cq, *tmp; + struct cache_request *cr; + struct list_head dequeued; + + INIT_LIST_HEAD(&dequeued); spin_lock(&queue_lock); - list_for_each_entry(cq, &detail->queue, list) + list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { - struct cache_request *cr = container_of(cq, struct cache_request, q); + cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; + if (test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; if (cr->readers != 0) continue; - list_del(&cr->q.list); - spin_unlock(&queue_lock); - cache_put(cr->item, detail); - kfree(cr->buf); - kfree(cr); - return; + list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); + while (!list_empty(&dequeued)) { + cr = list_entry(dequeued.next, struct cache_request, q.list); + list_del(&cr->q.list); + cache_put(cr->item, detail); + kfree(cr->buf); + kfree(cr); + } } /* @@ -1166,6 +1175,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) char *buf; struct cache_request *crq; + int ret = 0; if (!detail->cache_request) return -EINVAL; @@ -1191,10 +1201,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - list_add_tail(&crq->q.list, &detail->queue); + if (test_bit(CACHE_PENDING, &h->flags)) + list_add_tail(&crq->q.list, &detail->queue); + else + /* Lost a race, no longer PENDING, so don't enqueue */ + ret = -EAGAIN; spin_unlock(&queue_lock); wake_up(&queue_wait); - return 0; + if (ret == -EAGAIN) { + kfree(buf); + kfree(crq); + } + return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); -- cgit v0.10.2 From 2a1c7f53fd31e46f51780b61eb99fffef4c3c5a6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: use cache_fresh_unlocked consistently and correctly. cache_fresh_unlocked() is called when a cache entry has been updated and ensures that if there were any pending upcalls, they are cleared. So every time we update a cache entry, we should call this, and this should be the only way that we try to clear pending calls (that sort of uniformity makes code sooo much easier to read). try_to_negate_entry() will (possibly) mark an entry as negative. If it doesn't, it is because the entry already is VALID. So the entry will be valid on exit, so it is appropriate to call cache_fresh_unlocked(). So tidy up try_to_negate_entry() to do that, and remove partial open-coded cache_fresh_unlocked() from the one call-site of try_to_negate_entry(). In the other branch of the 'switch(cache_make_upcall())', we again have a partial open-coded version of cache_fresh_unlocked(). Replace that with a real call. And again in cache_clean(), use a real call to cache_fresh_unlocked(). These call sites might previously have called cache_revisit_request() if CACHE_PENDING wasn't set. This is never necessary because cache_revisit_request() can only do anything if the item is in the cache_defer_hash, However any time that an item is added to the cache_defer_hash (setup_deferral), the code immediately tests CACHE_PENDING, and removes the entry again if it is clear. So all other places we only need to 'cache_revisit_request' if we've just cleared CACHE_PENDING. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8e964ae..29c4633 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -228,15 +228,14 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h write_lock(&detail->hash_lock); rv = cache_is_valid(h); - if (rv != -EAGAIN) { - write_unlock(&detail->hash_lock); - return rv; + if (rv == -EAGAIN) { + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + rv = -ENOENT; } - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); write_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); - return -ENOENT; + return rv; } /* @@ -275,13 +274,10 @@ int cache_check(struct cache_detail *detail, if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { switch (cache_make_upcall(detail, h)) { case -EINVAL: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); rv = try_to_negate_entry(detail, h); break; case -EAGAIN: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); + cache_fresh_unlocked(h, detail); break; } } @@ -457,9 +453,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - cache_revisit_request(ch); + cache_fresh_unlocked(ch, d); cache_put(ch, d); } } else -- cgit v0.10.2 From 013920eb5db97e99a4c30c8400f1c616e2a8b0a2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: ensure items removed from cache do not have pending upcalls. It is possible for a race to set CACHE_PENDING after cache_clean() has removed a cache entry from the cache. If CACHE_PENDING is still set when the entry is finally 'put', the cache_dequeue() will never happen and we can leak memory. So set a new flag 'CACHE_CLEANED' when we remove something from the cache, and don't queue any upcall if it is set. If CACHE_PENDING is set before CACHE_CLEANED, the call that cache_clean() makes to cache_fresh_unlocked() will free memory as needed. If CACHE_PENDING is set after CACHE_CLEANED, the test in sunrpc_cache_pipe_upcall will ensure that the memory is not allocated. Reported-by: Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 303399b..8419f7d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -57,6 +57,7 @@ struct cache_head { #define CACHE_VALID 0 /* Entry contains valid data */ #define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ #define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ +#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 29c4633..b12144c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(cache_check); * a current pointer into that list and into the table * for that entry. * - * Each time clean_cache is called it finds the next non-empty entry + * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * @@ -453,6 +453,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { + set_bit(CACHE_CLEANED, &ch->flags); cache_fresh_unlocked(ch, d); cache_put(ch, d); } @@ -1178,6 +1179,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) warn_no_listener(detail); return -EINVAL; } + if (test_bit(CACHE_CLEANED, &h->flags)) + /* Too late to make an upcall */ + return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) -- cgit v0.10.2 From 7715cde86857d4bb40f43f1ee971cf906eaf1b9c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: net/sunrpc: xpt_auth_cache should be ignored when expired. commit d202cce8963d9268ff355a386e20243e8332b308 sunrpc: never return expired entries in sunrpc_cache_lookup moved the 'entry is expired' test from cache_check to sunrpc_cache_lookup, so that it happened early and some races could safely be ignored. However the ip_map (in svcauth_unix.c) has a separate single-item cache which allows quick lookup without locking. An entry in this case would not be subject to the expiry test and so could be used well after it has expired. This is not normally a big problem because the first time it is used after it is expired an up-call will be scheduled to refresh the entry (if it hasn't been scheduled already) and the old entry will then be invalidated. So on the second attempt to use it after it has expired, ip_map_cached_get will discard it. However that is subtle and not ideal, so replace the "!cache_valid" test with "cache_is_expired". In doing this we drop the test on the "CACHE_VALID" bit. This is unnecessary as the bit is never cleared, and an entry will only be cached if the bit is set. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 8419f7d..6ce690d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -149,6 +149,24 @@ struct cache_deferred_req { int too_many); }; +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. + */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} extern const struct file_operations cache_file_operations_pipefs; extern const struct file_operations content_file_operations_pipefs; @@ -182,15 +200,10 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -static inline int cache_valid(struct cache_head *h) +static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - /* If an item has been unhashed pending removal when - * the refcount drops to 0, the expiry_time will be - * set to 0. We don't want to consider such items - * valid in this context even though CACHE_VALID is - * set. - */ - return (h->expiry_time != 0 && test_bit(CACHE_VALID, &h->flags)); + return (h->expiry_time < seconds_since_boot()) || + (detail->flush_time > h->last_refresh); } extern int cache_check(struct cache_detail *detail, @@ -251,25 +264,6 @@ static inline int get_uint(char **bpp, unsigned int *anint) return 0; } -/* - * timestamps kept in the cache are expressed in seconds - * since boot. This is the best for measuring differences in - * real time. - */ -static inline time_t seconds_since_boot(void) -{ - struct timespec boot; - getboottime(&boot); - return get_seconds() - boot.tv_sec; -} - -static inline time_t convert_to_wallclock(time_t sinceboot) -{ - struct timespec boot; - getboottime(&boot); - return boot.tv_sec + sinceboot; -} - static inline time_t get_expiry(char **bpp) { int rv; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b12144c..5478a01 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) -{ - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); -} - struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a..a98853d 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); ipm = xprt->xpt_auth_cache; if (ipm != NULL) { - if (!cache_valid(&ipm->h)) { + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + if (cache_is_expired(sn->ip_map_cache, &ipm->h)) { /* * The entry has been invalidated since it was * remembered, e.g. by a second mount from the * same IP address. */ - sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); cache_put(&ipm->h, sn->ip_map_cache); -- cgit v0.10.2 From 0bebc633f1428163c9659fd16b34c745e60a0757 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc: Don't schedule an upcall on a replaced cache entry. When a cache entry is replaced, the "expiry_time" get set to zero by a call to "cache_fresh_locked(..., 0)" at the end of "sunrpc_cache_update". This low expiry time makes cache_check() think that the 'refresh_age' is negative, so the 'age' is comparatively large and a refresh is triggered. However refreshing a replaced entry it pointless, it cannot achieve anything useful. So teach cache_check to ignore a low refresh_age when expiry_time is zero. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5478a01..49eb370 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -262,7 +262,8 @@ int cache_check(struct cache_detail *detail, if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; - } else if (rv == -EAGAIN || age > refresh_age/2) { + } else if (rv == -EAGAIN || + (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%ld, age=%ld\n", refresh_age, age); if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { -- cgit v0.10.2 From 0979292bfa301cb87d936b69af428090d2feea1b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Jul 2013 13:44:45 -0400 Subject: svcrpc: fix failures to handle -1 uid's As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1 (0xffff) uid or gid would fail with a badcred error. Commit afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 "svcrpc: fix failures to handle -1 uid's and gid's" fixed part of the problem, but overlooked the gid upcall--the kernel can request supplementary gid's for the -1 uid, but mountd's attempt write a response will get -EINVAL. Symptoms were nfsd failing to reply to the first attempt to use a newly negotiated krb5 context. Reported-by: Sven Geggus Tested-by: Sven Geggus Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a98853d..621ca7b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); -- cgit v0.10.2 From f0f51f5cdd107971282ae18f00a6fa03d69407a0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Jun 2013 14:26:02 -0400 Subject: nfsd4: allow destroy_session over destroyed session RFC 5661 allows a client to destroy a session using a compound associated with the destroyed session, as long as the DESTROY_SESSION op is the last op of the compound. We attempt to allow this, but testing against a Solaris client (which does destroy sessions in this way) showed that we were failing the DESTROY_SESSION with NFS4ERR_DELAY, because we assumed the reference count on the session (held by us) represented another rpc in progress over this session. Fix this by noting that in this case the expected reference count is 1, not 0. Also, note as long as the session holds a reference to the compound we're destroying, we can't free it here--instead, delay the free till the final put in nfs4svc_encode_compoundres. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d44a4bf..25ae6ce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -97,19 +97,20 @@ nfs4_lock_state(void) static void free_session(struct nfsd4_session *); -void nfsd4_put_session(struct nfsd4_session *ses) +static bool is_session_dead(struct nfsd4_session *ses) { - atomic_dec(&ses->se_ref); + return ses->se_flags & NFS4_SESSION_DEAD; } -static bool is_session_dead(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); } -static __be32 mark_session_dead_locked(struct nfsd4_session *ses) +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { - if (atomic_read(&ses->se_ref)) + if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; ses->se_flags |= NFS4_SESSION_DEAD; return nfs_ok; @@ -2074,6 +2075,7 @@ nfsd4_destroy_session(struct svc_rqst *r, { struct nfsd4_session *ses; __be32 status; + int ref_held_by_me = 0; struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); nfs4_lock_state(); @@ -2081,6 +2083,7 @@ nfsd4_destroy_session(struct svc_rqst *r, if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; + ref_held_by_me++; } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); @@ -2091,17 +2094,19 @@ nfsd4_destroy_session(struct svc_rqst *r, status = nfserr_wrong_cred; if (!mach_creds_match(ses->se_client, r)) goto out_client_lock; - status = mark_session_dead_locked(ses); + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) - goto out_client_lock; + goto out_put_session; unhash_session(ses); spin_unlock(&nn->client_lock); nfsd4_probe_callback_sync(ses->se_client); spin_lock(&nn->client_lock); - free_session(ses); status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); out_client_lock: spin_unlock(&nn->client_lock); out: diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c102d25..0c0f3ea9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3760,13 +3760,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - put_client_renew(cs->session->se_client); + spin_lock(&nn->client_lock); nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); } return 1; } -- cgit v0.10.2 From d109148111cdfcdae94f797dc142468bd0ff7557 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Jul 2013 18:00:56 -0400 Subject: nfsd4: support minorversion 1 by default We now have minimal minorversion 1 support; turn it on by default. This can still be turned off with "echo -4.1 >/proc/fs/nfsd/versions". Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 262df5c..6b9f48c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -116,7 +116,7 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion; +u32 nfsd_supported_minorversion = 1; int nfsd_vers(int vers, enum vers_op change) { -- cgit v0.10.2