From eb48241bb4484e56171fcb5beb39f530a2cd484e Mon Sep 17 00:00:00 2001
From: Zhao Hongjiang <zhaohongjiang@huawei.com>
Date: Fri, 3 May 2013 18:08:24 +0800
Subject: nfsd: get rid of the unused functions in vfs

The fh_lock_parent(), nfsd_truncate(), nfsd_notify_change() and
nfsd_sync_dir() fuctions are neither implemented nor used, just remove
them.

Signed-off-by: Zhao Hongjiang <zhaohongjiang@huawei.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 5b58941..8d2b40d 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -39,7 +39,6 @@
 typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
 
 /* nfsd/vfs.c */
-int		fh_lock_parent(struct svc_fh *, struct dentry *);
 int		nfsd_racache_init(int);
 void		nfsd_racache_shutdown(void);
 int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
@@ -92,17 +91,13 @@ __be32		nfsd_remove(struct svc_rqst *,
 				struct svc_fh *, char *, int);
 __be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
 				char *name, int len);
-int		nfsd_truncate(struct svc_rqst *, struct svc_fh *,
-				unsigned long size);
 __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
 			     loff_t *, struct readdir_cd *, filldir_t);
 __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 				struct kstatfs *, int access);
 
-int		nfsd_notify_change(struct inode *, struct iattr *);
 __be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
 				struct dentry *, int);
-int		nfsd_sync_dir(struct dentry *dp);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
-- 
cgit v0.10.2


From 4f540e29dc20b87d460559bce184d2238237f48b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 7 May 2013 11:57:52 -0400
Subject: nfsd4: store correct client minorversion for >=4.2

This code assumes that any client using exchange_id is using NFSv4.1,
but with the introduction of 4.2 that will no longer true.

This main effect of this is that client callbacks will use the same
minorversion as that used on the exchange_id.

Note that clients are forbidden from mixing 4.1 and 4.2 compounds.  (See
rfc 5661, section 2.7, #13: "A client MUST NOT attempt to use a stateid,
filehandle, or similar returned object from the COMPOUND procedure with
minor version X for another COMPOUND procedure with minor version Y,
where X != Y.")  However, we do not currently attempt to enforce this
except in the case of mixing zero minor version with non-zero minor
versions.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 316ec84..91ead0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1709,7 +1709,7 @@ out_new:
 		status = nfserr_jukebox;
 		goto out;
 	}
-	new->cl_minorversion = 1;
+	new->cl_minorversion = cstate->minorversion;
 
 	gen_clid(new, nn);
 	add_to_unconfirmed(new);
-- 
cgit v0.10.2


From 0d422afb892e3f993cf934b76a2c2ef839c446e0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 8 May 2013 18:05:41 -0400
Subject: security: cap_inode_getsecctx returning garbage

We shouldn't be returning success from this function without also
filling in the return values ctx and ctxlen.

Note currently this doesn't appear to cause bugs since the only
inode_getsecctx caller I can find is fs/sysfs/inode.c, which only calls
this if security_inode_setsecurity succeeds.  Assuming
security_inode_setsecurity is set to cap_inode_setsecurity whenever
inode_getsecctx is set to cap_inode_getsecctx, this function can never
actually called.

So I noticed this only because the server labeled NFS patches add a real
caller.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/security/capability.c b/security/capability.c
index 1728d4e..83efc90 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -843,7 +843,7 @@ static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
 
 static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
 {
-	return 0;
+	return -EOPNOTSUPP;
 }
 #ifdef CONFIG_KEYS
 static int cap_key_alloc(struct key *key, const struct cred *cred,
-- 
cgit v0.10.2


From 4bdc33ed5bd9fbaa243bda6fdccb22674aed6305 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Thu, 2 May 2013 13:18:57 -0400
Subject: NFSDv4.2: Add NFS v4.2 support to the NFS server

This enables NFSv4.2 support for the server. To enable this
code do the following:
  echo "+4.2" >/proc/fs/nfsd/versions

after the nfsd kernel module is loaded.

On its own this does nothing except allow the server to respond to
compounds with minorversion set to 2.  All the new NFSv4.2 features are
optional, so this is perfectly legal.

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6cd86e0..9aeacdd 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1567,6 +1567,7 @@ struct nfsd4_minorversion_ops {
 static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
 	[0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
 	[1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
+	[2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
 };
 
 static __be32
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index c0d9317..15e7e1d 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -24,7 +24,7 @@
 /*
  * nfsd version
  */
-#define NFSD_SUPPORTED_MINOR_VERSION	1
+#define NFSD_SUPPORTED_MINOR_VERSION	2
 /*
  * Maximum blocksizes supported by daemon under various circumstances.
  */
-- 
cgit v0.10.2


From 18032ca062e621e15683cb61c066ef3dc5414a7b Mon Sep 17 00:00:00 2001
From: David Quigley <dpquigl@davequigley.com>
Date: Thu, 2 May 2013 13:19:10 -0400
Subject: NFSD: Server implementation of MAC Labeling

Implement labeled NFS on the server: encoding and decoding, and writing
and reading, of file labels.

Enabled with CONFIG_NFSD_V4_SECURITY_LABEL.

Signed-off-by: Matthew N. Dodd <Matthew.Dodd@sparta.com>
Signed-off-by: Miguel Rodel Felipe <Rodel_FM@dsi.a-star.edu.sg>
Signed-off-by: Phua Eu Gene <PHUA_Eu_Gene@dsi.a-star.edu.sg>
Signed-off-by: Khin Mi Mi Aung <Mi_Mi_AUNG@dsi.a-star.edu.sg>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 430b687..dc8f1ef 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -81,6 +81,22 @@ config NFSD_V4
 
 	  If unsure, say N.
 
+config NFSD_V4_SECURITY_LABEL
+	bool "Provide Security Label support for NFSv4 server"
+	depends on NFSD_V4 && SECURITY
+	help
+
+	Say Y here if you want enable fine-grained security label attribute
+	support for NFS version 4.  Security labels allow security modules like
+	SELinux and Smack to label files to facilitate enforcement of their policies.
+	Without this an NFSv4 mount will have the same label on each file.
+
+	If you do not wish to enable fine-grained security labels SELinux or
+	Smack policies on NFSv4 files, say N.
+
+	WARNING: there is still a chance of backwards-incompatible protocol changes.
+	For now we recommend "Y" only for developers and testers."
+
 config NFSD_FAULT_INJECTION
 	bool "NFS server manual fault injection"
 	depends on NFSD_V4 && DEBUG_KERNEL
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 27d74a2..1a1ff24 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -42,6 +42,36 @@
 #include "current_stateid.h"
 #include "netns.h"
 
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+#include <linux/security.h>
+
+static inline void
+nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+{
+	struct inode *inode = resfh->fh_dentry->d_inode;
+	int status;
+
+	mutex_lock(&inode->i_mutex);
+	status = security_inode_setsecctx(resfh->fh_dentry,
+		label->data, label->len);
+	mutex_unlock(&inode->i_mutex);
+
+	if (status)
+		/*
+		 * XXX: We should really fail the whole open, but we may
+		 * already have created a new file, so it may be too
+		 * late.  For now this seems the least of evils:
+		 */
+		bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
+	return;
+}
+#else
+static inline void
+nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+{ }
+#endif
+
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
 static u32 nfsd_attrmask[] = {
@@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
 					(u32 *)open->op_verf.data,
 					&open->op_truncate, &open->op_created);
 
+		if (!status && open->op_label.len)
+			nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval);
+
 		/*
 		 * Following rfc 3530 14.2.16, use the returned bitmask
 		 * to indicate which attributes we used to store the
@@ -637,6 +670,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
+	if (create->cr_label.len)
+		nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval);
+
 	if (create->cr_acl != NULL)
 		do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
 				create->cr_bmval);
@@ -916,6 +952,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 					    setattr->sa_acl);
 	if (status)
 		goto out;
+	if (setattr->sa_label.len)
+		status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh,
+				&setattr->sa_label);
+	if (status)
+		goto out;
 	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
 				0, (time_t)0);
 out:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9aeacdd..dfca512 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -55,6 +55,11 @@
 #include "cache.h"
 #include "netns.h"
 
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+#include <linux/security.h>
+#endif
+
+
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
 /*
@@ -242,7 +247,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 
 static __be32
 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
-		   struct iattr *iattr, struct nfs4_acl **acl)
+		   struct iattr *iattr, struct nfs4_acl **acl,
+		   struct xdr_netobj *label)
 {
 	int expected_len, len = 0;
 	u32 dummy32;
@@ -380,6 +386,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 			goto xdr_error;
 		}
 	}
+
+	label->len = 0;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32); /* lfs: we don't use it */
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32); /* pi: we don't use it either */
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		READ_BUF(dummy32);
+		if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)
+			return nfserr_badlabel;
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		label->data = kzalloc(dummy32 + 1, GFP_KERNEL);
+		if (!label->data)
+			return nfserr_jukebox;
+		defer_free(argp, kfree, label->data);
+		memcpy(label->data, buf, dummy32);
+	}
+#endif
+
 	if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
 	    || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
 	    || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
@@ -576,7 +608,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 		return status;
 
 	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
-				    &create->cr_acl);
+				    &create->cr_acl, &create->cr_label);
 	if (status)
 		goto out;
 
@@ -827,7 +859,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 		case NFS4_CREATE_UNCHECKED:
 		case NFS4_CREATE_GUARDED:
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				&open->op_iattr, &open->op_acl);
+				&open->op_iattr, &open->op_acl, &open->op_label);
 			if (status)
 				goto out;
 			break;
@@ -841,7 +873,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 			READ_BUF(NFS4_VERIFIER_SIZE);
 			COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				&open->op_iattr, &open->op_acl);
+				&open->op_iattr, &open->op_acl, &open->op_label);
 			if (status)
 				goto out;
 			break;
@@ -1063,7 +1095,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
 	if (status)
 		return status;
 	return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
-				  &setattr->sa_acl);
+				  &setattr->sa_acl, &setattr->sa_label);
 }
 
 static __be32
@@ -1954,6 +1986,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
 			      FATTR4_WORD0_RDATTR_ERROR)
 #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
 
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+static inline __be32
+nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
+{
+	__be32 *p = *pp;
+
+	if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4))
+		return nfserr_resource;
+
+	/*
+	 * For now we use a 0 here to indicate the null translation; in
+	 * the future we may place a call to translation code here.
+	 */
+	if ((*buflen -= 8) < 0)
+		return nfserr_resource;
+
+	WRITE32(0); /* lfs */
+	WRITE32(0); /* pi */
+	p = xdr_encode_opaque(p, context, len);
+	*buflen -= (XDR_QUADLEN(len) << 2) + 4;
+
+	*pp = p;
+	return 0;
+}
+#else
+static inline __be32
+nfsd4_encode_security_label(struct svc_rqst *rqstp, struct dentry *dentry, __be32 **pp, int *buflen)
+{ return 0; }
+#endif
+
 static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
 {
 	/* As per referral draft:  */
@@ -2013,6 +2075,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	int err;
 	int aclsupport = 0;
 	struct nfs4_acl *acl = NULL;
+	void *context = NULL;
+	int contextlen;
+	bool contextsupport = false;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	u32 minorversion = resp->cstate.minorversion;
 	struct path path = {
@@ -2066,6 +2131,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 		}
 	}
 
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+	if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+			bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+		err = security_inode_getsecctx(dentry->d_inode,
+						&context, &contextlen);
+		contextsupport = (err == 0);
+		if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
+			if (err == -EOPNOTSUPP)
+				bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL;
+			else if (err)
+				goto out_nfserr;
+		}
+	}
+#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
+
 	if (bmval2) {
 		if ((buflen -= 16) < 0)
 			goto out_resource;
@@ -2094,6 +2174,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 
 		if (!aclsupport)
 			word0 &= ~FATTR4_WORD0_ACL;
+		if (!contextsupport)
+			word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
 		if (!word2) {
 			if ((buflen -= 12) < 0)
 				goto out_resource;
@@ -2401,6 +2483,12 @@ out_acl:
 			get_parent_attributes(exp, &stat);
 		WRITE64(stat.ino);
 	}
+	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
+		status = nfsd4_encode_security_label(rqstp, context,
+				contextlen, &p, &buflen);
+		if (status)
+			goto out;
+	}
 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
 		WRITE32(3);
 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
@@ -2413,6 +2501,8 @@ out_acl:
 	status = nfs_ok;
 
 out:
+	if (context)
+		security_release_secctx(context, contextlen);
 	kfree(acl);
 	if (fhp == &tempfh)
 		fh_put(&tempfh);
@@ -3177,16 +3267,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 {
 	__be32 *p;
 
-	RESERVE_SPACE(12);
+	RESERVE_SPACE(16);
 	if (nfserr) {
-		WRITE32(2);
+		WRITE32(3);
+		WRITE32(0);
 		WRITE32(0);
 		WRITE32(0);
 	}
 	else {
-		WRITE32(2);
+		WRITE32(3);
 		WRITE32(setattr->sa_bmval[0]);
 		WRITE32(setattr->sa_bmval[1]);
+		WRITE32(setattr->sa_bmval[2]);
 	}
 	ADJUST_ARGS();
 	return nfserr;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 15e7e1d..2bbd94e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -328,6 +328,13 @@ void		nfsd_lockd_shutdown(void);
 #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
 
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL)
+#else
+#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0
+#endif
+
 static inline u32 nfsd_suppattrs0(u32 minorversion)
 {
 	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
@@ -342,8 +349,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion)
 
 static inline u32 nfsd_suppattrs2(u32 minorversion)
 {
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
-			    : NFSD4_SUPPORTED_ATTRS_WORD2;
+	switch (minorversion) {
+	default: return NFSD4_2_SUPPORTED_ATTRS_WORD2;
+	case 1:  return NFSD4_1_SUPPORTED_ATTRS_WORD2;
+	case 0:  return NFSD4_SUPPORTED_ATTRS_WORD2;
+	}
 }
 
 /* These will return ERR_INVAL if specified in GETATTR or READDIR. */
@@ -356,7 +366,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
 #define NFSD_WRITEABLE_ATTRS_WORD1 \
 	(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
 	| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL
+#else
 #define NFSD_WRITEABLE_ATTRS_WORD2 0
+#endif
 
 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
 	NFSD_WRITEABLE_ATTRS_WORD0
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 84ce601..1e757fa 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_NFSD_V3
 #include "xdr3.h"
@@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry)
 		return 0;
 	return 1;
 }
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct xdr_netobj *label)
+{
+	__be32 error;
+	int host_error;
+	struct dentry *dentry;
+
+	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
+	if (error)
+		return error;
+
+	dentry = fhp->fh_dentry;
+
+	mutex_lock(&dentry->d_inode->i_mutex);
+	host_error = security_inode_setsecctx(dentry, label->data, label->len);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+	return nfserrno(host_error);
+}
+#else
+__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct xdr_netobj *label)
+{
+	return nfserr_notsupp;
+}
+#endif
+
 #endif /* defined(CONFIG_NFSD_V4) */
 
 #ifdef CONFIG_NFSD_V3
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 8d2b40d..a4be2e3 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -55,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *);
 __be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
                     struct nfs4_acl *);
 int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
+__be32          nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
+		    struct xdr_netobj *);
 #endif /* CONFIG_NFSD_V4 */
 __be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 3b271d2..b3ed644 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -40,6 +40,7 @@
 #include "state.h"
 #include "nfsd.h"
 
+#define NFSD4_MAX_SEC_LABEL_LEN	2048
 #define NFSD4_MAX_TAGLEN	128
 #define XDR_LEN(n)                     (((n) + 3) & ~3)
 
@@ -118,6 +119,7 @@ struct nfsd4_create {
 	struct iattr	cr_iattr;           /* request */
 	struct nfsd4_change_info  cr_cinfo; /* response */
 	struct nfs4_acl *cr_acl;
+	struct xdr_netobj cr_label;
 };
 #define cr_linklen	u.link.namelen
 #define cr_linkname	u.link.name
@@ -246,6 +248,7 @@ struct nfsd4_open {
 	struct nfs4_file *op_file;          /* used during processing */
 	struct nfs4_ol_stateid *op_stp;	    /* used during processing */
 	struct nfs4_acl *op_acl;
+	struct xdr_netobj op_label;
 };
 #define op_iattr	iattr
 
@@ -330,6 +333,7 @@ struct nfsd4_setattr {
 	u32		sa_bmval[3];        /* request */
 	struct iattr	sa_iattr;           /* request */
 	struct nfs4_acl *sa_acl;
+	struct xdr_netobj sa_label;
 };
 
 struct nfsd4_setclientid {
-- 
cgit v0.10.2


From 2fccbd9cc0fdca649b01f1e2d96e5ef85256341a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 24 Sep 2012 15:53:29 -0400
Subject: sunrpc: server back channel needs no rpcbind method

XPRT_BOUND is set on server backchannel xprts by xs_setup_bc_tcp()
(using xprt_set_bound()), and is never cleared, so ->rpcbind() will
never need to be called.

Reported-by: "Myklebust, Trond" <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ffd5034..5d6b0da 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2534,7 +2534,6 @@ static struct rpc_xprt_ops bc_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
 	.alloc_slot		= xprt_alloc_slot,
-	.rpcbind		= xs_local_rpcbind,
 	.buf_alloc		= bc_malloc,
 	.buf_free		= bc_free,
 	.send_request		= bc_send_request,
-- 
cgit v0.10.2


From ba4e55bb67894136489b27372166416cd70b0756 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 15 May 2013 10:27:52 -0400
Subject: nfsd4: fix compile in !CONFIG_NFSD_V4_SECURITY_LABEL case

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index dfca512..170ea7e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2012,7 +2012,7 @@ nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be
 }
 #else
 static inline __be32
-nfsd4_encode_security_label(struct svc_rqst *rqstp, struct dentry *dentry, __be32 **pp, int *buflen)
+nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
 { return 0; }
 #endif
 
@@ -2501,8 +2501,10 @@ out_acl:
 	status = nfs_ok;
 
 out:
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 	if (context)
 		security_release_secctx(context, contextlen);
+#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 	kfree(acl);
 	if (fhp == &tempfh)
 		fh_put(&tempfh);
-- 
cgit v0.10.2


From b6040f9706c4c81cc50b50855ed70840f022bebb Mon Sep 17 00:00:00 2001
From: chaoting fan <tingchaofan@gmail.com>
Date: Thu, 28 Mar 2013 22:19:45 +0800
Subject: sunrpc: the cache_detail in cache_is_valid is unused any more

The cache_detail(*detail) in function cache_is_valid is not used any
more.

Signed-off-by: fanchaoting <fanchaoting@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 80fe5c8..3b3f14f 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -201,7 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 	return sunrpc_cache_pipe_upcall(cd, h);
 }
 
-static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
+static inline int cache_is_valid(struct cache_head *h)
 {
 	if (!test_bit(CACHE_VALID, &h->flags))
 		return -EAGAIN;
@@ -227,7 +227,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 	int rv;
 
 	write_lock(&detail->hash_lock);
-	rv = cache_is_valid(detail, h);
+	rv = cache_is_valid(h);
 	if (rv != -EAGAIN) {
 		write_unlock(&detail->hash_lock);
 		return rv;
@@ -260,7 +260,7 @@ int cache_check(struct cache_detail *detail,
 	long refresh_age, age;
 
 	/* First decide return status as best we can */
-	rv = cache_is_valid(detail, h);
+	rv = cache_is_valid(h);
 
 	/* now see if we want to start an upcall */
 	refresh_age = (h->expiry_time - h->last_refresh);
@@ -293,7 +293,7 @@ int cache_check(struct cache_detail *detail,
 			 * Request was not deferred; handle it as best
 			 * we can ourselves:
 			 */
-			rv = cache_is_valid(detail, h);
+			rv = cache_is_valid(h);
 			if (rv == -EAGAIN)
 				rv = -ETIMEDOUT;
 		}
-- 
cgit v0.10.2


From 1a9357f443d64aa41e9b0dc414953663a6fcca19 Mon Sep 17 00:00:00 2001
From: Jim Rees <rees@umich.edu>
Date: Fri, 17 May 2013 17:33:00 -0400
Subject: nfsd: avoid undefined signed overflow

In C, signed integer overflow results in undefined behavior, but unsigned
overflow wraps around. So do the subtraction first, then cast to signed.

Reported-by: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 91ead0e..72f0c4e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3427,7 +3427,7 @@ grace_disallows_io(struct net *net, struct inode *inode)
 /* Returns true iff a is later than b: */
 static bool stateid_generation_after(stateid_t *a, stateid_t *b)
 {
-	return (s32)a->si_generation - (s32)b->si_generation > 0;
+	return (s32)(a->si_generation - b->si_generation) > 0;
 }
 
 static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
-- 
cgit v0.10.2


From 442340639194762df7e61e8aabae44a18896eca1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 14 May 2013 16:53:40 -0400
Subject: svcrpc: introduce init_svc_cred

Common helper to zero out fields of the svc_cred.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index ff374ab..95c9566 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -25,6 +25,13 @@ struct svc_cred {
 	char			*cr_principal; /* for gss */
 };
 
+static inline void init_svc_cred(struct svc_cred *cred)
+{
+	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
+	cred->cr_gss_mech = NULL;
+}
+
 static inline void free_svc_cred(struct svc_cred *cred)
 {
 	if (cred->cr_group_info)
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 29b4ba9..8d7860e 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 	new->handle.data = tmp->handle.data;
 	tmp->handle.data = NULL;
 	new->mechctx = NULL;
-	new->cred.cr_group_info = NULL;
-	new->cred.cr_principal = NULL;
+	init_svc_cred(&new->cred);
 }
 
 static void
@@ -392,9 +391,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	memset(&new->seqdata, 0, sizeof(new->seqdata));
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
-	tmp->cred.cr_group_info = NULL;
 	new->cred.cr_principal = tmp->cred.cr_principal;
-	tmp->cred.cr_principal = NULL;
+	init_svc_cred(&tmp->cred);
 }
 
 static struct cache_head *
-- 
cgit v0.10.2


From 0dc1531aca7fd1440918bd55844a054e9c29acad Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 14 May 2013 16:07:13 -0400
Subject: svcrpc: store gss mech in svc_cred

Store a pointer to the gss mechanism used in the rq_cred and cl_cred.
This will make it easier to enforce SP4_MACH_CRED, which needs to
compare the mechanism used on the exchange_id with that used on
protected operations.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 72f0c4e..109b4340 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1188,6 +1188,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 	target->cr_gid = source->cr_gid;
 	target->cr_group_info = source->cr_group_info;
 	get_group_info(target->cr_group_info);
+	target->cr_gss_mech = source->cr_gss_mech;
+	if (source->cr_gss_mech)
+		gss_mech_get(source->cr_gss_mech);
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 161463e..1f911cc 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -151,6 +151,8 @@ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32);
 /* Fill in an array with a list of supported pseudoflavors */
 int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int);
 
+struct gss_api_mech * gss_mech_get(struct gss_api_mech *);
+
 /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a
  * corresponding call to gss_mech_put. */
 void gss_mech_put(struct gss_api_mech *);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 95c9566..8d71d65 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
+#include <linux/sunrpc/gss_api.h>
 #include <linux/hash.h>
 #include <linux/cred.h>
 
@@ -23,6 +24,7 @@ struct svc_cred {
 	struct group_info	*cr_group_info;
 	u32			cr_flavor; /* pseudoflavor */
 	char			*cr_principal; /* for gss */
+	struct gss_api_mech	*cr_gss_mech;
 };
 
 static inline void init_svc_cred(struct svc_cred *cred)
@@ -37,6 +39,8 @@ static inline void free_svc_cred(struct svc_cred *cred)
 	if (cred->cr_group_info)
 		put_group_info(cred->cr_group_info);
 	kfree(cred->cr_principal);
+	gss_mech_put(cred->cr_gss_mech);
+	init_svc_cred(cred);
 }
 
 struct svc_rqst;		/* forward decl */
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index defa9d3..27ce262 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm)
 }
 EXPORT_SYMBOL_GPL(gss_mech_unregister);
 
-static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
+struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
 {
 	__module_get(gm->gm_owner);
 	return gm;
 }
+EXPORT_SYMBOL(gss_mech_get);
 
 static struct gss_api_mech *
 _gss_mech_get_by_name(const char *name)
@@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(gss_pseudoflavor_to_service);
 
 char *
 gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
@@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm)
 	if (gm)
 		module_put(gm->gm_owner);
 }
+EXPORT_SYMBOL(gss_mech_put);
 
 /* The mech could probably be determined from the token instead, but it's just
  * as easy for now to pass it in. */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 8d7860e..0265bb3 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -391,7 +391,6 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	memset(&new->seqdata, 0, sizeof(new->seqdata));
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
-	new->cred.cr_principal = tmp->cred.cr_principal;
 	init_svc_cred(&tmp->cred);
 }
 
@@ -485,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd,
 		len = qword_get(&mesg, buf, mlen);
 		if (len < 0)
 			goto out;
-		gm = gss_mech_get_by_name(buf);
+		gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf);
 		status = -EOPNOTSUPP;
 		if (!gm)
 			goto out;
@@ -515,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd,
 	rscp = rsc_update(cd, &rsci, rscp);
 	status = 0;
 out:
-	gss_mech_put(gm);
 	rsc_free(&rsci);
 	if (rscp)
 		cache_put(&rscp->h, cd);
-- 
cgit v0.10.2


From 57266a6e916e2522ea61758a3ee5576b60156791 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sat, 13 Apr 2013 14:27:29 -0400
Subject: nfsd4: implement minimal SP4_MACH_CRED

Do a minimal SP4_MACH_CRED implementation suggested by Trond, ignoring
the client-provided spo_must_* arrays and just enforcing credential
checks for the minimum required operations.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 109b4340..2383d24 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1265,6 +1265,31 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
 }
 
+static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
+{
+	struct svc_cred *cr = &rqstp->rq_cred;
+	u32 service;
+
+	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
+	return service == RPC_GSS_SVC_INTEGRITY ||
+	       service == RPC_GSS_SVC_PRIVACY;
+}
+
+static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
+{
+	struct svc_cred *cr = &rqstp->rq_cred;
+
+	if (!cl->cl_mach_cred)
+		return true;
+	if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech)
+		return false;
+	if (!svc_rqst_integrity_protected(rqstp))
+		return false;
+	if (!cr->cr_principal)
+		return false;
+	return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
+}
+
 static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
 {
 	static u32 current_clientid = 1;
@@ -1642,16 +1667,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
 		return nfserr_inval;
 
-	/* Currently only support SP4_NONE */
 	switch (exid->spa_how) {
+	case SP4_MACH_CRED:
+		if (!svc_rqst_integrity_protected(rqstp))
+			return nfserr_inval;
 	case SP4_NONE:
 		break;
 	default:				/* checked by xdr code */
 		WARN_ON_ONCE(1);
 	case SP4_SSV:
 		return nfserr_encr_alg_unsupp;
-	case SP4_MACH_CRED:
-		return nfserr_serverfault;	/* no excuse :-/ */
 	}
 
 	/* Cases below refer to rfc 5661 section 18.35.4: */
@@ -1666,6 +1691,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 				status = nfserr_inval;
 				goto out;
 			}
+			if (!mach_creds_match(conf, rqstp)) {
+				status = nfserr_wrong_cred;
+				goto out;
+			}
 			if (!creds_match) { /* case 9 */
 				status = nfserr_perm;
 				goto out;
@@ -1713,6 +1742,7 @@ out_new:
 		goto out;
 	}
 	new->cl_minorversion = cstate->minorversion;
+	new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
 
 	gen_clid(new, nn);
 	add_to_unconfirmed(new);
@@ -1877,6 +1907,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	WARN_ON_ONCE(conf && unconf);
 
 	if (conf) {
+		status = nfserr_wrong_cred;
+		if (!mach_creds_match(conf, rqstp))
+			goto out_free_conn;
 		cs_slot = &conf->cl_cs_slot;
 		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
 		if (status == nfserr_replay_cache) {
@@ -1893,6 +1926,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			status = nfserr_clid_inuse;
 			goto out_free_conn;
 		}
+		status = nfserr_wrong_cred;
+		if (!mach_creds_match(unconf, rqstp))
+			goto out_free_conn;
 		cs_slot = &unconf->cl_cs_slot;
 		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
 		if (status) {
@@ -1989,6 +2025,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 	status = nfserr_badsession;
 	if (!session)
 		goto out;
+	status = nfserr_wrong_cred;
+	if (!mach_creds_match(session->se_client, rqstp))
+		goto out;
 	status = nfsd4_map_bcts_dir(&bcts->dir);
 	if (status)
 		goto out;
@@ -2031,6 +2070,9 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	status = nfserr_badsession;
 	if (!ses)
 		goto out_client_lock;
+	status = nfserr_wrong_cred;
+	if (!mach_creds_match(ses->se_client, r))
+		goto out_client_lock;
 	status = mark_session_dead_locked(ses);
 	if (status)
 		goto out_client_lock;
@@ -2061,26 +2103,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s
 	return NULL;
 }
 
-static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
+static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
 {
 	struct nfs4_client *clp = ses->se_client;
 	struct nfsd4_conn *c;
+	__be32 status = nfs_ok;
 	int ret;
 
 	spin_lock(&clp->cl_lock);
 	c = __nfsd4_find_conn(new->cn_xprt, ses);
-	if (c) {
-		spin_unlock(&clp->cl_lock);
-		free_conn(new);
-		return;
-	}
+	if (c)
+		goto out_free;
+	status = nfserr_conn_not_bound_to_session;
+	if (clp->cl_mach_cred)
+		goto out_free;
 	__nfsd4_hash_conn(new, ses);
 	spin_unlock(&clp->cl_lock);
 	ret = nfsd4_register_conn(new);
 	if (ret)
 		/* oops; xprt is already down: */
 		nfsd4_conn_lost(&new->cn_xpt_user);
-	return;
+	return nfs_ok;
+out_free:
+	spin_unlock(&clp->cl_lock);
+	free_conn(new);
+	return status;
 }
 
 static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
@@ -2172,8 +2219,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	if (status)
 		goto out_put_session;
 
-	nfsd4_sequence_check_conn(conn, session);
+	status = nfsd4_sequence_check_conn(conn, session);
 	conn = NULL;
+	if (status)
+		goto out_put_session;
 
 	/* Success! bump slot seqid */
 	slot->sl_seqid = seq->seqid;
@@ -2235,7 +2284,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 		status = nfserr_stale_clientid;
 		goto out;
 	}
-
+	if (!mach_creds_match(clp, rqstp)) {
+		status = nfserr_wrong_cred;
+		goto out;
+	}
 	expire_client(clp);
 out:
 	nfs4_unlock_state();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 170ea7e..3126210 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3321,6 +3321,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
 	return nfserr;
 }
 
+static const u32 nfs4_minimal_spo_must_enforce[2] = {
+	[1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+	      1 << (OP_EXCHANGE_ID - 32) |
+	      1 << (OP_CREATE_SESSION - 32) |
+	      1 << (OP_DESTROY_SESSION - 32) |
+	      1 << (OP_DESTROY_CLIENTID - 32)
+};
+
 static __be32
 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
 			 struct nfsd4_exchange_id *exid)
@@ -3359,6 +3367,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
 	/* state_protect4_r. Currently only support SP4_NONE */
 	BUG_ON(exid->spa_how != SP4_NONE);
 	WRITE32(exid->spa_how);
+	switch (exid->spa_how) {
+	case SP4_NONE:
+		break;
+	case SP4_MACH_CRED:
+		/* spo_must_enforce bitmap: */
+		WRITE32(2);
+		WRITE32(nfs4_minimal_spo_must_enforce[0]);
+		WRITE32(nfs4_minimal_spo_must_enforce[1]);
+		/* empty spo_must_allow bitmap: */
+		WRITE32(0);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
 
 	/* The server_owner struct */
 	WRITE64(minor_id);      /* Minor id */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 274e2a1..424d8f5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -246,6 +246,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
+	bool			cl_mach_cred;	/* SP4_MACH_CRED in force */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
-- 
cgit v0.10.2


From b78724b70599f66a91c6d6c897a81f4f87f549f4 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 15 May 2013 17:34:39 -0400
Subject: nfsd4: fail attempts to request gss on the backchannel

We don't support gss on the backchannel.  We should state that fact up
front rather than just letting things continue and later making the
client try to figure out why the backchannel isn't working.

Trond suggested instead returning NFS4ERR_NOENT.  I think it would be
tricky for the client to distinguish between the case "I don't support
gss on the backchannel" and "I can't find that in my cache, please
create another context and try that instead", and I'd prefer something
that currently doesn't have any other meaning for this operation, hence
the (somewhat arbitrary) NFS4ERR_ENCR_ALG_UNSUPP.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2383d24..c4f6339 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1872,6 +1872,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
 	return nfs_ok;
 }
 
+static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
+{
+	switch (cbs->flavor) {
+	case RPC_AUTH_NULL:
+	case RPC_AUTH_UNIX:
+		return nfs_ok;
+	default:
+		/*
+		 * GSS case: the spec doesn't allow us to return this
+		 * error.  But it also doesn't allow us not to support
+		 * GSS.
+		 * I'd rather this fail hard than return some error the
+		 * client might think it can already handle:
+		 */
+		return nfserr_encr_alg_unsupp;
+	}
+}
+
 __be32
 nfsd4_create_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
@@ -1887,6 +1905,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
 	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
 		return nfserr_inval;
+	status = nfsd4_check_cb_sec(&cr_ses->cb_sec);
+	if (status)
+		return status;
 	status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
 	if (status)
 		return status;
@@ -1996,7 +2017,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state
 {
 	struct nfsd4_session *session = cstate->session;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	__be32 status;
 
+	status = nfsd4_check_cb_sec(&bc->bc_cb_sec);
+	if (status)
+		return status;
 	spin_lock(&nn->client_lock);
 	session->se_cb_prog = bc->bc_cb_program;
 	session->se_cb_sec = bc->bc_cb_sec;
-- 
cgit v0.10.2


From 57569a707082a337e4a61a657521d79cac3528bf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 17 May 2013 16:25:32 -0400
Subject: nfsd4: allow client to send no cb_sec flavors

In testing I notice that some of the pynfs tests forget to send any
cb_sec flavors, and that we haven't necessarily errored out in that case
before.

I'll fix pynfs, but am also inclined to default to trying AUTH_NONE in
that case in case this is something clients actually do.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3126210..171fe5e4 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -460,7 +460,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
 	/* callback_sec_params4 */
 	READ_BUF(4);
 	READ32(nr_secflavs);
-	cbs->flavor = (u32)(-1);
+	if (nr_secflavs)
+		cbs->flavor = (u32)(-1);
+	else
+		/* Is this legal? Be generous, take it to mean AUTH_NONE: */
+		cbs->flavor = 0;
 	for (i = 0; i < nr_secflavs; ++i) {
 		READ_BUF(4);
 		READ32(dummy);
-- 
cgit v0.10.2


From 9a0590aec3ca5e86a64b87e004244abc4dd1faf9 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Wed, 15 May 2013 14:51:49 -0400
Subject: NFSD: Don't give out read delegations on creates

When an exclusive create is done with the mode bits
set (aka open(testfile, O_CREAT | O_EXCL, 0777)) this
causes a OPEN op followed by a SETATTR op. When a
read delegation is given in the OPEN, it causes
the SETATTR to delay with EAGAIN until the
delegation is recalled.

This patch caused exclusive creates to give out
a write delegation (which turn into no delegation)
which allows the SETATTR seamlessly succeed.

Signed-off-by: Steve Dickson <steved@redhat.com>
[bfields: do this for any CREATE, not just exclusive; comment]
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c4f6339..44dcea9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3113,8 +3113,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
 				goto out;
 			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
 				goto out;
+			/*
+			 * Also, if the file was opened for write or
+			 * create, there's a good chance the client's
+			 * about to write to it, resulting in an
+			 * immediate recall (since we don't support
+			 * write delegations):
+			 */
 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
 				flag = NFS4_OPEN_DELEGATE_WRITE;
+			else if (open->op_create == NFS4_OPEN_CREATE)
+				flag = NFS4_OPEN_DELEGATE_WRITE;
 			else
 				flag = NFS4_OPEN_DELEGATE_READ;
 			break;
-- 
cgit v0.10.2


From 99c415156c49571d0f045a8cb56e0bc24225b9d9 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 21 May 2013 16:21:25 -0400
Subject: nfsd4: clean up nfs4_open_delegation

The nfs4_open_delegation logic is unecessarily baroque.

Also stop pretending we support write delegations in several places.

Some day we will support write delegations, but when that happens adding
back in these flag parameters will be the easy part.  For now they're
just confusing.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 44dcea9..fcc0610 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -364,19 +364,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
 }
 
 static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type)
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_file *fp = stp->st_file;
 
 	dprintk("NFSD alloc_init_deleg\n");
-	/*
-	 * Major work on the lease subsystem (for example, to support
-	 * calbacks on stat) will be required before we can support
-	 * write delegations properly.
-	 */
-	if (type != NFS4_OPEN_DELEGATE_READ)
-		return NULL;
 	if (fp->fi_had_conflict)
 		return NULL;
 	if (num_delegations > max_delegations)
@@ -397,7 +390,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	get_nfs4_file(fp);
 	dp->dl_file = fp;
-	dp->dl_type = type;
+	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
 	fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
@@ -3020,13 +3013,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
 	return fl;
 }
 
-static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+static int nfs4_setlease(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_file;
 	struct file_lock *fl;
 	int status;
 
-	fl = nfs4_alloc_init_lease(dp, flag);
+	fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
 	if (!fl)
 		return -ENOMEM;
 	fl->fl_file = find_readable_file(fp);
@@ -3044,12 +3037,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
 	return 0;
 }
 
-static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+static int nfs4_set_delegation(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_file;
 
 	if (!fp->fi_lease)
-		return nfs4_setlease(dp, flag);
+		return nfs4_setlease(dp);
 	spin_lock(&recall_lock);
 	if (fp->fi_had_conflict) {
 		spin_unlock(&recall_lock);
@@ -3085,6 +3078,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
 
 /*
  * Attempt to hand out a delegation.
+ *
+ * Note we don't support write delegations, and won't until the vfs has
+ * proper support for them.
  */
 static void
 nfs4_open_delegation(struct net *net, struct svc_fh *fh,
@@ -3093,26 +3089,26 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
 	struct nfs4_delegation *dp;
 	struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
 	int cb_up;
-	int status = 0, flag = 0;
+	int status = 0;
 
 	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
-	flag = NFS4_OPEN_DELEGATE_NONE;
 	open->op_recall = 0;
 	switch (open->op_claim_type) {
 		case NFS4_OPEN_CLAIM_PREVIOUS:
 			if (!cb_up)
 				open->op_recall = 1;
-			flag = open->op_delegate_type;
-			if (flag == NFS4_OPEN_DELEGATE_NONE)
-				goto out;
+			if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
+				goto out_no_deleg;
 			break;
 		case NFS4_OPEN_CLAIM_NULL:
-			/* Let's not give out any delegations till everyone's
-			 * had the chance to reclaim theirs.... */
+			/*
+			 * Let's not give out any delegations till everyone's
+			 * had the chance to reclaim theirs....
+			 */
 			if (locks_in_grace(net))
-				goto out;
+				goto out_no_deleg;
 			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
-				goto out;
+				goto out_no_deleg;
 			/*
 			 * Also, if the file was opened for write or
 			 * create, there's a good chance the client's
@@ -3121,20 +3117,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
 			 * write delegations):
 			 */
 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-				flag = NFS4_OPEN_DELEGATE_WRITE;
-			else if (open->op_create == NFS4_OPEN_CREATE)
-				flag = NFS4_OPEN_DELEGATE_WRITE;
-			else
-				flag = NFS4_OPEN_DELEGATE_READ;
+				goto out_no_deleg;
+			if (open->op_create == NFS4_OPEN_CREATE)
+				goto out_no_deleg;
 			break;
 		default:
-			goto out;
+			goto out_no_deleg;
 	}
-
-	dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag);
+	dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh);
 	if (dp == NULL)
 		goto out_no_deleg;
-	status = nfs4_set_delegation(dp, flag);
+	status = nfs4_set_delegation(dp);
 	if (status)
 		goto out_free;
 
@@ -3142,24 +3135,21 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
 
 	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
 		STATEID_VAL(&dp->dl_stid.sc_stateid));
-out:
-	open->op_delegate_type = flag;
-	if (flag == NFS4_OPEN_DELEGATE_NONE) {
-		if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
-		    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
-			dprintk("NFSD: WARNING: refusing delegation reclaim\n");
-
-		/* 4.1 client asking for a delegation? */
-		if (open->op_deleg_want)
-			nfsd4_open_deleg_none_ext(open, status);
-	}
+	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
 	return;
 out_free:
 	unhash_stid(&dp->dl_stid);
 	nfs4_put_delegation(dp);
 out_no_deleg:
-	flag = NFS4_OPEN_DELEGATE_NONE;
-	goto out;
+	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
+	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+
+	/* 4.1 client asking for a delegation? */
+	if (open->op_deleg_want)
+		nfsd4_open_deleg_none_ext(open, status);
+	return;
 }
 
 static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
-- 
cgit v0.10.2


From 247500820ebd02ad87525db5d9b199e5b66f6636 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 21 Jun 2013 11:48:11 -0400
Subject: nfsd4: fix decoding of compounds across page boundaries

A freebsd NFSv4.0 client was getting rare IO errors expanding a tarball.
A network trace showed the server returning BAD_XDR on the final getattr
of a getattr+write+getattr compound.  The final getattr started on a
page boundary.

I believe the Linux client ignores errors on the post-write getattr, and
that that's why we haven't seen this before.

Cc: stable@vger.kernel.org
Reported-by: Rick Macklem <rmacklem@uoguelph.ca>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 171fe5e4..e34f5eb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -167,8 +167,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 	 */
 	memcpy(p, argp->p, avail);
 	/* step to next page */
-	argp->p = page_address(argp->pagelist[0]);
 	argp->pagelist++;
+	argp->p = page_address(argp->pagelist[0]);
 	if (argp->pagelen < PAGE_SIZE) {
 		argp->end = argp->p + (argp->pagelen>>2);
 		argp->pagelen = 0;
-- 
cgit v0.10.2


From 590b743143eae8db40abdfd1ab20bc51ee0ee5db Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 21 Jun 2013 17:06:29 -0400
Subject: nfsd4: minor read_buf cleanup

The code to step to the next page seems reasonably self-contained.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e34f5eb..c102d25 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -139,6 +139,19 @@ xdr_error:					\
 	}					\
 } while (0)
 
+static void next_decode_page(struct nfsd4_compoundargs *argp)
+{
+	argp->pagelist++;
+	argp->p = page_address(argp->pagelist[0]);
+	if (argp->pagelen < PAGE_SIZE) {
+		argp->end = argp->p + (argp->pagelen>>2);
+		argp->pagelen = 0;
+	} else {
+		argp->end = argp->p + (PAGE_SIZE>>2);
+		argp->pagelen -= PAGE_SIZE;
+	}
+}
+
 static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 {
 	/* We want more bytes than seem to be available.
@@ -166,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 	 * guarantee p points to at least nbytes bytes.
 	 */
 	memcpy(p, argp->p, avail);
-	/* step to next page */
-	argp->pagelist++;
-	argp->p = page_address(argp->pagelist[0]);
-	if (argp->pagelen < PAGE_SIZE) {
-		argp->end = argp->p + (argp->pagelen>>2);
-		argp->pagelen = 0;
-	} else {
-		argp->end = argp->p + (PAGE_SIZE>>2);
-		argp->pagelen -= PAGE_SIZE;
-	}
+	next_decode_page(argp);
 	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
 	argp->p += XDR_QUADLEN(nbytes - avail);
 	return p;
-- 
cgit v0.10.2


From cf3aa02cb4a0c5af5557dd47f15a08a7df33182a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 26 Jun 2013 11:09:06 -0400
Subject: svcrpc: fix handling of too-short rpc's

If we detect that an rpc is too short, we abort and close the
connection.  Except, there's a bug here: we're leaving sk_datalen
nonzero without leaving any pages in the sk_pages array.  The most
likely result of the inconsistency is a subsequent crash in
svc_tcp_clear_pages.

Also demote the BUG_ON in svc_tcp_clear_pages to a WARN.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0f679df..df74919 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
-		BUG_ON(svsk->sk_pages[i] == NULL);
+		if (svsk->sk_pages[i] == NULL) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
 		put_page(svsk->sk_pages[i]);
 		svsk->sk_pages[i] = NULL;
 	}
@@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto err_noclose;
 	}
 
-	if (svc_sock_reclen(svsk) < 8)
+	if (svc_sock_reclen(svsk) < 8) {
+		svsk->sk_datalen = 0;
 		goto err_delete; /* client is nuts. */
+	}
 
 	rqstp->rq_arg.len = svsk->sk_datalen;
 	rqstp->rq_arg.page_base = 0;
-- 
cgit v0.10.2


From 1f691b07c5dc51b2055834f58c0f351defd97f27 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 26 Jun 2013 10:55:40 -0400
Subject: svcrpc: don't error out on small tcp fragment

Though clients we care about mostly don't do this, it is possible for
rpc requests to be sent in multiple fragments.  Here we have a sanity
check to ensure that the final received rpc isn't too small--except that
the number we're actually checking is the length of just the final
fragment, not of the whole rpc.  So a perfectly legal rpc that's
unluckily fragmented could cause the server to close the connection
here.

Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index df74919..305374d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto err_noclose;
 	}
 
-	if (svc_sock_reclen(svsk) < 8) {
+	if (svsk->sk_datalen < 8) {
 		svsk->sk_datalen = 0;
 		goto err_delete; /* client is nuts. */
 	}
-- 
cgit v0.10.2


From 89f6c3362cb5a6bce96dbe6aa15b4749c2262b21 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 19 Jun 2013 15:47:37 -0400
Subject: nfsd4: delegation-based open reclaims should bypass permissions

We saw a v4.0 client's create fail as follows:

	- open create succeeds and gets a read delegation
	- client attempts to set mode on new file, gets DELAY while
	  server recalls delegation.
	- client attempts a CLAIM_DELEGATE_CUR open using the
	  delegation, gets error because of new file mode.

This probably can't happen on a recent kernel since we're no longer
giving out delegations on create opens.  Nevertheless, it's a
bug--reclaim opens should bypass permission checks.

Reported-by: Steve Dickson <steved@redhat.com>
Reported-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1a1ff24..a7cee86 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -296,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
 
 	nfsd4_set_open_owner_reply_cache(cstate, open, resfh);
 	accmode = NFSD_MAY_NOP;
-	if (open->op_created)
+	if (open->op_created ||
+			open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
 		accmode |= NFSD_MAY_OWNER_OVERRIDE;
 	status = do_open_permission(rqstp, resfh, open, accmode);
 	set_change_info(&open->op_cinfo, current_fh);
-- 
cgit v0.10.2


From 0a262ffb75275e48caa17e80b96504354e94f6c2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 17 Jun 2013 17:29:40 -0400
Subject: nfsd4: do not throw away 4.1 lock state on last unlock

This reverts commit eb2099f31b0f090684a64ef8df44a30ff7c45fc2 "nfsd4:
release lockowners on last unlock in 4.1 case".  Trond identified
language in rfc 5661 section 8.2.4 which forbids this behavior:

	Stateids associated with byte-range locks are an exception.
	They remain valid even if a LOCKU frees all remaining locks, so
	long as the open file with which they are associated remains
	open, unless the client frees the stateids via the FREE_STATEID
	operation.

And bakeathon 2013 testing found a 4.1 freebsd client was getting an
incorrect BAD_STATEID return from a FREE_STATEID in the above situation
and then failing.

The spec language honestly was probably a mistake but at this point with
implementations already following it we're probably stuck with that.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fcc0610..a7d8a11 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4514,7 +4514,6 @@ __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    struct nfsd4_locku *locku)
 {
-	struct nfs4_lockowner *lo;
 	struct nfs4_ol_stateid *stp;
 	struct file *filp = NULL;
 	struct file_lock *file_lock = NULL;
@@ -4547,10 +4546,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfserr_jukebox;
 		goto out;
 	}
-	lo = lockowner(stp->st_stateowner);
 	locks_init_lock(file_lock);
 	file_lock->fl_type = F_UNLCK;
-	file_lock->fl_owner = (fl_owner_t)lo;
+	file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
 	file_lock->fl_pid = current->tgid;
 	file_lock->fl_file = filp;
 	file_lock->fl_flags = FL_POSIX;
@@ -4569,11 +4567,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
-	if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) {
-		WARN_ON_ONCE(cstate->replay_owner);
-		release_lockowner(lo);
-	}
-
 out:
 	nfsd4_bump_seqid(cstate, status);
 	if (!cstate->replay_owner)
-- 
cgit v0.10.2


From d08d32e6e5c0fee127d3b20d70f5b59d8af0a261 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 21 Jun 2013 11:05:32 -0400
Subject: nfsd4: return delegation immediately if lease fails

This case shouldn't happen--the administrator shouldn't really allow
other applications access to the export until clients have had the
chance to reclaim their state--but if it does then we should set the
"return this lease immediately" bit on the reply.  That still leaves
some small races, but it's the best the protocol allows us to do in the
case a lease is ripped out from under us....

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7d8a11..d44a4bf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3143,8 +3143,10 @@ out_free:
 out_no_deleg:
 	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
-	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
 		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
+		open->op_recall = 1;
+	}
 
 	/* 4.1 client asking for a delegation? */
 	if (open->op_deleg_want)
-- 
cgit v0.10.2


From f9e1aedc6c79f18bb56caa3735b94217c4ec1e0c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jun 2013 12:53:42 +1000
Subject: sunrpc/cache: remove races with queuing an upcall.

We currently queue an upcall after setting CACHE_PENDING,
and dequeue after clearing CACHE_PENDING.
So a request should only be present when CACHE_PENDING is set.

However we don't combine the test and the enqueue/dequeue in
a protected region, so it is possible (if unlikely) for a race
to result in a request being queued without CACHE_PENDING set,
or a request to be absent despite CACHE_PENDING.

So: include a test for CACHE_PENDING inside the regions of
enqueue and dequeue where queue_lock is held, and abort
the operation if the value is not as expected.

Also remove the early 'return' from cache_dequeue() to ensure that it
always removes all entries: As there is no locking between setting
CACHE_PENDING and calling sunrpc_cache_pipe_upcall it is not
inconceivable for some other thread to clear CACHE_PENDING and then
someone else to set it and call sunrpc_cache_pipe_upcall, both before
the original threads completed the call.

With this, it perfectly safe and correct to:
 - call cache_dequeue() if and only if we have just
   cleared CACHE_PENDING
 - call sunrpc_cache_pipe_upcall() (via cache_make_upcall)
   if and only if we have just set CACHE_PENDING.

Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 3b3f14f..8e964ae 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1036,23 +1036,32 @@ static int cache_release(struct inode *inode, struct file *filp,
 
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
 {
-	struct cache_queue *cq;
+	struct cache_queue *cq, *tmp;
+	struct cache_request *cr;
+	struct list_head dequeued;
+
+	INIT_LIST_HEAD(&dequeued);
 	spin_lock(&queue_lock);
-	list_for_each_entry(cq, &detail->queue, list)
+	list_for_each_entry_safe(cq, tmp, &detail->queue, list)
 		if (!cq->reader) {
-			struct cache_request *cr = container_of(cq, struct cache_request, q);
+			cr = container_of(cq, struct cache_request, q);
 			if (cr->item != ch)
 				continue;
+			if (test_bit(CACHE_PENDING, &ch->flags))
+				/* Lost a race and it is pending again */
+				break;
 			if (cr->readers != 0)
 				continue;
-			list_del(&cr->q.list);
-			spin_unlock(&queue_lock);
-			cache_put(cr->item, detail);
-			kfree(cr->buf);
-			kfree(cr);
-			return;
+			list_move(&cr->q.list, &dequeued);
 		}
 	spin_unlock(&queue_lock);
+	while (!list_empty(&dequeued)) {
+		cr = list_entry(dequeued.next, struct cache_request, q.list);
+		list_del(&cr->q.list);
+		cache_put(cr->item, detail);
+		kfree(cr->buf);
+		kfree(cr);
+	}
 }
 
 /*
@@ -1166,6 +1175,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 
 	char *buf;
 	struct cache_request *crq;
+	int ret = 0;
 
 	if (!detail->cache_request)
 		return -EINVAL;
@@ -1191,10 +1201,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	list_add_tail(&crq->q.list, &detail->queue);
+	if (test_bit(CACHE_PENDING, &h->flags))
+		list_add_tail(&crq->q.list, &detail->queue);
+	else
+		/* Lost a race, no longer PENDING, so don't enqueue */
+		ret = -EAGAIN;
 	spin_unlock(&queue_lock);
 	wake_up(&queue_wait);
-	return 0;
+	if (ret == -EAGAIN) {
+		kfree(buf);
+		kfree(crq);
+	}
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall);
 
-- 
cgit v0.10.2


From 2a1c7f53fd31e46f51780b61eb99fffef4c3c5a6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jun 2013 12:53:42 +1000
Subject: sunrpc/cache: use cache_fresh_unlocked consistently and correctly.

cache_fresh_unlocked() is called when a cache entry
has been updated and ensures that if there were any
pending upcalls, they are cleared.

So every time we update a cache entry, we should call this,
and this should be the only way that we try to clear
pending calls (that sort of uniformity makes code sooo much
easier to read).

try_to_negate_entry() will (possibly) mark an entry as
negative.  If it doesn't, it is because the entry already
is VALID.
So the entry will be valid on exit, so it is appropriate to
call cache_fresh_unlocked().
So tidy up try_to_negate_entry() to do that, and remove
partial open-coded cache_fresh_unlocked() from the one
call-site of try_to_negate_entry().

In the other branch of the 'switch(cache_make_upcall())',
we again have a partial open-coded version of cache_fresh_unlocked().
Replace that with a real call.

And again in cache_clean(), use a real call to cache_fresh_unlocked().

These call sites might previously have called
cache_revisit_request() if CACHE_PENDING wasn't set.
This is never necessary because cache_revisit_request() can
only do anything if the item is in the cache_defer_hash,
However any time that an item is added to the cache_defer_hash
(setup_deferral), the code immediately tests CACHE_PENDING,
and removes the entry again if it is clear.  So all other
places we only need to 'cache_revisit_request' if we've
just cleared CACHE_PENDING.

Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown  <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8e964ae..29c4633 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -228,15 +228,14 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 
 	write_lock(&detail->hash_lock);
 	rv = cache_is_valid(h);
-	if (rv != -EAGAIN) {
-		write_unlock(&detail->hash_lock);
-		return rv;
+	if (rv == -EAGAIN) {
+		set_bit(CACHE_NEGATIVE, &h->flags);
+		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+		rv = -ENOENT;
 	}
-	set_bit(CACHE_NEGATIVE, &h->flags);
-	cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
 	write_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(h, detail);
-	return -ENOENT;
+	return rv;
 }
 
 /*
@@ -275,13 +274,10 @@ int cache_check(struct cache_detail *detail,
 		if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
 			switch (cache_make_upcall(detail, h)) {
 			case -EINVAL:
-				clear_bit(CACHE_PENDING, &h->flags);
-				cache_revisit_request(h);
 				rv = try_to_negate_entry(detail, h);
 				break;
 			case -EAGAIN:
-				clear_bit(CACHE_PENDING, &h->flags);
-				cache_revisit_request(h);
+				cache_fresh_unlocked(h, detail);
 				break;
 			}
 		}
@@ -457,9 +453,7 @@ static int cache_clean(void)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
 		if (ch) {
-			if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
-				cache_dequeue(current_detail, ch);
-			cache_revisit_request(ch);
+			cache_fresh_unlocked(ch, d);
 			cache_put(ch, d);
 		}
 	} else
-- 
cgit v0.10.2


From 013920eb5db97e99a4c30c8400f1c616e2a8b0a2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jun 2013 12:53:42 +1000
Subject: sunrpc/cache: ensure items removed from cache do not have pending
 upcalls.

It is possible for a race to set CACHE_PENDING after cache_clean()
has removed a cache entry from the cache.
If CACHE_PENDING is still set when the entry is finally 'put',
the cache_dequeue() will never happen and we can leak memory.

So set a new flag 'CACHE_CLEANED' when we remove something from
the cache, and don't queue any upcall if it is set.

If CACHE_PENDING is set before CACHE_CLEANED, the call that
cache_clean() makes to cache_fresh_unlocked() will free memory
as needed.  If CACHE_PENDING is set after CACHE_CLEANED, the
test in sunrpc_cache_pipe_upcall will ensure that the memory
is not allocated.

Reported-by: <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 303399b..8419f7d 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -57,6 +57,7 @@ struct cache_head {
 #define	CACHE_VALID	0	/* Entry contains valid data */
 #define	CACHE_NEGATIVE	1	/* Negative entry - there is no match for the key */
 #define	CACHE_PENDING	2	/* An upcall has been sent but no reply received yet*/
+#define	CACHE_CLEANED	3	/* Entry has been cleaned from cache */
 
 #define	CACHE_NEW_EXPIRY 120	/* keep new things pending confirmation for 120 seconds */
 
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 29c4633..b12144c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(cache_check);
  * a current pointer into that list and into the table
  * for that entry.
  *
- * Each time clean_cache is called it finds the next non-empty entry
+ * Each time cache_clean is called it finds the next non-empty entry
  * in the current table and walks the list in that entry
  * looking for entries that can be removed.
  *
@@ -453,6 +453,7 @@ static int cache_clean(void)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
 		if (ch) {
+			set_bit(CACHE_CLEANED, &ch->flags);
 			cache_fresh_unlocked(ch, d);
 			cache_put(ch, d);
 		}
@@ -1178,6 +1179,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 		warn_no_listener(detail);
 		return -EINVAL;
 	}
+	if (test_bit(CACHE_CLEANED, &h->flags))
+		/* Too late to make an upcall */
+		return -EAGAIN;
 
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
-- 
cgit v0.10.2


From 7715cde86857d4bb40f43f1ee971cf906eaf1b9c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jun 2013 12:53:42 +1000
Subject: net/sunrpc: xpt_auth_cache should be ignored when expired.

commit d202cce8963d9268ff355a386e20243e8332b308
    sunrpc: never return expired entries in sunrpc_cache_lookup

moved the 'entry is expired' test from cache_check to
sunrpc_cache_lookup, so that it happened early and some races could
safely be ignored.

However the ip_map (in svcauth_unix.c) has a separate single-item
cache which allows quick lookup without locking.  An entry in this
case would not be subject to the expiry test and so could be used
well after it has expired.

This is not normally a big problem because the first time it is used
after it is expired an up-call will be scheduled to refresh the entry
(if it hasn't been scheduled already) and the old entry will then
be invalidated.  So on the second attempt to use it after it has
expired, ip_map_cached_get will discard it.

However that is subtle and not ideal, so replace the "!cache_valid"
test with "cache_is_expired".
In doing this we drop the test on the "CACHE_VALID" bit.  This is
unnecessary as the bit is never cleared, and an entry will only
be cached if the bit is set.

Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 8419f7d..6ce690d 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -149,6 +149,24 @@ struct cache_deferred_req {
 					   int too_many);
 };
 
+/*
+ * timestamps kept in the cache are expressed in seconds
+ * since boot.  This is the best for measuring differences in
+ * real time.
+ */
+static inline time_t seconds_since_boot(void)
+{
+	struct timespec boot;
+	getboottime(&boot);
+	return get_seconds() - boot.tv_sec;
+}
+
+static inline time_t convert_to_wallclock(time_t sinceboot)
+{
+	struct timespec boot;
+	getboottime(&boot);
+	return boot.tv_sec + sinceboot;
+}
 
 extern const struct file_operations cache_file_operations_pipefs;
 extern const struct file_operations content_file_operations_pipefs;
@@ -182,15 +200,10 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 	kref_put(&h->ref, cd->cache_put);
 }
 
-static inline int cache_valid(struct cache_head *h)
+static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
-	/* If an item has been unhashed pending removal when
-	 * the refcount drops to 0, the expiry_time will be
-	 * set to 0.  We don't want to consider such items
-	 * valid in this context even though CACHE_VALID is
-	 * set.
-	 */
-	return (h->expiry_time != 0 && test_bit(CACHE_VALID, &h->flags));
+	return  (h->expiry_time < seconds_since_boot()) ||
+		(detail->flush_time > h->last_refresh);
 }
 
 extern int cache_check(struct cache_detail *detail,
@@ -251,25 +264,6 @@ static inline int get_uint(char **bpp, unsigned int *anint)
 	return 0;
 }
 
-/*
- * timestamps kept in the cache are expressed in seconds
- * since boot.  This is the best for measuring differences in
- * real time.
- */
-static inline time_t seconds_since_boot(void)
-{
-	struct timespec boot;
-	getboottime(&boot);
-	return get_seconds() - boot.tv_sec;
-}
-
-static inline time_t convert_to_wallclock(time_t sinceboot)
-{
-	struct timespec boot;
-	getboottime(&boot);
-	return boot.tv_sec + sinceboot;
-}
-
 static inline time_t get_expiry(char **bpp)
 {
 	int rv;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index b12144c..5478a01 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h)
 	h->last_refresh = now;
 }
 
-static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
-{
-	return  (h->expiry_time < seconds_since_boot()) ||
-		(detail->flush_time > h->last_refresh);
-}
-
 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 				       struct cache_head *key, int hash)
 {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 06bdf5a..a98853d 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt)
 		spin_lock(&xprt->xpt_lock);
 		ipm = xprt->xpt_auth_cache;
 		if (ipm != NULL) {
-			if (!cache_valid(&ipm->h)) {
+			sn = net_generic(xprt->xpt_net, sunrpc_net_id);
+			if (cache_is_expired(sn->ip_map_cache, &ipm->h)) {
 				/*
 				 * The entry has been invalidated since it was
 				 * remembered, e.g. by a second mount from the
 				 * same IP address.
 				 */
-				sn = net_generic(xprt->xpt_net, sunrpc_net_id);
 				xprt->xpt_auth_cache = NULL;
 				spin_unlock(&xprt->xpt_lock);
 				cache_put(&ipm->h, sn->ip_map_cache);
-- 
cgit v0.10.2


From 0bebc633f1428163c9659fd16b34c745e60a0757 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jun 2013 12:53:42 +1000
Subject: sunrpc: Don't schedule an upcall on a replaced cache entry.

When a cache entry is replaced, the "expiry_time" get set to
zero by a call to "cache_fresh_locked(..., 0)" at the end of
"sunrpc_cache_update".

This low expiry time makes cache_check() think that the 'refresh_age'
is negative, so the 'age' is comparatively large and a refresh is
triggered.
However refreshing a replaced entry it pointless, it cannot achieve
anything useful.

So teach cache_check to ignore a low refresh_age when expiry_time
is zero.

Reported-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5478a01..49eb370 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -262,7 +262,8 @@ int cache_check(struct cache_detail *detail,
 	if (rqstp == NULL) {
 		if (rv == -EAGAIN)
 			rv = -ENOENT;
-	} else if (rv == -EAGAIN || age > refresh_age/2) {
+	} else if (rv == -EAGAIN ||
+		   (h->expiry_time != 0 && age > refresh_age/2)) {
 		dprintk("RPC:       Want update, refage=%ld, age=%ld\n",
 				refresh_age, age);
 		if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
-- 
cgit v0.10.2


From 0979292bfa301cb87d936b69af428090d2feea1b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 8 Jul 2013 13:44:45 -0400
Subject: svcrpc: fix failures to handle -1 uid's

As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode
kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1
(0xffff) uid or gid would fail with a badcred error.

Commit afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 "svcrpc: fix failures to
handle -1 uid's and gid's" fixed part of the problem, but overlooked the
gid upcall--the kernel can request supplementary gid's for the -1 uid,
but mountd's attempt write a response will get -EINVAL.

Symptoms were nfsd failing to reply to the first attempt to use a newly
negotiated krb5 context.

Reported-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Tested-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index a98853d..621ca7b 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd,
 	if (rv)
 		return -EINVAL;
 	uid = make_kuid(&init_user_ns, id);
-	if (!uid_valid(uid))
-		return -EINVAL;
 	ug.uid = uid;
 
 	expiry = get_expiry(&mesg);
-- 
cgit v0.10.2


From f0f51f5cdd107971282ae18f00a6fa03d69407a0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 18 Jun 2013 14:26:02 -0400
Subject: nfsd4: allow destroy_session over destroyed session

RFC 5661 allows a client to destroy a session using a compound
associated with the destroyed session, as long as the DESTROY_SESSION op
is the last op of the compound.

We attempt to allow this, but testing against a Solaris client (which
does destroy sessions in this way) showed that we were failing the
DESTROY_SESSION with NFS4ERR_DELAY, because we assumed the reference
count on the session (held by us) represented another rpc in progress
over this session.

Fix this by noting that in this case the expected reference count is 1,
not 0.

Also, note as long as the session holds a reference to the compound
we're destroying, we can't free it here--instead, delay the free till
the final put in nfs4svc_encode_compoundres.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d44a4bf..25ae6ce 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -97,19 +97,20 @@ nfs4_lock_state(void)
 
 static void free_session(struct nfsd4_session *);
 
-void nfsd4_put_session(struct nfsd4_session *ses)
+static bool is_session_dead(struct nfsd4_session *ses)
 {
-	atomic_dec(&ses->se_ref);
+	return ses->se_flags & NFS4_SESSION_DEAD;
 }
 
-static bool is_session_dead(struct nfsd4_session *ses)
+void nfsd4_put_session(struct nfsd4_session *ses)
 {
-	return ses->se_flags & NFS4_SESSION_DEAD;
+	if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
+		free_session(ses);
 }
 
-static __be32 mark_session_dead_locked(struct nfsd4_session *ses)
+static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
 {
-	if (atomic_read(&ses->se_ref))
+	if (atomic_read(&ses->se_ref) > ref_held_by_me)
 		return nfserr_jukebox;
 	ses->se_flags |= NFS4_SESSION_DEAD;
 	return nfs_ok;
@@ -2074,6 +2075,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 {
 	struct nfsd4_session *ses;
 	__be32 status;
+	int ref_held_by_me = 0;
 	struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id);
 
 	nfs4_lock_state();
@@ -2081,6 +2083,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
 		if (!nfsd4_last_compound_op(r))
 			goto out;
+		ref_held_by_me++;
 	}
 	dump_sessionid(__func__, &sessionid->sessionid);
 	spin_lock(&nn->client_lock);
@@ -2091,17 +2094,19 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	status = nfserr_wrong_cred;
 	if (!mach_creds_match(ses->se_client, r))
 		goto out_client_lock;
-	status = mark_session_dead_locked(ses);
+	nfsd4_get_session_locked(ses);
+	status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
 	if (status)
-		goto out_client_lock;
+		goto out_put_session;
 	unhash_session(ses);
 	spin_unlock(&nn->client_lock);
 
 	nfsd4_probe_callback_sync(ses->se_client);
 
 	spin_lock(&nn->client_lock);
-	free_session(ses);
 	status = nfs_ok;
+out_put_session:
+	nfsd4_put_session(ses);
 out_client_lock:
 	spin_unlock(&nn->client_lock);
 out:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c102d25..0c0f3ea9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3760,13 +3760,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
 	BUG_ON(iov->iov_len > PAGE_SIZE);
 	if (nfsd4_has_session(cs)) {
+		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+		struct nfs4_client *clp = cs->session->se_client;
 		if (cs->status != nfserr_replay_cache) {
 			nfsd4_store_cache_entry(resp);
 			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
 		}
 		/* Renew the clientid on success and on replay */
-		put_client_renew(cs->session->se_client);
+		spin_lock(&nn->client_lock);
 		nfsd4_put_session(cs->session);
+		spin_unlock(&nn->client_lock);
+		put_client_renew(clp);
 	}
 	return 1;
 }
-- 
cgit v0.10.2


From d109148111cdfcdae94f797dc142468bd0ff7557 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 8 Jul 2013 18:00:56 -0400
Subject: nfsd4: support minorversion 1 by default

We now have minimal minorversion 1 support; turn it on by default.

This can still be turned off with "echo -4.1 >/proc/fs/nfsd/versions".

Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 262df5c..6b9f48c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -116,7 +116,7 @@ struct svc_program		nfsd_program = {
 
 };
 
-u32 nfsd_supported_minorversion;
+u32 nfsd_supported_minorversion = 1;
 
 int nfsd_vers(int vers, enum vers_op change)
 {
-- 
cgit v0.10.2