From 12dd7ecf2323c572b1d302707eada4900848dced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 May 2014 14:15:02 -0700 Subject: lockd: avoid warning when CONFIG_SYSCTL undefined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building without CONFIG_SYSCTL, the compiler saw an unused label. This moves the label into the #ifdef it is used under. fs/lockd/svc.c: In function ‘init_nlm’: fs/lockd/svc.c:626:1: warning: label ‘err_sysctl’ defined but not used [-Wunused-label] Signed-off-by: Kees Cook Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 6bf06a0..2b431f7 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -622,8 +622,8 @@ static int __init init_nlm(void) err_pernet: #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); -#endif err_sysctl: +#endif return err; } -- cgit v0.10.2 From 4dd86e150f63a6c360783f163a979ec563e6d570 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:43:58 -0400 Subject: NFSd: Remove 'inline' designation for free_client() It is large, it is used in more than one place, and it is not performance critical. Let gcc figure out whether it should be inlined... Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 32b699b..841495a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1093,7 +1093,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return clp; } -static inline void +static void free_client(struct nfs4_client *clp) { struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); -- cgit v0.10.2 From 9c69de4c94fcb11db919160d5fa0b48f13d1757a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:13 +0200 Subject: nfsd: remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only real user of this header is fs/nfsd/nfsfh.h, so merge the two. Various lockѕ source files used it to indirectly get other sunrpc or nfs headers, so fix those up. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00ec0b9..d3e40db 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -14,6 +14,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 9a55797..3e9f787 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -15,6 +15,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index dc5c759..7ec6b10 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,12 +14,12 @@ #include #include #include -#include #include #include #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_SVCSUBS diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 964666c..9340e7e 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -16,6 +16,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 479eb68..7d5c310 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index ad67964..2e89e70 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include +#include + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} + +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ + + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ -#include +} svc_fh; enum nfsd_fsid { FSID_DEV = 0, @@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp) } } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 424d8f5..5b3bbf2 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@ #include #include -#include #include "nfsfh.h" typedef struct { diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dcaad79..219d796 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -17,13 +17,13 @@ #include #include #include -#include #include #include #ifdef CONFIG_LOCKD_V4 #include #endif #include +#include /* * Version string diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 7898c99..b12c4e5 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -9,9 +9,13 @@ #ifndef NFSD_EXPORT_H #define NFSD_EXPORT_H -# include +#include #include +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + /* * FS Locations */ diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h deleted file mode 100644 index a93593f1..0000000 --- a/include/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * include/linux/nfsd/nfsfh.h - * - * This file describes the layout of the file handles as passed - * over the wire. - * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ -#ifndef _LINUX_NFSD_FH_H -#define _LINUX_NFSD_FH_H - -# include -#include - -static inline __u32 ino_t_to_u32(ino_t ino) -{ - return (__u32) ino; -} - -static inline ino_t u32_to_ino_t(__u32 uino) -{ - return (ino_t) uino; -} - -/* - * This is the internal representation of an NFS handle used in knfsd. - * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. - */ -typedef struct svc_fh { - struct knfsd_fh fh_handle; /* FH data */ - struct dentry * fh_dentry; /* validated dentry */ - struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ - -#ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ - - /* Pre-op attributes saved during fh_lock */ - __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ - /* - * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) - * to find out if it is valid. - */ - u64 fh_pre_change; - - /* Post-op attributes saved in fh_unlock */ - struct kstat fh_post_attr; /* full attrs after operation */ - u64 fh_post_change; /* nfsv4 change; see above */ -#endif /* CONFIG_NFSD_V3 */ - -} svc_fh; - -#endif /* _LINUX_NFSD_FH_H */ -- cgit v0.10.2 From d430e8d530e900c923bf77718d72478b1c280592 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:14 +0200 Subject: nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 7ec6b10..b6f3b84 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 0000000..d7939a6 --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include +#include + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS 128 + +struct nfsd4_fs_location { + char *hosts; /* colon separated list of hosts */ + char *path; /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { + uint32_t locations_count; + struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ + int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + +struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; + int ex_fsid; + unsigned char * ex_uuid; /* 16 byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + int ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client. There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { + struct cache_head h; + + struct auth_domain * ek_client; + int ek_fsidtype; + u32 ek_fsid[6]; + + struct path ek_path; +}; + +#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); +void nfsd_export_flush(struct net *); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct path *); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct path *); +struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); +int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ + cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ + cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7d5c310..72004ca 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,9 +19,10 @@ #include #include -#include #include +#include "export.h" + /* * nfsd version */ diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h deleted file mode 100644 index b12c4e5..0000000 --- a/include/linux/nfsd/export.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * include/linux/nfsd/export.h - * - * Public declarations for NFS exports. The definitions for the - * syscall interface are in nfsctl.h - * - * Copyright (C) 1995-1997 Olaf Kirch - */ -#ifndef NFSD_EXPORT_H -#define NFSD_EXPORT_H - -#include -#include - -struct knfsd_fh; -struct svc_fh; -struct svc_rqst; - -/* - * FS Locations - */ - -#define MAX_FS_LOCATIONS 128 - -struct nfsd4_fs_location { - char *hosts; /* colon separated list of hosts */ - char *path; /* slash separated list of path components */ -}; - -struct nfsd4_fs_locations { - uint32_t locations_count; - struct nfsd4_fs_location *locations; -/* If we're not actually serving this data ourselves (only providing a - * list of replicas that do serve it) then we set "migrated": */ - int migrated; -}; - -/* - * We keep an array of pseudoflavors with the export, in order from most - * to least preferred. For the foreseeable future, we don't expect more - * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, - * spkm3i, and spkm3p (and using all 8 at once should be rare). - */ -#define MAX_SECINFO_LIST 8 - -struct exp_flavor_info { - u32 pseudoflavor; - u32 flags; -}; - -struct svc_export { - struct cache_head h; - struct auth_domain * ex_client; - int ex_flags; - struct path ex_path; - kuid_t ex_anon_uid; - kgid_t ex_anon_gid; - int ex_fsid; - unsigned char * ex_uuid; /* 16 byte fsid */ - struct nfsd4_fs_locations ex_fslocs; - int ex_nflavors; - struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; - struct cache_detail *cd; -}; - -/* an "export key" (expkey) maps a filehandlefragement to an - * svc_export for a given client. There can be several per export, - * for the different fsid types. - */ -struct svc_expkey { - struct cache_head h; - - struct auth_domain * ek_client; - int ek_fsidtype; - u32 ek_fsid[6]; - - struct path ek_path; -}; - -#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) -#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) -#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) - -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); - -/* - * Function declarations - */ -int nfsd_export_init(struct net *); -void nfsd_export_shutdown(struct net *); -void nfsd_export_flush(struct net *); -struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct path *); -struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct path *); -struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); -int exp_rootfh(struct net *, struct auth_domain *, - char *path, struct knfsd_fh *, int maxsize); -__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); - -static inline void exp_put(struct svc_export *exp) -{ - cache_put(&exp->h, exp->cd); -} - -static inline void exp_get(struct svc_export *exp) -{ - cache_get(&exp->h); -} -struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); - -#endif /* NFSD_EXPORT_H */ -- cgit v0.10.2 From 7f94423e8fcc1e0f3416be76d3da0982f586d565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:15 +0200 Subject: nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 72004ca..7a07f9c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,8 +19,8 @@ #include #include -#include +#include "stats.h" #include "export.h" /* diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 6d4521f..cd90878 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "nfsd.h" diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 0000000..a5c944b --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. + * + * Copyright (C) 1995, 1996 Olaf Kirch + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include + + +struct nfsd_stats { + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats nfsdstats; +extern struct svc_stat nfsd_svcstats; + +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h deleted file mode 100644 index e75b254..0000000 --- a/include/linux/nfsd/stats.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/include/linux/nfsd/stats.h - * - * Statistics for NFS server. - * - * Copyright (C) 1995, 1996 Olaf Kirch - */ -#ifndef LINUX_NFSD_STATS_H -#define LINUX_NFSD_STATS_H - -#include - - -struct nfsd_stats { - unsigned int rchits; /* repcache hits */ - unsigned int rcmisses; /* repcache hits */ - unsigned int rcnocache; /* uncached reqs */ - unsigned int fh_stale; /* FH stale error */ - unsigned int fh_lookup; /* dentry cached */ - unsigned int fh_anon; /* anon file dentry returned */ - unsigned int fh_nocache_dir; /* filehandle not found in dcache */ - unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ - unsigned int io_read; /* bytes returned to read requests */ - unsigned int io_write; /* bytes passed in write requests */ - unsigned int th_cnt; /* number of available threads */ - unsigned int th_usage[10]; /* number of ticks during which n perdeciles - * of available threads were in use */ - unsigned int th_fullcnt; /* number of times last free thread was used */ - unsigned int ra_size; /* size of ra cache */ - unsigned int ra_depth[11]; /* number of times ra entry was found that deep - * in the cache (10percentiles). [10] = not found */ -#ifdef CONFIG_NFSD_V4 - unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ -#endif - -}; - - -extern struct nfsd_stats nfsdstats; -extern struct svc_stat nfsd_svcstats; - -void nfsd_stat_init(void); -void nfsd_stat_shutdown(void); - -#endif /* LINUX_NFSD_STATS_H */ -- cgit v0.10.2 From 6f226e2ab1b895c8685e868af0a5f797fcaaaf57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:16 +0200 Subject: nfsd: remove There is almost nothing left it in, just merge it into the only file that includes it. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7a07f9c..e9f2fd4 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -18,11 +18,18 @@ #include #include -#include +#include #include "stats.h" #include "export.h" +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif + /* * nfsd version */ diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h deleted file mode 100644 index 19ef837..0000000 --- a/include/linux/nfsd/debug.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * linux/include/linux/nfsd/debug.h - * - * Debugging-related stuff for nfsd - * - * Copyright (C) 1995 Olaf Kirch - */ -#ifndef LINUX_NFSD_DEBUG_H -#define LINUX_NFSD_DEBUG_H - -#include - -# undef ifdebug -# ifdef NFSD_DEBUG -# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) -# else -# define ifdebug(flag) if (0) -# endif -#endif /* LINUX_NFSD_DEBUG_H */ -- cgit v0.10.2 From 50cc62317dece5173b56aa8d3569930b065ce47d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:44:03 -0400 Subject: NFSd: Mark nfs4_free_lockowner and nfs4_free_openowner as static functions They do not need to be used outside fs/nfsd/nfs4state.c Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 841495a..fcab909 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -645,6 +645,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) } } +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); @@ -699,6 +705,12 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) } } +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); @@ -2553,18 +2565,6 @@ out_nomem: return -ENOMEM; } -void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); -} - -void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); -} - static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 5b3bbf2..fda9ce2 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -463,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); -extern void nfs4_free_openowner(struct nfs4_openowner *); -extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); -- cgit v0.10.2 From 14bcab1a395b1b150e654100cce09a1b552ec5f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:44:07 -0400 Subject: NFSd: Clean up nfs4_preprocess_stateid_op Move the state locking and file descriptor reference out from the callers and into nfs4_preprocess_stateid_op() itself. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d543222..ac83778 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -786,7 +786,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfsd4_last_compound_op(rqstp)) rqstp->rq_splice_ok = false; - nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &read->rd_stateid, @@ -794,11 +793,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -937,10 +933,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &setattr->sa_stateid, WR_STATE, NULL); - nfs4_unlock_state(); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -1006,17 +1000,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); if (status) { - nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } - if (filp) - get_file(filp); - nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fcab909..b527894 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3654,6 +3654,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct file *file = NULL; __be32 status; if (filpp) @@ -3665,10 +3666,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); + nfs4_lock_state(); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion, nn); if (status) - return status; + goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3679,8 +3682,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - *filpp = dp->dl_file->fi_deleg_file; - if (!*filpp) { + file = dp->dl_file->fi_deleg_file; + if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; goto out; @@ -3701,16 +3704,20 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, goto out; if (filpp) { if (flags & RD_STATE) - *filpp = find_readable_file(stp->st_file); + file = find_readable_file(stp->st_file); else - *filpp = find_writeable_file(stp->st_file); + file = find_writeable_file(stp->st_file); } break; default: - return nfserr_bad_stateid; + status = nfserr_bad_stateid; + goto out; } status = nfs_ok; + if (file) + *filpp = get_file(file); out: + nfs4_unlock_state(); return status; } -- cgit v0.10.2 From ecc7455d8eb1860f5aa6b9ad82a9a81f93eb11d1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 7 May 2014 23:08:04 +0800 Subject: NFSD: cleanup unneeded including linux/export.h commit 4ac7249ea5a0ceef9f8269f63f33cc873c3fac61 have remove all EXPORT_SYMBOL, linux/export.h is not needed, just clean it. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6f3f392..b206e6c 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,7 +36,6 @@ #include #include -#include #include "nfsfh.h" #include "nfsd.h" #include "acl.h" -- cgit v0.10.2 From 5409e46f1bcf960c651f3fff35f2f25e539655cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2014 13:49:44 +0200 Subject: nfsd: clean up fh_auth usage Use fh_fsid when reffering to the fsid part of the filehandle. The variable length auth field envisioned in nfsfh wasn't ever implemented. Also clean up some lose ends around this and document the file handle format better. Btw, why do we even export nfsfh.h to userspace? The file handle very much is kernel private, and nothing in nfs-utils include the header either. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3c37b16..a337106 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -169,8 +169,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); } else { __u32 tfh[2]; dev_t xdev; @@ -385,7 +385,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -513,7 +513,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", @@ -557,17 +556,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fhp->fh_handle.fh_fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 616e3b3..2039123 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -1,13 +1,7 @@ /* - * include/linux/nfsd/nfsfh.h - * * This file describes the layout of the file handles as passed * over the wire. * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ @@ -37,7 +31,7 @@ struct nfs_fhbase_old { }; /* - * This is the new flexible, extensible style NFSv2/v3 file handle. + * This is the new flexible, extensible style NFSv2/v3/v4 file handle. * by Neil Brown - March 2000 * * The file handle starts with a sequence of four-byte words. @@ -47,14 +41,7 @@ struct nfs_fhbase_old { * * All four-byte values are in host-byte-order. * - * The auth_type field specifies how the filehandle can be authenticated - * This might allow a file to be confirmed to be in a writable part of a - * filetree without checking the path from it up to the root. - * Current values: - * 0 - No authentication. fb_auth is 0 bytes long - * Possible future values: - * 1 - 4 bytes taken from MD5 hash of the remainer of the file handle - * prefixed by a secret and with the important export flags. + * The auth_type field is deprecated and must be set to 0. * * The fsid_type identifies how the filesystem (or export point) is * encoded. @@ -71,14 +58,9 @@ struct nfs_fhbase_old { * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. - * This is (will be) passed to, and set by, the underlying filesystem if it supports - * filehandle operations. The filesystem must not use the value '0' or '0xff' and may - * only use the values 1 and 2 as defined below: - * Current values: - * 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes. - * 1 - 32bit inode number, 32 bit generation number. - * 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number. - * + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. */ struct nfs_fhbase_new { __u8 fb_version; /* == 1, even => nfs_fhbase_old */ @@ -114,9 +96,9 @@ struct knfsd_fh { #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_auth fh_base.fh_new.fb_auth #define fh_fsid fh_base.fh_new.fb_auth - +/* Do not use, provided for userspace compatiblity. */ +#define fh_auth fh_base.fh_new.fb_auth #endif /* _UAPI_LINUX_NFSD_FH_H */ -- cgit v0.10.2 From ecca063b31b88d31ee79e9d958ea78023659554e Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 15 Apr 2014 17:13:56 +0800 Subject: SUNRPC: Fix printk that is not only for nfsd Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ae333c1..0663621 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) } return; out: - printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); + printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name); } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); -- cgit v0.10.2 From f3e41ec5ef0f5d2e10b6bfd3a13dc29f6d260d79 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 8 Apr 2014 13:04:01 +0800 Subject: NFSD: Use simple_read_from_buffer for coping data to userspace Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index d620e7f..2ed05c3 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, { static u64 val; char read_buf[25]; - size_t size, ret; + size_t size; loff_t pos = *ppos; if (!pos) nfsd_inject_get(file_inode(file)->i_private, &val); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); - if (pos < 0) - return -EINVAL; - if (pos >= size || !len) - return 0; - if (len > size - pos) - len = size - pos; - ret = copy_to_user(buf, read_buf + pos, len); - if (ret == len) - return -EFAULT; - len -= ret; - *ppos = pos + len; - return len; + return simple_read_from_buffer(buf, len, ppos, read_buf, size); } static ssize_t fault_inject_write(struct file *file, const char __user *buf, -- cgit v0.10.2 From 9fa1959e976f7a6ae84f616ca669359028070c61 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 8 Apr 2014 13:06:28 +0800 Subject: NFSD: Get rid of empty function nfs4_state_init Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b527894..fac2683 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4979,13 +4979,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_ #endif /* CONFIG_NFSD_FAULT_INJECTION */ -/* initialization to perform at module load time: */ - -void -nfs4_state_init(void) -{ -} - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f34d9de..5184404 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1179,7 +1179,6 @@ static int __init init_nfsd(void) retval = nfsd4_init_slabs(); if (retval) goto out_unregister_pernet; - nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index e9f2fd4..847daf3 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -115,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned long max_delegations; -void nfs4_state_init(void); int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); @@ -126,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); #else -static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } -- cgit v0.10.2 From 368fe39b508486483eb2cbb03a21ebd1b2a204bf Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 19 Apr 2014 00:17:31 +0800 Subject: NFSD: Don't clear SUID/SGID after root writing data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're clearing the SUID/SGID bits on write by hand in nfsd_vfs_write, even though the subsequent vfs_writev() call will end up doing this for us (through file system write methods eventually calling file_remove_suid(), e.g., from __generic_file_aio_write). So, remove the redundant nfsd code. The only change in behavior is when the write is by root, in which case we previously cleared SUID/SGID, but will now leave it alone. The new behavior is the behavior of every filesystem we've checked. It seems better to be consistent with local filesystem behavior. And the security advantage seems limited as root could always restore these bits by hand if it wanted. SUID/SGID is not cleared after writing data with (root, local ext4), File: ‘test’ Size: 0 Blocks: 0 IO Block: 4096 regular empty file Device: 803h/2051d Inode: 1200137 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 0/ root) Gid: ( 0/ root) Context: unconfined_u:object_r:admin_home_t:s0 Access: 2014-04-18 21:36:31.016029014 +0800 Modify: 2014-04-18 21:36:31.016029014 +0800 Change: 2014-04-18 21:36:31.026030285 +0800 Birth: - File: ‘test’ Size: 5 Blocks: 8 IO Block: 4096 regular file Device: 803h/2051d Inode: 1200137 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 0/ root) Gid: ( 0/ root) Context: unconfined_u:object_r:admin_home_t:s0 Access: 2014-04-18 21:36:31.016029014 +0800 Modify: 2014-04-18 21:36:31.040032065 +0800 Change: 2014-04-18 21:36:31.040032065 +0800 Birth: - With no_root_squash, (root, remote ext4), SUID/SGID are cleared, File: ‘test’ Size: 0 Blocks: 0 IO Block: 262144 regular empty file Device: 24h/36d Inode: 786439 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 1000/ test) Gid: ( 1000/ test) Context: system_u:object_r:nfs_t:s0 Access: 2014-04-18 21:45:32.155805097 +0800 Modify: 2014-04-18 21:45:32.155805097 +0800 Change: 2014-04-18 21:45:32.168806749 +0800 Birth: - File: ‘test’ Size: 5 Blocks: 8 IO Block: 262144 regular file Device: 24h/36d Inode: 786439 Links: 1 Access: (0777/-rwxrwxrwx) Uid: ( 1000/ test) Gid: ( 1000/ test) Context: system_u:object_r:nfs_t:s0 Access: 2014-04-18 21:45:32.155805097 +0800 Modify: 2014-04-18 21:45:32.184808783 +0800 Change: 2014-04-18 21:45:32.184808783 +0800 Birth: - Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 16f0673..6aaa305 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -857,20 +857,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, return err; } -static void kill_suid(struct dentry *dentry) -{ - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; - - mutex_lock(&dentry->d_inode->i_mutex); - /* - * Note we call this on write, so notify_change will not - * encounter any conflicting delegations: - */ - notify_change(dentry, &ia, NULL); - mutex_unlock(&dentry->d_inode->i_mutex); -} - /* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write @@ -942,10 +928,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_write += host_err; fsnotify_modify(file); - /* clear setuid/setgid flag after write */ - if (inode->i_mode & (S_ISUID | S_ISGID)) - kill_suid(dentry); - if (stable) { if (use_wgather) host_err = wait_for_concurrent_writes(file); -- cgit v0.10.2 From cbf7a75bc58a2458bd6e47476e47819ba3f40b00 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Mar 2014 12:19:18 -0500 Subject: nfsd4: fix delegation cleanup on error We're not cleaning up everything we need to on error. In particular, we're not removing our lease. Among other problems this can cause the struct nfs4_file used as fl_owner to be referenced after it has been destroyed. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fac2683..c6ded9f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -418,6 +418,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp) static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + if (!fp->fi_lease) + return; if (atomic_dec_and_test(&fp->fi_delegees)) { vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); fp->fi_lease = NULL; @@ -440,9 +442,11 @@ unhash_delegation(struct nfs4_delegation *dp) list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); - nfs4_put_deleg_lease(dp->dl_file); - put_nfs4_file(dp->dl_file); - dp->dl_file = NULL; + if (dp->dl_file) { + nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + } } @@ -3060,33 +3064,22 @@ out_free: static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - int status; - if (fp->fi_had_conflict) return -EAGAIN; get_nfs4_file(fp); dp->dl_file = fp; - if (!fp->fi_lease) { - status = nfs4_setlease(dp); - if (status) - goto out_free; - return 0; - } + if (!fp->fi_lease) + return nfs4_setlease(dp); spin_lock(&recall_lock); + atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); - status = -EAGAIN; - goto out_free; + return -EAGAIN; } - atomic_inc(&fp->fi_delegees); list_add(&dp->dl_perfile, &fp->fi_delegations); spin_unlock(&recall_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; -out_free: - put_nfs4_file(fp); - dp->dl_file = fp; - return status; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3173,8 +3166,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + destroy_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && -- cgit v0.10.2 From d40aa3372f90d478b6166df0321349b5aeb0aea8 Mon Sep 17 00:00:00 2001 From: Benoit Taine Date: Thu, 22 May 2014 16:32:30 +0200 Subject: nfsd: Remove assignments inside conditions Assignments should not happen inside an if conditional, but in the line before. This issue was reported by checkpatch. The semantic patch that makes this change is as follows (http://coccinelle.lip6.fr/): // @@ identifier i1; expression e1; statement S; @@ -if(!(i1 = e1)) S +i1 = e1; +if(!i1) +S // It has been tested by compilation. Signed-off-by: Benoit Taine Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 11c1fba..12b023a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -182,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p); p++; @@ -197,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p++); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -218,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -226,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->access = ntohl(*p++); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index adc5f1b..2a514e2 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -128,7 +128,8 @@ out: static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p); p++; @@ -143,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p++); if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index de6e39e..e6c01e8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp) int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -287,7 +288,8 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = decode_sattr3(p, &args->attrs); @@ -315,7 +317,8 @@ int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->access = ntohl(*p++); @@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -535,7 +540,8 @@ int nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -558,7 +564,8 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, int len; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -605,7 +613,8 @@ int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 9c769a4..1ac306b 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->offset = ntohl(*p++); @@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, hdr, dlen; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p++; /* beginoffset */ @@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, int nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -391,7 +395,8 @@ int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); -- cgit v0.10.2 From abf1135b6ee31cc17f569f2a59f87c833ba0849c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 May 2014 07:43:03 -0700 Subject: nfsd: remove nfsd4_free_slab No need for a kmem_cache_destroy wrapper in nfsd, just do proper goto based unwinding. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a037627..42f6c25 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -83,11 +83,11 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(recall_lock); -static struct kmem_cache *openowner_slab = NULL; -static struct kmem_cache *lockowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab; void nfs4_lock_state(void) @@ -2520,23 +2520,14 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) spin_unlock(&recall_lock); } -static void -nfsd4_free_slab(struct kmem_cache **slab) -{ - if (*slab == NULL) - return; - kmem_cache_destroy(*slab); - *slab = NULL; -} - void nfsd4_free_slabs(void) { - nfsd4_free_slab(&openowner_slab); - nfsd4_free_slab(&lockowner_slab); - nfsd4_free_slab(&file_slab); - nfsd4_free_slab(&stateid_slab); - nfsd4_free_slab(&deleg_slab); + kmem_cache_destroy(openowner_slab); + kmem_cache_destroy(lockowner_slab); + kmem_cache_destroy(file_slab); + kmem_cache_destroy(stateid_slab); + kmem_cache_destroy(deleg_slab); } int @@ -2545,26 +2536,34 @@ nfsd4_init_slabs(void) openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out_nomem; + goto out; lockowner_slab = kmem_cache_create("nfsd4_lockowners", sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) - goto out_nomem; + goto out_free_openowner_slab; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) - goto out_nomem; + goto out_free_lockowner_slab; stateid_slab = kmem_cache_create("nfsd4_stateids", sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) - goto out_nomem; + goto out_free_file_slab; deleg_slab = kmem_cache_create("nfsd4_delegations", sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) - goto out_nomem; + goto out_free_stateid_slab; return 0; -out_nomem: - nfsd4_free_slabs(); + +out_free_stateid_slab: + kmem_cache_destroy(stateid_slab); +out_free_file_slab: + kmem_cache_destroy(file_slab); +out_free_lockowner_slab: + kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: + kmem_cache_destroy(openowner_slab); +out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } -- cgit v0.10.2 From 16e4d93f6de7063800f3f5e68f064b0ff8fae9b7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 May 2014 13:40:22 -0400 Subject: NFSD: Ignore client's source port on RDMA transports An NFS/RDMA client's source port is meaningless for RDMA transports. The transport layer typically sets the source port value on the connection to a random ephemeral port. Currently, NFS server administrators must specify the "insecure" export option to enable clients to access exports via RDMA. But this means NFS clients can access such an export via IP using an ephemeral port, which may not be desirable. This patch eliminates the need to specify the "insecure" export option to allow NFS/RDMA clients access to an export. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=250 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b05963f..0cec1b9 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -24,6 +24,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); + int (*xpo_secure_port)(struct svc_rqst *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 06c6ff0..614956f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -793,7 +793,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); + rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; if (serv->sv_stats) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 43bcb46..0cb34f5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, release_sock(sock->sk); #endif } + +static int svc_sock_secure_port(struct svc_rqst *rqstp) +{ + return svc_port_is_privileged(svc_addr(rqstp)); +} + /* * INET callback when data has been received on the socket. */ @@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_udp_class = { @@ -1234,6 +1241,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_bc_class = { @@ -1272,6 +1280,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 25688fa..02db8d9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -65,6 +65,7 @@ static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); +static int svc_rdma_secure_port(struct svc_rqst *); static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); @@ -82,6 +83,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, + .xpo_secure_port = svc_rdma_secure_port, }; struct svc_xprt_class svc_rdma_class = { @@ -1207,6 +1209,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +static int svc_rdma_secure_port(struct svc_rqst *rqstp) +{ + return 1; +} + /* * Attempt to register the kvec representing the RPC memory with the * device. -- cgit v0.10.2 From c789102c20bbbdda6831a273e046715be9d6af79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 18 May 2014 14:05:22 -0400 Subject: SUNRPC: Fix a module reference leak in svc_handle_xprt If the accept() call fails, we need to put the module reference. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 614956f..29772e0 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -730,6 +730,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) svc_add_new_temp_xprt(serv, newxpt); + else + module_put(xprt->xpt_class->xcl_owner); } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", -- cgit v0.10.2 From ef11ce24875a8a540adc185e7bce3d7d49c8296f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 May 2014 11:22:47 +1000 Subject: SUNRPC: track whether a request is coming from a loop-back interface. If an incoming NFS request is coming from the local host, then nfsd will need to perform some special handling. So detect that possibility and make the source visible in rq_local. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 04e7632..a0dbbd1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { u32 rq_prot; /* IP protocol */ unsigned short rq_secure : 1; /* secure port */ + unsigned short rq_local : 1; /* local request */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0cec1b9..7235040 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -64,6 +64,7 @@ struct svc_xprt { #define XPT_DETACHED 10 /* detached from tempsocks list */ #define XPT_LISTENER 11 /* listening endpoint */ #define XPT_CACHE_AUTH 12 /* cache auth info */ +#define XPT_LOCAL 13 /* connection from loopback interface */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 14c9f6d..f2b7cb5 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline int sock_is_loopback(struct sock *sk) +{ + struct dst_entry *dst; + int loopback = 0; + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); + if (dst && dst->dev && + (dst->dev->features & NETIF_F_LOOPBACK)) + loopback = 1; + rcu_read_unlock(); + return loopback; +} + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0cb34f5..f3b8eb3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -874,6 +874,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); + if (sock_is_loopback(newsock->sk)) + set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); + else + clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); if (serv->sv_stats) serv->sv_stats->nettcpconn++; @@ -1119,6 +1123,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; + rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; -- cgit v0.10.2 From 8658452e4a588da603f6cb5ee2615deafcd82b71 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 May 2014 11:22:47 +1000 Subject: nfsd: Only set PF_LESS_THROTTLE when really needed. PF_LESS_THROTTLE has a very specific use case: to avoid deadlocks and live-locks while writing to the page cache in a loop-back NFS mount situation. It therefore makes sense to *only* set PF_LESS_THROTTLE in this situation. We now know when a request came from the local-host so it could be a loop-back mount. We already know when we are handling write requests, and when we are doing anything else. So combine those two to allow nfsd to still be throttled (like any other process) in every situation except when it is known to be problematic. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9a4a5f9..1879e43 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -591,12 +591,6 @@ nfsd(void *vrqstp) nfsdstats.th_cnt++; mutex_unlock(&nfsd_mutex); - /* - * We want less throttling in balance_dirty_pages() so that nfs to - * localhost doesn't cause nfsd to lock up due to all the client's - * dirty pages. - */ - current->flags |= PF_LESS_THROTTLE; set_freezable(); /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6aaa305..3aa3852 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -908,6 +908,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int stable = *stablep; int use_wgather; loff_t pos = offset; + unsigned int pflags = current->flags; + + if (rqstp->rq_local) + /* + * We want less throttling in balance_dirty_pages() + * and shrink_inactive_list() so that nfs to + * localhost doesn't cause nfsd to lock up due to all + * the client's dirty pages or its congested queue. + */ + current->flags |= PF_LESS_THROTTLE; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -941,6 +951,8 @@ out_nfserr: err = 0; else err = nfserrno(host_err); + if (rqstp->rq_local) + tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); return err; } -- cgit v0.10.2 From 5b648699afa00de0cfe679a4733bef593faaa3a4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Mar 2014 11:43:58 -0500 Subject: nfsd4: READ, READDIR, etc., are idempotent OP_MODIFIES_SOMETHING flags operations that we should be careful not to initiate without being sure we have the buffer space to encode a reply. None of these ops fall into that category. We could probably remove a few more, but this isn't a very important problem at least for ops whose reply size is easy to estimate. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ac83778..2c1ee70 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1665,37 +1665,32 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, -- cgit v0.10.2 From 24906f32378288d74289405231ddbb7120317691 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Mar 2014 15:17:18 -0400 Subject: nfsd4: allow larger 4.1 session drc slots The client is actually asking for 2532 bytes. I suspect that's a mistake. But maybe we can allow some more. In theory lock needs more if it might return a maximum-length lockowner in the denied case. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index fda9ce2..374c662 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -122,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 /* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 1024 +#define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ -- cgit v0.10.2 From 622f560e6afbd54cdd9b9a416ddab8114baedd76 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 16 May 2014 14:38:20 -0400 Subject: nfsd4: read size estimate should include padding Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2c1ee70..77c4369 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1489,7 +1489,7 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) if (rlen > maxcount) rlen = maxcount; - return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) -- cgit v0.10.2 From f34e432b673781175c782fa8c5a619114340ba38 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 16 May 2014 21:24:56 -0400 Subject: nfsd4: fix write reply size estimate The write reply also includes count and stable_how. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 77c4369..9d60a1c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1528,7 +1528,7 @@ static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_o static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) -- cgit v0.10.2 From e372ba60def1af33e1c0b9bbfa5c8f8559c1ad6b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 May 2014 12:27:11 -0400 Subject: nfsd4: decoding errors can still be cached and require space Currently a non-idempotent op reply may be cached if it fails in the proc code but not if it fails at xdr decoding. I doubt there are any xdr-decoding-time errors that would make this a problem in practice, so this probably isn't a serious bug. The space estimates should also take into account space required for encoding of error returns. Again, not a practical problem, though it would become one after future patches which will tighten the space estimates. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d60a1c..b0e075c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1215,6 +1215,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) bool nfsd4_cache_this_op(struct nfsd4_op *op) { + if (op->opnum == OP_ILLEGAL) + return false; return OPDESC(op)->op_flags & OP_CACHEME; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 18881f3..e866a06 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1677,11 +1677,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - - if (op->status) { - argp->opcnt = i+1; - break; - } /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: @@ -1689,6 +1684,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) cachethis |= nfsd4_cache_this_op(op); max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); + + if (op->status) { + argp->opcnt = i+1; + break; + } } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) -- cgit v0.10.2 From 4aea24b2ff7510932118ec9b06c35a11625194ea Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 Jan 2014 15:17:58 -0500 Subject: nfsd4: embed xdr_stream in nfsd4_compoundres This is a mechanical transformation with no change in behavior. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b0e075c..6d94adf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1270,13 +1270,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, u32 plen = 0; __be32 status; - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + + resp->xdr.buf = &rqstp->rq_res; + resp->xdr.p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); + resp->xdr.end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; @@ -1328,7 +1328,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * failed response to the next operation. If we don't * have enough room, fail with ERR_RESOURCE. */ - slack_bytes = (char *)resp->end - (char *)resp->p; + slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (slack_bytes < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) { BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 42f6c25..14bfb55 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1573,10 +1573,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; + slot->sl_datalen = (char *)resp->xdr.p - (char *)resp->cstate.datap; base = (char *)resp->cstate.datap - - (char *)resp->xbuf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, + (char *)resp->xdr.buf->head[0].iov_base; + if (read_bytes_from_xdr_buf(resp->xdr.buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; @@ -1630,7 +1630,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); resp->opcnt = slot->sl_opcnt; - resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); status = slot->sl_status; return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e866a06..9113725 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1747,10 +1747,10 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } #define RESERVE_SPACE(nbytes) do { \ - p = resp->p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ + p = resp->xdr.p; \ + BUG_ON(p + XDR_QUADLEN(nbytes) > resp->xdr.end); \ } while (0) -#define ADJUST_ARGS() resp->p = p +#define ADJUST_ARGS() resp->xdr.p = p /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -2751,9 +2751,9 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); + buflen = resp->xdr.end - resp->xdr.p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - &resp->p, buflen, getattr->ga_bmval, + &resp->xdr.p, buflen, getattr->ga_bmval, resp->rqstp, 0); return nfserr; } @@ -2953,7 +2953,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; RESERVE_SPACE(8); /* eof flag and byte count */ @@ -2991,18 +2991,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + resp->xdr.buf->head[0].iov_len = (char *)p + - (char *)resp->xdr.buf->head[0].iov_base; + resp->xdr.buf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; + resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { RESERVE_SPACE(4); WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); ADJUST_ARGS(); } return 0; @@ -3017,7 +3017,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) return nfserr_resource; @@ -3041,18 +3041,18 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd WRITE32(maxcount); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + resp->xdr.buf->head[0].iov_len = (char *)p + - (char *)resp->xdr.buf->head[0].iov_base; + resp->xdr.buf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; + resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { RESERVE_SPACE(4); WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); ADJUST_ARGS(); } return 0; @@ -3068,7 +3068,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) return nfserr_resource; @@ -3080,7 +3080,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 WRITE32(0); WRITE32(0); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; tailbase = p; maxcount = PAGE_SIZE; @@ -3121,14 +3122,15 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 p = readdir->buffer; *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - + resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = tailbase; - resp->xbuf->tail[0].iov_len = 0; - resp->p = resp->xbuf->tail[0].iov_base; - resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; + resp->xdr.buf->tail[0].iov_base = tailbase; + resp->xdr.buf->tail[0].iov_len = 0; + resp->xdr.p = resp->xdr.buf->tail[0].iov_base; + resp->xdr.end = resp->xdr.p + + (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4; return 0; err_no_verf: @@ -3587,10 +3589,10 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) session = resp->cstate.session; if (xb->page_len == 0) { - length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; + length = (char *)resp->xdr.p - (char *)xb->head[0].iov_base + pad; } else { if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; + tlen = (char *)resp->xdr.p - (char *)xb->tail[0].iov_base; length = xb->head[0].iov_len + xb->page_len + tlen + pad; } @@ -3629,7 +3631,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) op->status = nfsd4_check_resp_size(resp, 0); if (so) { so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); + so->so_replay.rp_buflen = (char *)resp->xdr.p + - (char *)(statp+1); memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); } status: @@ -3731,7 +3734,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.tail[0]; else iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; + iov->iov_len = ((char *)resp->xdr.p) - (char *)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 5ea7df3..6884d70 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -506,9 +506,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - __be32 * p; - __be32 * end; - struct xdr_buf * xbuf; + struct xdr_stream xdr; struct svc_rqst * rqstp; u32 taglen; -- cgit v0.10.2 From d518465866bfeaa41fb685d7dfc9983e0312232e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Aug 2013 16:04:46 -0400 Subject: nfsd4: tweak nfsd4_encode_getattr to take xdr_stream Just change the nfsd4_encode_getattr api. Not changing any code or adding any new functionality yet. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6d94adf..94ef528 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1061,10 +1061,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_jukebox; p = buf; - status = nfsd4_encode_fattr(&cstate->current_fh, + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, &p, - count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); /* * If nfsd4_encode_fattr() ran out of space, assume that's because diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9113725..1879250 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2045,12 +2045,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. - * - * countp is the buffer size in _words_ */ __be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + struct svc_export *exp, + struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2059,12 +2058,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - int buflen = count << 2; + __be32 *p = xdr->p; + int buflen = xdr->buf->buflen; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2491,7 +2490,7 @@ out_acl: } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *buffer = p; + xdr->p = p; status = nfs_ok; out: @@ -2513,6 +2512,27 @@ out_resource: goto out; } +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + struct xdr_buf dummy = { + .head[0] = { + .iov_base = *p, + }, + .buflen = words << 2, + }; + struct xdr_stream xdr; + __be32 ret; + + xdr_init_encode(&xdr, &dummy, NULL); + ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, + ignore_crossmnt); + *p = xdr.p; + return ret; +} + static inline int attributes_need_mount(u32 *bmval) { if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) @@ -2576,7 +2596,8 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, + cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2746,14 +2767,16 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - int buflen; + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = resp->xdr.buf; if (nfserr) return nfserr; - buflen = resp->xdr.end - resp->xdr.p - (COMPOUND_ERR_SLACK_SPACE >> 2); - nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - &resp->xdr.p, buflen, getattr->ga_bmval, + buf->buflen = (void *)resp->xdr.end - (void *)resp->xdr.p + - COMPOUND_ERR_SLACK_SPACE; + nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); return nfserr; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 6884d70..f62a055 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -562,9 +562,10 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 **buffer, int countp, - u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid *setclid); -- cgit v0.10.2 From 2d124dfaad08733077d2c46755716b2873af65a3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 Jan 2014 16:26:42 -0500 Subject: nfsd4: move proc_compound xdr encode init to helper Mechanical transformation with no change of behavior. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 94ef528..f809aa8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1253,6 +1253,18 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE; +} + /* * COMPOUND call. */ @@ -1270,13 +1282,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, u32 plen = 0; __be32 status; - resp->xdr.buf = &rqstp->rq_res; - resp->xdr.p = rqstp->rq_res.head[0].iov_base + - rqstp->rq_res.head[0].iov_len; + svcxdr_init_encode(rqstp, resp); resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); - resp->xdr.end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; -- cgit v0.10.2 From 1802a67894fab3ff90a3ef4f484e97a5b4515426 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 21 Jan 2014 11:06:52 -0500 Subject: nfsd4: reserve head space for krb5 integ/priv info Currently if the nfs-level part of a reply would be too large, we'll return an error to the client. But if the nfs-level part fits and leaves no room for krb5p or krb5i stuff, then we just drop the request entirely. That's no good. Instead, reserve some slack space at the end of the buffer and make sure we fail outright if we'd come close. The slack space here is a massive overstimate of what's required, we should probably try for a tighter limit at some point. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f809aa8..747d6a8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1262,7 +1262,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->buf = buf; xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE; + xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; } /* -- cgit v0.10.2 From 07d1f8020738ba3180ea9992c4fa7dbc0685396a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Mar 2014 20:39:29 -0500 Subject: nfsd4: fix encoding of out-of-space replies If nfsd4_check_resp_size() returns an error then we should really be truncating the reply here, otherwise we may leave extra garbage at the end of the rpc reply. Also add a warning to catch any cases where our reply-size estimates may be wrong in the case of a non-idempotent operation. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 747d6a8..bf8cddf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1171,9 +1171,7 @@ struct nfsd4_operation { static struct nfsd4_operation nfsd4_ops[]; -#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum); -#endif /* * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1859,14 +1857,21 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; -#ifdef NFSD_DEBUG +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; return "unknown_operation"; } -#endif #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1879250..24ba652 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3637,6 +3637,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { struct nfs4_stateowner *so = resp->cstate.replay_owner; __be32 *statp; + nfsd4_enc encoder; __be32 *p; RESERVE_SPACE(8); @@ -3648,10 +3649,24 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); - op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); + encoder = nfsd4_enc_ops[op->opnum]; + op->status = encoder(resp, op->status, &op->u); /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) op->status = nfsd4_check_resp_size(resp, 0); + if (op->status == nfserr_resource || + op->status == nfserr_rep_too_big || + op->status == nfserr_rep_too_big_to_cache) { + /* + * The operation may have already been encoded or + * partially encoded. No op returns anything additional + * in the case of one of these three errors, so we can + * just truncate back to after the status. But it's a + * bug if we had to do this on a non-idempotent op: + */ + warn_on_nonidempotent_op(op); + resp->xdr.p = statp + 1; + } if (so) { so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = (char *)resp->xdr.p diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f62a055..15ca477 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -536,6 +536,8 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +void warn_on_nonidempotent_op(struct nfsd4_op *op); + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void -- cgit v0.10.2 From 5f4ab9458755eddc66912a15319363bf311f7fc8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Mar 2014 11:01:37 -0500 Subject: nfsd4: allow space for final error return This post-encoding check should be taking into account the need to encode at least an out-of-space error to the following op (if any). Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 24ba652..2ed8036 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3636,6 +3636,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { struct nfs4_stateowner *so = resp->cstate.replay_owner; + struct svc_rqst *rqstp = resp->rqstp; __be32 *statp; nfsd4_enc encoder; __be32 *p; @@ -3652,8 +3653,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); /* nfsd4_check_resp_size guarantees enough room for error status */ - if (!op->status) - op->status = nfsd4_check_resp_size(resp, 0); + if (!op->status) { + int space_needed = 0; + if (!nfsd4_last_compound_op(rqstp)) + space_needed = COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, space_needed); + } if (op->status == nfserr_resource || op->status == nfserr_rep_too_big || op->status == nfserr_rep_too_big_to_cache) { -- cgit v0.10.2 From ddd1ea56367202f6c99135cd59de7a97af4c4ffd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Aug 2013 21:32:25 -0400 Subject: nfsd4: use xdr_reserve_space in attribute encoding This is a cosmetic change for now; no change in behavior. Note we're just depending on xdr_reserve_space to do the bounds checking for us, we're not really depending on its adjustment of iovec or xdr_buf lengths yet, as those are fixed up by as necessary after the fact by read-link operations and by nfs4svc_encode_compoundres. However we do have to update xdr->iov on read-like operations to prevent xdr_reserve_space from messing with the already-fixed-up length of the the head. When the attribute encoding fails partway through we have to undo the length adjustments made so far. We do it manually for now, but later patches will add an xdr_truncate_encode() helper to handle cases like this. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index b481e1f..a986ceb 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -49,7 +49,7 @@ struct svc_rqst; struct nfs4_acl *nfs4_acl_new(int); int nfs4_acl_get_whotype(char *, u32); -__be32 nfs4_acl_write_who(int who, __be32 **p, int *len); +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index 66e58db..a3f3490 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net) __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); -__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *); -__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t); #endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 7c7c025..d714156 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -919,20 +919,19 @@ nfs4_acl_get_whotype(char *p, u32 len) return NFS4_ACL_WHO_NAMED; } -__be32 nfs4_acl_write_who(int who, __be32 **p, int *len) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) { + __be32 *p; int i; - int bytes; for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { if (s2t_map[i].type != who) continue; - bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2); - if (bytes > *len) + p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, s2t_map[i].string, + p = xdr_encode_opaque(p, s2t_map[i].string, s2t_map[i].stringlen); - *len -= bytes; return 0; } WARN_ON_ONCE(1); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c0dfde6..a0ab0a8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -551,44 +551,43 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return 0; } -static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) { char buf[11]; int len; - int bytes; + __be32 *p; len = sprintf(buf, "%u", id); - bytes = 4 + (XDR_QUADLEN(len) << 2); - if (bytes > *buflen) + p = xdr_reserve_space(xdr, len + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, buf, len); - *buflen -= bytes; + p = xdr_encode_opaque(p, buf, len); return 0; } -static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) +static __be32 idmap_id_to_name(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { struct ent *item, key = { .id = id, .type = type, }; + __be32 *p; int ret; - int bytes; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) - return encode_ascii_id(id, p, buflen); + return encode_ascii_id(xdr, id); if (ret) return nfserrno(ret); ret = strlen(item->name); WARN_ON_ONCE(ret > IDMAP_NAMESZ); - bytes = 4 + (XDR_QUADLEN(ret) << 2); - if (bytes > *buflen) + p = xdr_reserve_space(xdr, ret + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, item->name, ret); - *buflen -= bytes; + p = xdr_encode_opaque(p, item->name, ret); cache_put(&item->h, nn->idtoname_cache); return 0; } @@ -622,11 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u return idmap_name_to_id(rqstp, type, name, namelen, id); } -static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) +static __be32 encode_name_from_id(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) - return encode_ascii_id(id, p, buflen); - return idmap_id_to_name(rqstp, type, id, p, buflen); + return encode_ascii_id(xdr, id); + return idmap_id_to_name(xdr, rqstp, type, id); } __be32 @@ -655,14 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, return status; } -__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kuid_t uid) { u32 id = from_kuid(&init_user_ns, uid); - return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } -__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kgid_t gid) { u32 id = from_kgid(&init_user_ns, gid); - return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bf8cddf..41c7c0a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1259,6 +1259,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, struct kvec *head = buf->head; xdr->buf = buf; + xdr->iov = head; xdr->p = head->iov_base + head->iov_len; xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2ed8036..27d1e94 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1755,18 +1755,20 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ -static __be32 nfsd4_encode_components_esc(char sep, char *components, - __be32 **pp, int *buflen, - char esc_enter, char esc_exit) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, + char *components, char esc_enter, + char esc_exit) { - __be32 *p = *pp; - __be32 *countp = p; + __be32 *p; + __be32 *countp; int strlen, count=0; char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; + countp = p; WRITE32(0); /* We will fill this in with @count later */ end = str = components; while (*end) { @@ -1789,7 +1791,8 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, strlen = end - str; if (strlen) { - if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + p = xdr_reserve_space(xdr, strlen + 4); + if (!p) return nfserr_resource; WRITE32(strlen); WRITEMEM(str, strlen); @@ -1799,7 +1802,6 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, end++; str = end; } - *pp = p; p = countp; WRITE32(count); return 0; @@ -1808,40 +1810,39 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, /* Encode as an array of strings the string given with components * separated @sep. */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, + char *components) { - return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); + return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); } /* * encode a location element of a fs_locations structure */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, + struct nfsd4_fs_location *location) { __be32 status; - __be32 *p = *pp; - status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, + status = nfsd4_encode_components_esc(xdr, ':', location->hosts, '[', ']'); if (status) return status; - status = nfsd4_encode_components('/', location->path, &p, buflen); + status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - *pp = p; return 0; } /* * Encode a path in RFC3530 'pathname4' format */ -static __be32 nfsd4_encode_path(const struct path *root, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { struct path cur = *path; - __be32 *p = *pp; + __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -1872,9 +1873,9 @@ static __be32 nfsd4_encode_path(const struct path *root, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } - - *buflen -= 4; - if (*buflen < 0) + err = nfserr_resource; + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_free; WRITE32(ncomponents); @@ -1884,8 +1885,8 @@ static __be32 nfsd4_encode_path(const struct path *root, spin_lock(&dentry->d_lock); len = dentry->d_name.len; - *buflen -= 4 + (XDR_QUADLEN(len) << 2); - if (*buflen < 0) { + p = xdr_reserve_space(xdr, len + 4); + if (!p) { spin_unlock(&dentry->d_lock); goto out_free; } @@ -1897,7 +1898,6 @@ static __be32 nfsd4_encode_path(const struct path *root, ncomponents--; } - *pp = p; err = 0; out_free: dprintk(")\n"); @@ -1908,8 +1908,8 @@ out_free: return err; } -static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, + struct svc_rqst *rqstp, const struct path *path) { struct svc_export *exp_ps; __be32 res; @@ -1917,7 +1917,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, exp_ps = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp_ps)) return nfserrno(PTR_ERR(exp_ps)); - res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); exp_put(exp_ps); return res; } @@ -1925,28 +1925,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, /* * encode a fs_locations structure */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, - struct svc_export *exp, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, + struct svc_rqst *rqstp, struct svc_export *exp) { __be32 status; int i; - __be32 *p = *pp; + __be32 *p; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); + status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); if (status) return status; - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; WRITE32(fslocs->locations_count); for (i=0; ilocations_count; i++) { - status = nfsd4_encode_fs_location4(&fslocs->locations[i], - &p, buflen); + status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) return status; } - *pp = p; return 0; } @@ -1965,15 +1963,15 @@ static u32 nfs4_file_type(umode_t mode) } static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, - __be32 **p, int *buflen) +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { if (ace->whotype != NFS4_ACL_WHO_NAMED) - return nfs4_acl_write_who(ace->whotype, p, buflen); + return nfs4_acl_write_who(xdr, ace->whotype); else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen); + return nfsd4_encode_group(xdr, rqstp, ace->who_gid); else - return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen); + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ @@ -1982,31 +1980,28 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { - __be32 *p = *pp; + __be32 *p; - if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) + p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + if (!p) return nfserr_resource; /* * For now we use a 0 here to indicate the null translation; in * the future we may place a call to translation code here. */ - if ((*buflen -= 8) < 0) - return nfserr_resource; - WRITE32(0); /* lfs */ WRITE32(0); /* pi */ p = xdr_encode_opaque(p, context, len); - *buflen -= (XDR_QUADLEN(len) << 2) + 4; - - *pp = p; return 0; } #else static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { return 0; } #endif @@ -2058,8 +2053,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p = xdr->p; - int buflen = xdr->buf->buflen; + __be32 *p; + __be32 *start = xdr->p; __be32 *attrlenp; u32 dummy; u64 dummy64; @@ -2144,24 +2139,30 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ if (bmval2) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(bmval0); WRITE32(bmval1); WRITE32(bmval2); } else if (bmval1) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(2); WRITE32(bmval0); WRITE32(bmval1); } else { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE32(1); WRITE32(bmval0); } + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; attrlenp = p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { @@ -2174,13 +2175,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!contextsupport) word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(2); WRITE32(word0); WRITE32(word1); } else { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(word0); @@ -2189,7 +2192,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } } if (bmval0 & FATTR4_WORD0_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; dummy = nfs4_file_type(stat.mode); if (dummy == NF4BAD) { @@ -2199,7 +2203,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) WRITE32(NFS4_FH_PERSISTENT); @@ -2207,32 +2212,38 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; write_change(&p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_FSID) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { WRITE64(NFS4_REFERRAL_FSID_MAJOR); @@ -2254,17 +2265,20 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(rdattr_err); } @@ -2272,198 +2286,229 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct nfs4_ace *ace; if (acl == NULL) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); goto out_acl; } - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - if ((buflen -= 4*3) < 0) + p = xdr_reserve_space(xdr, 4*3); + if (!p) goto out_resource; WRITE32(ace->type); WRITE32(ace->flag); WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen); + status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; } } out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; - if (buflen < 0) + p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); + if (!p) goto out_resource; WRITE32(fhp->fh_handle.fh_size); WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); + status = nfsd4_encode_fs_locations(xdr, rqstp, exp); if (status) goto out; } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(xdr, rqstp, stat.uid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(xdr, rqstp, stat.gid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_RAWDEV) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE32((u32) MAJOR(stat.rdev)); WRITE32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(0); WRITE32(1); WRITE32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; /* * Get parent's attributes if not ignoring crossmount @@ -2475,13 +2520,14 @@ out_acl: WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { - status = nfsd4_encode_security_label(rqstp, context, - contextlen, &p, &buflen); + status = nfsd4_encode_security_label(xdr, rqstp, context, + contextlen); if (status) goto out; } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); @@ -2489,8 +2535,7 @@ out_acl: WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - xdr->p = p; + *attrlenp = htonl((char *)xdr->p - (char *)attrlenp - 4); status = nfs_ok; out: @@ -2503,6 +2548,13 @@ out: fh_put(tempfh); kfree(tempfh); } + if (status) { + int nbytes = (char *)xdr->p - (char *)start; + /* open code what *should* be xdr_truncate(xdr, len); */ + xdr->iov->iov_len -= nbytes; + xdr->buf->len -= nbytes; + xdr->p = start; + } return status; out_nfserr: status = nfserrno(err); @@ -2768,13 +2820,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = &resp->xdr; - struct xdr_buf *buf = resp->xdr.buf; if (nfserr) return nfserr; - buf->buflen = (void *)resp->xdr.end - (void *)resp->xdr.p - - COMPOUND_ERR_SLACK_SPACE; nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); @@ -2971,6 +3020,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, int v; struct page *page; unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; long len; __be32 *p; @@ -3017,6 +3067,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = p; @@ -3035,6 +3086,7 @@ static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { int maxcount; + struct xdr_stream *xdr = &resp->xdr; char *page; __be32 *p; @@ -3067,6 +3119,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = p; @@ -3086,6 +3139,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { int maxcount; loff_t offset; + struct xdr_stream *xdr = &resp->xdr; __be32 *page, *savep, *tailbase; __be32 *p; @@ -3148,6 +3202,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); + xdr->iov = xdr->buf->tail; + /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = tailbase; resp->xdr.buf->tail[0].iov_len = 0; -- cgit v0.10.2 From d3f627c815b6eb5f6be388100617c36823d661c5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:00:38 -0500 Subject: nfsd4: use xdr_stream throughout compound encoding Note this makes ADJUST_ARGS useless; we'll remove it in the following patch. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 41c7c0a..109b5a8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1284,7 +1284,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, svcxdr_init_encode(rqstp, resp); resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 27d1e94..5064cb5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1747,10 +1747,10 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } #define RESERVE_SPACE(nbytes) do { \ - p = resp->xdr.p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->xdr.end); \ + p = xdr_reserve_space(&resp->xdr, nbytes); \ + BUG_ON(!p); \ } while (0) -#define ADJUST_ARGS() resp->xdr.p = p +#define ADJUST_ARGS() WARN_ON_ONCE(p != resp->xdr.p) \ /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -3056,8 +3056,11 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); - if (nfserr) + if (nfserr) { + xdr->p -= 2; + xdr->iov->iov_len -= 8; return nfserr; + } eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); @@ -3110,9 +3113,12 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); if (nfserr == nfserr_isdir) - return nfserr_inval; - if (nfserr) + nfserr = nfserr_inval; + if (nfserr) { + xdr->p--; + xdr->iov->iov_len -= 4; return nfserr; + } WRITE32(maxcount); ADJUST_ARGS(); @@ -3213,8 +3219,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return 0; err_no_verf: - p = savep; - ADJUST_ARGS(); + xdr->p = savep; + xdr->iov->iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; return nfserr; } -- cgit v0.10.2 From f46d382a749874e1b29cfb34d4ccf283eae4fffa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 31 Jan 2014 11:19:41 -0500 Subject: nfsd4: remove ADJUST_ARGS It's just uninteresting debugging code at this point. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5064cb5..e9abf5f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1750,7 +1750,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) p = xdr_reserve_space(&resp->xdr, nbytes); \ BUG_ON(!p); \ } while (0) -#define ADJUST_ARGS() WARN_ON_ONCE(p != resp->xdr.p) \ /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -2744,7 +2743,6 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) RESERVE_SPACE(sizeof(stateid_t)); WRITE32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); } static __be32 @@ -2756,7 +2754,6 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ RESERVE_SPACE(8); WRITE32(access->ac_supported); WRITE32(access->ac_resp_access); - ADJUST_ARGS(); } return nfserr; } @@ -2771,7 +2768,6 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, WRITE32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ WRITE32(0); - ADJUST_ARGS(); } return nfserr; } @@ -2794,7 +2790,6 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(NFS4_VERIFIER_SIZE); WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } return nfserr; } @@ -2810,7 +2805,6 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITE32(2); WRITE32(create->cr_bmval[0]); WRITE32(create->cr_bmval[1]); - ADJUST_ARGS(); } return nfserr; } @@ -2842,7 +2836,6 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh RESERVE_SPACE(len + 4); WRITE32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); - ADJUST_ARGS(); } return nfserr; } @@ -2870,7 +2863,6 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ } - ADJUST_ARGS(); } static __be32 @@ -2910,7 +2902,6 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li if (!nfserr) { RESERVE_SPACE(20); write_cinfo(&p, &link->li_cinfo); - ADJUST_ARGS(); } return nfserr; } @@ -2932,7 +2923,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(open->op_bmval[0]); WRITE32(open->op_bmval[1]); WRITE32(open->op_delegate_type); - ADJUST_ARGS(); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: @@ -2949,7 +2939,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); WRITE32(0); WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_WRITE: nfsd4_encode_stateid(resp, &open->op_delegate_stateid); @@ -2970,7 +2959,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); WRITE32(0); WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ switch (open->op_why_no_deleg) { @@ -2984,7 +2972,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op RESERVE_SPACE(4); WRITE32(open->op_why_no_deleg); } - ADJUST_ARGS(); break; default: BUG(); @@ -3066,7 +3053,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; @@ -3080,7 +3066,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } return 0; } @@ -3121,7 +3106,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd } WRITE32(maxcount); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; @@ -3135,7 +3119,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } return 0; } @@ -3162,7 +3145,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; tailbase = p; @@ -3233,7 +3215,6 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(20); write_cinfo(&p, &remove->rm_cinfo); - ADJUST_ARGS(); } return nfserr; } @@ -3247,7 +3228,6 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ RESERVE_SPACE(40); write_cinfo(&p, &rename->rn_sinfo); write_cinfo(&p, &rename->rn_tinfo); - ADJUST_ARGS(); } return nfserr; } @@ -3287,7 +3267,6 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, supported = 0; RESERVE_SPACE(4); flavorsp = p++; /* to be backfilled later */ - ADJUST_ARGS(); for (i = 0; i < nflavs; i++) { rpc_authflavor_t pf = flavs[i].pseudoflavor; @@ -3301,12 +3280,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITEMEM(info.oid.data, info.oid.len); WRITE32(info.qop); WRITE32(info.service); - ADJUST_ARGS(); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; RESERVE_SPACE(4); WRITE32(pf); - ADJUST_ARGS(); } else { if (report) pr_warn("NFS: SECINFO: security flavor %u " @@ -3360,7 +3337,6 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 WRITE32(setattr->sa_bmval[1]); WRITE32(setattr->sa_bmval[2]); } - ADJUST_ARGS(); return nfserr; } @@ -3373,13 +3349,11 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); WRITEMEM(&scd->se_clientid, 8); WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } else if (nfserr == nfserr_clid_inuse) { RESERVE_SPACE(8); WRITE32(0); WRITE32(0); - ADJUST_ARGS(); } return nfserr; } @@ -3394,7 +3368,6 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w WRITE32(write->wr_bytes_written); WRITE32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } return nfserr; } @@ -3437,7 +3410,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(exid->flags); WRITE32(exid->spa_how); - ADJUST_ARGS(); switch (exid->spa_how) { case SP4_NONE: @@ -3453,7 +3425,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* empty spo_must_allow bitmap: */ WRITE32(0); - ADJUST_ARGS(); break; default: WARN_ON_ONCE(1); @@ -3479,7 +3450,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* Implementation id */ WRITE32(0); /* zero length nfs_impl_id4 array */ - ADJUST_ARGS(); return 0; } @@ -3496,7 +3466,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(sess->seqid); WRITE32(sess->flags); - ADJUST_ARGS(); RESERVE_SPACE(28); WRITE32(0); /* headerpadsz */ @@ -3506,12 +3475,10 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->fore_channel.maxops); WRITE32(sess->fore_channel.maxreqs); WRITE32(sess->fore_channel.nr_rdma_attrs); - ADJUST_ARGS(); if (sess->fore_channel.nr_rdma_attrs) { RESERVE_SPACE(4); WRITE32(sess->fore_channel.rdma_attrs); - ADJUST_ARGS(); } RESERVE_SPACE(28); @@ -3522,12 +3489,10 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->back_channel.maxops); WRITE32(sess->back_channel.maxreqs); WRITE32(sess->back_channel.nr_rdma_attrs); - ADJUST_ARGS(); if (sess->back_channel.nr_rdma_attrs) { RESERVE_SPACE(4); WRITE32(sess->back_channel.rdma_attrs); - ADJUST_ARGS(); } return 0; } @@ -3550,7 +3515,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ WRITE32(seq->status_flags); - ADJUST_ARGS(); resp->cstate.datap = p; /* DRC cache data pointer */ return 0; } @@ -3572,7 +3536,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = stateid->ts_id_status; } - ADJUST_ARGS(); return nfserr; } @@ -3707,7 +3670,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) RESERVE_SPACE(8); WRITE32(op->opnum); statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); if (op->opnum == OP_ILLEGAL) goto status; @@ -3768,11 +3730,9 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) RESERVE_SPACE(8); WRITE32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - ADJUST_ARGS(); RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); - ADJUST_ARGS(); } int -- cgit v0.10.2 From dd97fddedc251eb423408d89f2947eff9c4ea3c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:16:27 -0500 Subject: nfsd4: no need for encode_compoundres to adjust lengths xdr_reserve_space should now be calculating the length correctly as we go, so there's no longer any need to fix it up here. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 14bfb55..b06f9a0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1618,6 +1618,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; + struct kvec *head = resp->xdr.iov; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); @@ -1631,6 +1632,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, resp->opcnt = slot->sl_opcnt; resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + head->iov_len = (void *)resp->xdr.p - head->iov_base; + resp->xdr.buf->len = head->iov_len; status = slot->sl_status; return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e9abf5f..79b8e1e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3789,19 +3789,13 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; - struct kvec *iov; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (rqstp->rq_res.page_len) - iov = &rqstp->rq_res.tail[0]; - else - iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char *)resp->xdr.p) - (char *)iov->iov_base; - BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfs4_client *clp = cs->session->se_client; -- cgit v0.10.2 From 6ac90391c6e36c536cfcedbe4801a77e304205b1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:39:35 -0500 Subject: nfsd4: keep xdr buf length updated Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 109b5a8..ef3952a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1262,6 +1262,8 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->iov = head; xdr->p = head->iov_base + head->iov_len; xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 79b8e1e..57f6081 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3046,6 +3046,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) { xdr->p -= 2; xdr->iov->iov_len -= 8; + xdr->buf->len -= 8; return nfserr; } eof = (read->rd_offset + maxcount >= @@ -3053,9 +3054,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); - resp->xdr.buf->head[0].iov_len = (char *)p - - (char *)resp->xdr.buf->head[0].iov_base; + WARN_ON_ONCE(resp->xdr.buf->head[0].iov_len != (char *)p + - (char *)resp->xdr.buf->head[0].iov_base); resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3066,6 +3068,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); + xdr->buf->len -= (maxcount&3); } return 0; } @@ -3102,6 +3105,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) { xdr->p--; xdr->iov->iov_len -= 4; + xdr->buf->len -= 4; return nfserr; } @@ -3109,6 +3113,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3189,6 +3194,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 *p++ = htonl(readdir->common.err == nfserr_eof); resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); + xdr->buf->len += xdr->buf->page_len; xdr->iov = xdr->buf->tail; @@ -3204,6 +3210,7 @@ err_no_verf: xdr->p = savep; xdr->iov->iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; + xdr->buf->len = xdr->iov->iov_len; return nfserr; } @@ -3789,6 +3796,10 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; + struct xdr_buf *buf = resp->xdr.buf; + + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); p = resp->tagp; *p++ = htonl(resp->taglen); -- cgit v0.10.2 From 3e19ce762b537dd9aeefdd0849ba5f2f01ff83cf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Feb 2014 17:44:21 -0500 Subject: rpc: xdr_truncate_encode This will be used in the server side in a few cases: - when certain operations (read, readdir, readlink) fail after encoding a partial response. - when we run out of space after encoding a partial response. - in readlink, where we initially reserve PAGE_SIZE bytes for data, then truncate to the actual size. Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 15f9204..e7bb2e3 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index dd97ba3..352f3b3 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -509,6 +509,72 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) EXPORT_SYMBOL_GPL(xdr_reserve_space); /** + * xdr_truncate_encode - truncate an encode buffer + * @xdr: pointer to xdr_stream + * @len: new length of buffer + * + * Truncates the xdr stream, so that xdr->buf->len == len, + * and xdr->p points at offset len from the start of the buffer, and + * head, tail, and page lengths are adjusted to correspond. + * + * If this means moving xdr->p to a different buffer, we assume that + * that the end pointer should be set to the end of the current page, + * except in the case of the head buffer when we assume the head + * buffer's current length represents the end of the available buffer. + * + * This is *not* safe to use on a buffer that already has inlined page + * cache pages (as in a zero-copy server read reply), except for the + * simple case of truncating from one position in the tail to another. + * + */ +void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *head = buf->head; + struct kvec *tail = buf->tail; + int fraglen; + int new, old; + + if (len > buf->len) { + WARN_ON_ONCE(1); + return; + } + + fraglen = min_t(int, buf->len - len, tail->iov_len); + tail->iov_len -= fraglen; + buf->len -= fraglen; + if (tail->iov_len && buf->len == len) { + xdr->p = tail->iov_base + tail->iov_len; + /* xdr->end, xdr->iov should be set already */ + return; + } + WARN_ON_ONCE(fraglen); + fraglen = min_t(int, buf->len - len, buf->page_len); + buf->page_len -= fraglen; + buf->len -= fraglen; + + new = buf->page_base + buf->page_len; + old = new + fraglen; + xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + if (buf->page_len && buf->len == len) { + xdr->p = page_address(*xdr->page_ptr); + xdr->end = (void *)xdr->p + PAGE_SIZE; + xdr->p = (void *)xdr->p + (new % PAGE_SIZE); + /* xdr->iov should already be NULL */ + return; + } + if (fraglen) + xdr->end = head->iov_base + head->iov_len; + /* (otherwise assume xdr->end is already set) */ + head->iov_len = len; + buf->len = len; + xdr->p = head->iov_base + head->iov_len; + xdr->iov = buf->head; +} +EXPORT_SYMBOL(xdr_truncate_encode); + +/** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream * @pages: list of pages -- cgit v0.10.2 From 1fcea5b20b74cb856f5cd27161fea5329079dbd7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 20:17:02 -0500 Subject: nfsd4: use xdr_truncate_encode Now that lengths are reliable, we can use xdr_truncate instead of open-coding it everywhere. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 57f6081..c2815f48 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2053,7 +2053,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct svc_fh *tempfh = NULL; struct kstatfs statfs; __be32 *p; - __be32 *start = xdr->p; + int starting_len = xdr->buf->len; __be32 *attrlenp; u32 dummy; u64 dummy64; @@ -2547,13 +2547,8 @@ out: fh_put(tempfh); kfree(tempfh); } - if (status) { - int nbytes = (char *)xdr->p - (char *)start; - /* open code what *should* be xdr_truncate(xdr, len); */ - xdr->iov->iov_len -= nbytes; - xdr->buf->len -= nbytes; - xdr->p = start; - } + if (status) + xdr_truncate_encode(xdr, starting_len); return status; out_nfserr: status = nfserrno(err); @@ -3008,6 +3003,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct page *page; unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; + int starting_len = xdr->buf->len; long len; __be32 *p; @@ -3044,9 +3040,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, &maxcount); if (nfserr) { - xdr->p -= 2; - xdr->iov->iov_len -= 8; - xdr->buf->len -= 8; + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + xdr->buf->page_len = 0; + xdr_truncate_encode(xdr, starting_len); return nfserr; } eof = (read->rd_offset + maxcount >= @@ -3079,6 +3079,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int maxcount; struct xdr_stream *xdr = &resp->xdr; char *page; + int length_offset = xdr->buf->len; __be32 *p; if (nfserr) @@ -3103,9 +3104,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr == nfserr_isdir) nfserr = nfserr_inval; if (nfserr) { - xdr->p--; - xdr->iov->iov_len -= 4; - xdr->buf->len -= 4; + xdr_truncate_encode(xdr, length_offset); return nfserr; } @@ -3134,7 +3133,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int maxcount; loff_t offset; struct xdr_stream *xdr = &resp->xdr; - __be32 *page, *savep, *tailbase; + int starting_len = xdr->buf->len; + __be32 *page, *tailbase; __be32 *p; if (nfserr) @@ -3145,7 +3145,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr_resource; RESERVE_SPACE(NFS4_VERIFIER_SIZE); - savep = p; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); @@ -3207,10 +3206,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return 0; err_no_verf: - xdr->p = savep; - xdr->iov->iov_len = ((char *)resp->xdr.p) - - (char *)resp->xdr.buf->head[0].iov_base; - xdr->buf->len = xdr->iov->iov_len; + xdr_truncate_encode(xdr, starting_len); return nfserr; } -- cgit v0.10.2 From 082d4bd72a4527c6568f53f4a5de74e804666fa7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 29 Aug 2013 15:42:52 -0400 Subject: nfsd4: "backfill" using write_bytes_to_xdr_buf Normally xdr encoding proceeds in a single pass from start of a buffer to end, but sometimes we have to write a few bytes to an earlier position. Use write_bytes_to_xdr_buf for these cases rather than saving a pointer to write to. We plan to rewrite xdr_reserve_space to handle encoding across page boundaries using a scratch buffer, and don't want to risk writing to a pointer that was contained in a scratch buffer. Also it will no longer be safe to calculate lengths by subtracting two pointers, so use xdr_buf offsets instead. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c2815f48..37a73c8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1759,16 +1759,19 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, char esc_exit) { __be32 *p; - __be32 *countp; + __be32 pathlen; + int pathlen_offset; int strlen, count=0; char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); + + pathlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - countp = p; - WRITE32(0); /* We will fill this in with @count later */ + p++; /* We will fill this in with @count later */ + end = str = components; while (*end) { bool found_esc = false; @@ -1801,8 +1804,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, end++; str = end; } - p = countp; - WRITE32(count); + pathlen = htonl(xdr->buf->len - pathlen_offset); + write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4); return 0; } @@ -2054,7 +2057,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstatfs statfs; __be32 *p; int starting_len = xdr->buf->len; - __be32 *attrlenp; + int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2159,10 +2163,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(1); WRITE32(bmval0); } + + attrlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - attrlenp = p++; /* to be backfilled later */ + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 word0 = nfsd_suppattrs0(minorversion); @@ -2534,7 +2540,8 @@ out_acl: WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)xdr->p - (char *)attrlenp - 4); + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: @@ -3664,15 +3671,16 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { + struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; - __be32 *statp; + int post_err_offset; nfsd4_enc encoder; __be32 *p; RESERVE_SPACE(8); WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ + post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; @@ -3698,20 +3706,19 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * bug if we had to do this on a non-idempotent op: */ warn_on_nonidempotent_op(op); - resp->xdr.p = statp + 1; + xdr_truncate_encode(xdr, post_err_offset); } if (so) { + int len = xdr->buf->len - post_err_offset; + so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = (char *)resp->xdr.p - - (char *)(statp+1); - memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + so->so_replay.rp_buf, len); } status: - /* - * Note: We write the status directly, instead of using WRITE32(), - * since it is already in network byte order. - */ - *statp = op->status; + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* -- cgit v0.10.2 From d0a381dd0eda1cc769a5762d0eed4d0d662219f2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 30 Jan 2014 17:18:38 -0500 Subject: nfsd4: teach encoders to handle reserve_space failures We've tried to prevent running out of space with COMPOUND_SLACK_SPACE and special checking in those operations (getattr) whose result can vary enormously. However: - COMPOUND_SLACK_SPACE may be difficult to maintain as we add more protocol. - BUG_ON or page faulting on failure seems overly fragile. - Especially in the 4.1 case, we prefer not to fail compounds just because the returned result came *close* to session limits. (Though perfect enforcement here may be difficult.) - I'd prefer encoding to be uniform for all encoders instead of having special exceptions for encoders containing, for example, attributes. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ef3952a..6a5b701 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1400,7 +1400,7 @@ encode_op: } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 37a73c8..799a904 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1746,11 +1746,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } } -#define RESERVE_SPACE(nbytes) do { \ - p = xdr_reserve_space(&resp->xdr, nbytes); \ - BUG_ON(!p); \ -} while (0) - /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ @@ -2737,23 +2732,29 @@ fail: return -EINVAL; } -static void -nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { __be32 *p; - RESERVE_SPACE(sizeof(stateid_t)); + p = xdr_reserve_space(xdr, sizeof(stateid_t)); + if (!p) + return nfserr_resource; WRITE32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + return 0; } static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(access->ac_supported); WRITE32(access->ac_resp_access); } @@ -2762,10 +2763,13 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ @@ -2777,8 +2781,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &close->cl_stateid); + nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); return nfserr; } @@ -2787,10 +2793,13 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); } return nfserr; @@ -2799,10 +2808,13 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(32); + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; write_cinfo(&p, &create->cr_cinfo); WRITE32(2); WRITE32(create->cr_bmval[0]); @@ -2829,13 +2841,16 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct xdr_stream *xdr = &resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; - RESERVE_SPACE(len + 4); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; WRITE32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); } @@ -2846,13 +2861,15 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh * Including all fields other than the name, a LOCK4denied structure requires * 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. */ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) { struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; - RESERVE_SPACE(32 + XDR_LEN(conf->len)); + p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); + if (!p) + return nfserr_resource; WRITE64(ld->ld_start); WRITE64(ld->ld_length); WRITE32(ld->ld_type); @@ -2865,15 +2882,18 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ } + return nfserr_denied; } static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lock->lk_denied); + nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); return nfserr; } @@ -2881,16 +2901,20 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { + struct xdr_stream *xdr = &resp->xdr; + if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); return nfserr; } static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &locku->lu_stateid); + nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); return nfserr; } @@ -2899,10 +2923,13 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; write_cinfo(&p, &link->li_cinfo); } return nfserr; @@ -2912,13 +2939,18 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) goto out; - nfsd4_encode_stateid(resp, &open->op_stateid); - RESERVE_SPACE(40); + nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); + if (nfserr) + goto out; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; write_cinfo(&p, &open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); @@ -2930,8 +2962,12 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(20); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; WRITE32(open->op_recall); /* @@ -2943,8 +2979,12 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(32); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; WRITE32(0); /* @@ -2966,12 +3006,16 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op switch (open->op_why_no_deleg) { case WND4_CONTENTION: case WND4_RESOURCE: - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(open->op_why_no_deleg); WRITE32(0); /* deleg signaling not supported yet */ break; default: - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(open->op_why_no_deleg); } break; @@ -2986,8 +3030,10 @@ out: static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); return nfserr; } @@ -2995,8 +3041,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &od->od_stateid); + nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); return nfserr; } @@ -3019,7 +3067,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (resp->xdr.buf->page_len) return nfserr_resource; - RESERVE_SPACE(8); /* eof flag and byte count */ + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) + return nfserr_resource; maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) @@ -3071,7 +3121,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_base = p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3099,7 +3151,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd page = page_address(*(resp->rqstp->rq_next_page++)); maxcount = PAGE_SIZE; - RESERVE_SPACE(4); + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; /* * XXX: By default, the ->readlink() VFS op will truncate symlinks @@ -3126,7 +3181,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->tail[0].iov_base = p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3151,7 +3208,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!*resp->rqstp->rq_next_page) return nfserr_resource; - RESERVE_SPACE(NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); @@ -3220,10 +3279,13 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; write_cinfo(&p, &remove->rm_cinfo); } return nfserr; @@ -3232,10 +3294,13 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(40); + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; write_cinfo(&p, &rename->rn_sinfo); write_cinfo(&p, &rename->rn_tinfo); } @@ -3243,7 +3308,7 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, __be32 nfserr, struct svc_export *exp) { u32 i, nflavs, supported; @@ -3254,6 +3319,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (nfserr) goto out; + nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3275,7 +3341,9 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, } supported = 0; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { @@ -3284,7 +3352,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (rpcauth_get_gssinfo(pf, &info) == 0) { supported++; - RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4); + p = xdr_reserve_space(xdr, 4 + 4 + + XDR_LEN(info.oid.len) + 4 + 4); + if (!p) + goto out; WRITE32(RPC_AUTH_GSS); WRITE32(info.oid.len); WRITEMEM(info.oid.data, info.oid.len); @@ -3292,7 +3363,9 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITE32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; WRITE32(pf); } else { if (report) @@ -3304,7 +3377,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (nflavs != supported) report = false; *flavorsp = htonl(supported); - + nfserr = 0; out: if (exp) exp_put(exp); @@ -3315,14 +3388,18 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); } static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); } /* @@ -3332,9 +3409,12 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; if (nfserr) { WRITE32(3); WRITE32(0); @@ -3353,15 +3433,20 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; WRITEMEM(&scd->se_clientid, 8); WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(0); WRITE32(0); } @@ -3371,10 +3456,13 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; WRITE32(write->wr_bytes_written); WRITE32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); @@ -3394,6 +3482,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -3409,11 +3498,13 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, server_scope = utsname()->nodename; server_scope_sz = strlen(server_scope); - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + 4 /* eir_sequenceid */ + 4 /* eir_flags */ + 4 /* spr_how */); + if (!p) + return nfserr_resource; WRITEMEM(&exid->clientid, 8); WRITE32(exid->seqid); @@ -3426,7 +3517,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, break; case SP4_MACH_CRED: /* spo_must_enforce, spo_must_allow */ - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; /* spo_must_enforce bitmap: */ WRITE32(2); @@ -3440,13 +3533,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, WARN_ON_ONCE(1); } - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* so_minor_id */ + 4 /* so_major_id.len */ + (XDR_QUADLEN(major_id_sz) * 4) + 4 /* eir_server_scope.len */ + (XDR_QUADLEN(server_scope_sz) * 4) + 4 /* eir_server_impl_id.count (0) */); + if (!p) + return nfserr_resource; /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ @@ -3467,17 +3562,22 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(24); + p = xdr_reserve_space(xdr, 24); + if (!p) + return nfserr_resource; WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(sess->seqid); WRITE32(sess->flags); - RESERVE_SPACE(28); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; WRITE32(0); /* headerpadsz */ WRITE32(sess->fore_channel.maxreq_sz); WRITE32(sess->fore_channel.maxresp_sz); @@ -3487,11 +3587,15 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(sess->fore_channel.rdma_attrs); } - RESERVE_SPACE(28); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; WRITE32(0); /* headerpadsz */ WRITE32(sess->back_channel.maxreq_sz); WRITE32(sess->back_channel.maxresp_sz); @@ -3501,7 +3605,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(sess->back_channel.rdma_attrs); } return 0; @@ -3511,12 +3617,15 @@ static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); + if (!p) + return nfserr_resource; WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(seq->seqid); WRITE32(seq->slotid); @@ -3533,13 +3642,16 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); + p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); + if (!p) + return nfserr_resource; *p++ = htonl(test_stateid->ts_num_ids); list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { @@ -3678,7 +3790,11 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) nfsd4_enc encoder; __be32 *p; - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + return; + } WRITE32(op->opnum); post_err_offset = xdr->buf->len; @@ -3730,18 +3846,21 @@ status: * called with nfs4_lock_state() held */ void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); + if (!p) { + WARN_ON_ONCE(1); + return; + } WRITE32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 15ca477..ea5ad5d 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, -- cgit v0.10.2 From 4e21ac4b6f1d09c56f7d10916eaa738361214ab7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 15:15:11 -0400 Subject: nfsd4: reserve space before inlining 0-copy pages Once we've included page-cache pages in the encoding it's difficult to remove them and restart encoding. (xdr_truncate_encode doesn't handle that case.) So, make sure we'll have adequate space to finish the operation first. For now COMPOUND_SLACK_SPACE checks should prevent this case happening, but we want to remove those checks. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 799a904..8728715 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3071,6 +3071,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; + /* Make sure there will be room for padding if needed: */ + if (xdr->end - xdr->p < 1) + return nfserr_resource; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; @@ -3122,8 +3126,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3156,6 +3158,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (!p) return nfserr_resource; + if (xdr->end - xdr->p < 1) + return nfserr_resource; + /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If @@ -3182,8 +3187,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); -- cgit v0.10.2 From 67492c990300912c717bc95e9f705feb63de2df9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 8 Mar 2014 16:42:05 -0500 Subject: nfsd4: nfsd4_check_resp_size needn't recalculate length We're keeping the length updated as we go now, so there's no need for the extra calculation here. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8728715..89c65a3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3752,32 +3752,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { */ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { - struct xdr_buf *xb = &resp->rqstp->rq_res; + struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = NULL; struct nfsd4_slot *slot = resp->cstate.slot; - u32 length, tlen = 0; if (!nfsd4_has_session(&resp->cstate)) return 0; session = resp->cstate.session; - if (xb->page_len == 0) { - length = (char *)resp->xdr.p - (char *)xb->head[0].iov_base + pad; - } else { - if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->xdr.p - (char *)xb->tail[0].iov_base; - - length = xb->head[0].iov_len + xb->page_len + tlen + pad; - } - dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, - length, xb->page_len, tlen, pad); - - if (length > session->se_fchannel.maxresp_sz) + if (buf->len + pad > session->se_fchannel.maxresp_sz) return nfserr_rep_too_big; if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - length > session->se_fchannel.maxresp_cached) + buf->len + pad > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; return 0; -- cgit v0.10.2 From ea8d7720b274607f48fb524337254a9c43dbc2df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 8 Mar 2014 17:19:55 -0500 Subject: nfsd4: remove redundant encode buffer size checking Now that all op encoders can handle running out of space, we no longer need to check the remaining size for every operation; only nonidempotent operations need that check, and that can be done by nfsd4_check_resp_size. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6a5b701..a5b666c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1279,7 +1279,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; - int slack_bytes; u32 plen = 0; __be32 status; @@ -1333,19 +1332,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; - goto encode_op; - } - opdesc = OPDESC(op); if (!current_fh->fh_dentry) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89c65a3..df643e0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3753,20 +3753,24 @@ static nfsd4_enc nfsd4_enc_ops[] = { __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { struct xdr_buf *buf = &resp->rqstp->rq_res; - struct nfsd4_session *session = NULL; + struct nfsd4_session *session = resp->cstate.session; struct nfsd4_slot *slot = resp->cstate.slot; + int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; - if (!nfsd4_has_session(&resp->cstate)) - return 0; + if (nfsd4_has_session(&resp->cstate)) { - session = resp->cstate.session; + if (buf->len + pad > session->se_fchannel.maxresp_sz) + return nfserr_rep_too_big; - if (buf->len + pad > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; + if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && + buf->len + pad > session->se_fchannel.maxresp_cached) + return nfserr_rep_too_big_to_cache; + } - if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + pad > session->se_fchannel.maxresp_cached) - return nfserr_rep_too_big_to_cache; + if (pad > slack_bytes) { + WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); + return nfserr_resource; + } return 0; } -- cgit v0.10.2 From a8095f7e80fbf3e0efe4ee5cd3f509113c56290f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 17:54:34 -0400 Subject: nfsd4: size-checking cleanup Better variable name, some comments, etc. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a5b666c..3ce431b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1279,7 +1279,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; - u32 plen = 0; __be32 status; svcxdr_init_encode(rqstp, resp); @@ -1349,9 +1348,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { - plen = opdesc->op_rsize_bop(rqstp, op); /* - * If there's still another operation, make sure + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure * we'll have space to at least encode an error: */ if (resp->opcnt < args->opcnt) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df643e0..bd529e5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3739,35 +3739,36 @@ static nfsd4_enc nfsd4_enc_ops[] = { }; /* - * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation with pad. + * Calculate whether we still have space to encode repsize bytes. + * There are two considerations: + * - For NFS versions >=4.1, the size of the reply must stay within + * session limits + * - For all NFS versions, we must stay within limited preallocated + * buffer space. * - * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() - * which was specified at nfsd4_operation, else pad is zero. - * - * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. - * - * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so - * will be at least a page and will therefore hold the xdr_buf head. + * This is called before the operation is processed, so can only provide + * an upper estimate. For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects. */ -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = resp->cstate.session; - struct nfsd4_slot *slot = resp->cstate.slot; int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; - if (buf->len + pad > session->se_fchannel.maxresp_sz) + if (buf->len + respsize > session->se_fchannel.maxresp_sz) return nfserr_rep_too_big; if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + pad > session->se_fchannel.maxresp_cached) + buf->len + respsize > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; } - if (pad > slack_bytes) { + if (respsize > slack_bytes) { WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); return nfserr_resource; } -- cgit v0.10.2 From 2825a7f90753012babe7ee292f4a1eadd3706f92 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Aug 2013 16:04:46 -0400 Subject: nfsd4: allow encoding across page boundaries After this we can handle for example getattr of very large ACLs. Read, readdir, readlink are still special cases with their own limits. Also we can't handle a new operation starting close to the end of a page. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3ce431b..5d8f915 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1264,6 +1264,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - 2 * RPC_MAX_AUTH_SIZE; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bd529e5..d3a576d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum) * the head and tail in another page: */ return 2 * PAGE_SIZE; + case OP_GETATTR: case OP_READ: return INT_MAX; default: @@ -2560,21 +2561,31 @@ out_resource: goto out; } +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { - struct xdr_buf dummy = { - .head[0] = { - .iov_base = *p, - }, - .buflen = words << 2, - }; + struct xdr_buf dummy; struct xdr_stream xdr; __be32 ret; - xdr_init_encode(&xdr, &dummy, NULL); + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, ignore_crossmnt); *p = xdr.p; @@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) @@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (xdr->end - xdr->p < 1) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; @@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - (char *)resp->xdr.buf->head[0].iov_base); resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += v; + xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) @@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd maxcount = PAGE_SIZE; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - if (xdr->end - xdr->p < 1) return nfserr_resource; @@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += 1; + xdr->buf->buflen -= PAGE_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); @@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 xdr->iov = xdr->buf->tail; + xdr->page_ptr++; + xdr->buf->buflen -= PAGE_SIZE; + xdr->iov = xdr->buf->tail; + /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = tailbase; resp->xdr.buf->tail[0].iov_len = 0; @@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) { int space_needed = 0; @@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a0dbbd1..85cb647 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -244,6 +244,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ + struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e7bb2e3..b23d69f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 29772e0..b4737fb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } rqstp->rq_pages[i] = p; } + rqstp->rq_page_end = &rqstp->rq_pages[i]; rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ /* Make arg->head point to first page and arg->pages point to rest */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 352f3b3..2b546e8 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + xdr_set_scratch_buffer(xdr, NULL, 0); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -482,6 +483,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) EXPORT_SYMBOL_GPL(xdr_init_encode); /** + * xdr_commit_encode - Ensure all data is written to buffer + * @xdr: pointer to xdr_stream + * + * We handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place; xdr_commit_encode does that copying. + * + * Normally the caller doesn't need to call this directly, as the + * following xdr_reserve_space will do it. But an explicit call may be + * required at the end of encoding, or any other time when the xdr_buf + * data might be read. + */ +void xdr_commit_encode(struct xdr_stream *xdr) +{ + int shift = xdr->scratch.iov_len; + void *page; + + if (shift == 0) + return; + page = page_address(*xdr->page_ptr); + memcpy(xdr->scratch.iov_base, page, shift); + memmove(page, page + shift, (void *)xdr->p - page); + xdr->scratch.iov_len = 0; +} +EXPORT_SYMBOL_GPL(xdr_commit_encode); + +__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +{ + static __be32 *p; + int space_left; + int frag1bytes, frag2bytes; + + if (nbytes > PAGE_SIZE) + return NULL; /* Bigger buffers require special handling */ + if (xdr->buf->len + nbytes > xdr->buf->buflen) + return NULL; /* Sorry, we're totally out of space */ + frag1bytes = (xdr->end - xdr->p) << 2; + frag2bytes = nbytes - frag1bytes; + if (xdr->iov) + xdr->iov->iov_len += frag1bytes; + else { + xdr->buf->page_len += frag1bytes; + xdr->page_ptr++; + } + xdr->iov = NULL; + /* + * If the last encode didn't end exactly on a page boundary, the + * next one will straddle boundaries. Encode into the next + * page, then copy it back later in xdr_commit_encode. We use + * the "scratch" iov to track any temporarily unused fragment of + * space at the end of the previous buffer: + */ + xdr->scratch.iov_base = xdr->p; + xdr->scratch.iov_len = frag1bytes; + p = page_address(*xdr->page_ptr); + /* + * Note this is where the next encode will start after we've + * shifted this one back: + */ + xdr->p = (void *)p + frag2bytes; + space_left = xdr->buf->buflen - xdr->buf->len; + xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + xdr->buf->page_len += frag2bytes; + xdr->buf->len += nbytes; + return p; +} + +/** * xdr_reserve_space - Reserve buffer space for sending * @xdr: pointer to xdr_stream * @nbytes: number of bytes to reserve @@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) __be32 *p = xdr->p; __be32 *q; + xdr_commit_encode(xdr); /* align nbytes on the next 32-bit boundary */ nbytes += 3; nbytes &= ~3; q = p + (nbytes >> 2); if (unlikely(q > xdr->end || q < p)) - return NULL; + return xdr_get_next_encode_buffer(xdr, nbytes); xdr->p = q; - xdr->iov->iov_len += nbytes; + if (xdr->iov) + xdr->iov->iov_len += nbytes; + else + xdr->buf->page_len += nbytes; xdr->buf->len += nbytes; return p; } @@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(1); return; } + xdr_commit_encode(xdr); fraglen = min_t(int, buf->len - len, tail->iov_len); tail->iov_len -= fraglen; -- cgit v0.10.2 From f5236013a21c118e9d317e90c7a152dfe51fab93 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Mar 2014 17:57:57 -0400 Subject: nfsd4: convert 4.1 replay encoding Limits on maxresp_sz mean that we only ever need to replay rpc's that are contained entirely in the head. The one exception is very small zero-copy reads. That's an odd corner case as clients wouldn't normally ask those to be cached. in any case, this seems a little more robust. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b06f9a0..8e22ea4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1560,6 +1560,7 @@ out_err: void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { + struct xdr_buf *buf = resp->xdr.buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -1573,11 +1574,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->xdr.p - (char *)resp->cstate.datap; - base = (char *)resp->cstate.datap - - (char *)resp->xdr.buf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xdr.buf, base, slot->sl_data, - slot->sl_datalen)) + base = resp->cstate.data_offset; + slot->sl_datalen = buf->len - base; + if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; } @@ -1618,7 +1617,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct kvec *head = resp->xdr.iov; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); @@ -1627,16 +1627,16 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, if (status) return status; - /* The sequence operation has been encoded, cstate->datap set. */ - memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); + p = xdr_reserve_space(xdr, slot->sl_datalen); + if (!p) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); + xdr_commit_encode(xdr); resp->opcnt = slot->sl_opcnt; - resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); - head->iov_len = (void *)resp->xdr.p - head->iov_base; - resp->xdr.buf->len = head->iov_len; - status = slot->sl_status; - - return status; + return slot->sl_status; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d3a576d..a90a1e8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3659,7 +3659,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ WRITE32(seq->status_flags); - resp->cstate.datap = p; /* DRC cache data pointer */ + resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index ea5ad5d..ee9ffdc 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -58,7 +58,7 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; - __be32 *datap; + int data_offset; size_t iovlen; u32 minorversion; __be32 status; -- cgit v0.10.2 From 8c7424cff6bd33459945646cfcbf6dc6c899ab24 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Mar 2014 12:19:10 -0400 Subject: nfsd4: don't try to encode conflicting owner if low on space I ran into this corner case in testing: in theory clients can provide state owners up to 1024 bytes long. In the sessions case there might be a risk of this pushing us over the DRC slot size. The conflicting owner isn't really that important, so let's humor a client that provides a small maxresponsize_cached by allowing ourselves to return without the conflicting owner instead of outright failing the operation. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5d8f915..20aad5a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1432,7 +1432,8 @@ out: #define op_encode_change_info_maxsz (5) #define nfs4_fattr_bitmap_maxsz (4) -#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a90a1e8..2f0ea20 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2878,9 +2878,20 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; +again: p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); - if (!p) + if (!p) { + /* + * Don't fail to return the result just because we can't + * return the conflicting open: + */ + if (conf->len) { + conf->len = 0; + conf->data = NULL; + goto again; + } return nfserr_resource; + } WRITE64(ld->ld_start); WRITE64(ld->ld_length); WRITE32(ld->ld_type); @@ -2888,7 +2899,6 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) WRITEMEM(&ld->ld_clientid, 8); WRITE32(conf->len); WRITEMEM(conf->data, conf->len); - kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ @@ -2905,7 +2915,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - + kfree(lock->lk_denied.ld_owner.data); return nfserr; } -- cgit v0.10.2 From 4f0cefbf389c28b0a2be34960797adb0c84ee43d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 15:39:13 -0400 Subject: nfsd4: more precise nfsd4_max_reply It will turn out to be useful to have a more accurate estimate of reply size; so, piggyback on the existing op reply-size estimators. Also move nfsd4_max_reply to nfs4proc.c to get easier access to struct nfsd4_operation and friends. (Thanks to Christoph Hellwig for pointing out that simplification.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 20aad5a..59c3195 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1854,6 +1854,18 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + void warn_on_nonidempotent_op(struct nfsd4_op *op) { if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2f0ea20..8b3d24d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1605,40 +1605,13 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -/* - * Return a rough estimate of the maximum possible reply size. Note the - * estimate includes rpc headers so is meant to be passed to - * svc_reserve, not svc_reserve_auth. - * - * Also note the current compound encoding permits only one operation to - * use pages beyond the first one, so the maximum possible length is the - * maximum over these values, not the sum. - */ -static int nfsd4_max_reply(u32 opnum) -{ - switch (opnum) { - case OP_READLINK: - case OP_READDIR: - /* - * Both of these ops take a single page for data and put - * the head and tail in another page: - */ - return 2 * PAGE_SIZE; - case OP_GETATTR: - case OP_READ: - return INT_MAX; - default: - return PAGE_SIZE; - } -} - static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; - int max_reply = PAGE_SIZE; + int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ int i; READ_BUF(4); @@ -1647,6 +1620,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) SAVEMEM(argp->tag, argp->taglen); READ32(argp->minorversion); READ32(argp->opcnt); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; @@ -1684,7 +1658,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) */ cachethis |= nfsd4_cache_this_op(op); - max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); + max_reply += nfsd4_max_reply(argp->rqstp, op); if (op->status) { argp->opcnt = i+1; @@ -1694,8 +1668,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - if (max_reply != INT_MAX) - svc_reserve(argp->rqstp, max_reply); + svc_reserve(argp->rqstp, max_reply); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index ee9ffdc..41e5229 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -536,6 +536,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); void warn_on_nonidempotent_op(struct nfsd4_op *op); #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) -- cgit v0.10.2 From 6ff9897d2bcf4036dfd139caeddd6f0a51c9ca06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 16:51:23 -0400 Subject: nfsd4: minor encode_read cleanup Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8b3d24d..8ce6c8d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3076,18 +3076,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; - while (len > 0) { + while (len) { + int thislen; + page = *(resp->rqstp->rq_next_page); if (!page) { /* ran out of pages */ maxcount -= len; break; } + thislen = min_t(long, len, PAGE_SIZE); resp->rqstp->rq_vec[v].iov_base = page_address(page); - resp->rqstp->rq_vec[v].iov_len = - len < PAGE_SIZE ? len : PAGE_SIZE; + resp->rqstp->rq_vec[v].iov_len = thislen; resp->rqstp->rq_next_page++; v++; - len -= PAGE_SIZE; + len -= thislen; } read->rd_vlen = v; -- cgit v0.10.2 From 89ff884ebbd0a667253dd61ade8a0e70b787c84a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 17:58:57 -0400 Subject: nfsd4: nfsd4_check_resp_size should check against whole buffer Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8ce6c8d..0eeba21 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3762,7 +3762,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = resp->cstate.session; - int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (nfsd4_has_session(&resp->cstate)) { struct nfsd4_slot *slot = resp->cstate.slot; @@ -3775,7 +3774,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) return nfserr_rep_too_big_to_cache; } - if (respsize > slack_bytes) { + if (buf->len + respsize > buf->buflen) { WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); return nfserr_resource; } -- cgit v0.10.2 From 30596768b31069a3ae08fc305f394efb8c42b473 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 May 2014 16:18:23 -0400 Subject: nfsd4: fix buflen calculation after read encoding We don't necessarily want to assume that the buflen is the same as the number of bytes available in the pages. We may have some reason to set it to something less (for example, later patches will use a smaller buflen to enforce session limits). So, calculate the buflen relative to the previous buflen instead of recalculating it from scratch. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0eeba21..1278d98 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3053,6 +3053,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; int starting_len = xdr->buf->len; + int space_left; long len; __be32 *p; @@ -3117,7 +3118,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; xdr->page_ptr += v; - xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3130,6 +3130,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); xdr->buf->len -= (maxcount&3); } + + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + xdr->buf->buflen - xdr->buf->len); + xdr->buf->buflen = xdr->buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + return 0; } -- cgit v0.10.2 From db3f58a95beea6752d90fed03f9f198d282a3913 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Mar 2014 13:22:18 -0500 Subject: rpc: define xdr_restrict_buflen With this xdr_reserve_space can help us enforce various limits. Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b23d69f..70c6b92 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -217,6 +217,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2b546e8..3992844 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -649,6 +649,35 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) EXPORT_SYMBOL(xdr_truncate_encode); /** + * xdr_restrict_buflen - decrease available buffer space + * @xdr: pointer to xdr_stream + * @newbuflen: new maximum number of bytes available + * + * Adjust our idea of how much space is available in the buffer. + * If we've already used too much space in the buffer, returns -1. + * If the available space is already smaller than newbuflen, returns 0 + * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen + * and ensures xdr->end is set at most offset newbuflen from the start + * of the buffer. + */ +int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen) +{ + struct xdr_buf *buf = xdr->buf; + int left_in_this_buf = (void *)xdr->end - (void *)xdr->p; + int end_offset = buf->len + left_in_this_buf; + + if (newbuflen < 0 || newbuflen < buf->len) + return -1; + if (newbuflen > buf->buflen) + return 0; + if (newbuflen < end_offset) + xdr->end = (void *)xdr->end + newbuflen - end_offset; + buf->buflen = newbuflen; + return 0; +} +EXPORT_SYMBOL(xdr_restrict_buflen); + +/** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream * @pages: list of pages -- cgit v0.10.2 From 47ee52986472dba068e8223cbaf1b65d74238781 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Mar 2014 21:39:35 -0400 Subject: nfsd4: adjust buflen to session channel limit We can simplify session limit enforcement by restricting the xdr buflen to the session size. Also fix a preexisting bug: we should really have been taking into account the auth-required space when comparing against session limits, which are limits on the size of the entire rpc reply, including any krb5 overhead. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e22ea4..612b85a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2208,11 +2208,13 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_sequence *seq) { struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; + int buflen; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) @@ -2281,6 +2283,15 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out_put_session; + buflen = (seq->cachethis) ? + session->se_fchannel.maxresp_cached : + session->se_fchannel.maxresp_sz; + status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : + nfserr_rep_too_big; + if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) + goto out_put_session; + + status = nfs_ok; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; slot->sl_flags |= NFSD4_SLOT_INUSE; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1278d98..5e7bac4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3767,25 +3767,17 @@ static nfsd4_enc nfsd4_enc_ops[] = { __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; - struct nfsd4_session *session = resp->cstate.session; + struct nfsd4_slot *slot = resp->cstate.slot; - if (nfsd4_has_session(&resp->cstate)) { - struct nfsd4_slot *slot = resp->cstate.slot; - - if (buf->len + respsize > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; - - if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + respsize > session->se_fchannel.maxresp_cached) - return nfserr_rep_too_big_to_cache; - } - - if (buf->len + respsize > buf->buflen) { - WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); + if (buf->len + respsize <= buf->buflen) + return nfs_ok; + if (!nfsd4_has_session(&resp->cstate)) return nfserr_resource; + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { + WARN_ON_ONCE(1); + return nfserr_rep_too_big_to_cache; } - - return 0; + return nfserr_rep_too_big; } void -- cgit v0.10.2 From 32aaa62ede574ff99b020b4ee3ff6f9cfc9f0099 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 20:47:41 -0400 Subject: nfsd4: use session limits to release send buffer reservation Once we know the limits the session places on the size of the rpc, we can also use that information to release any unnecessary reserved reply buffer space. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 612b85a..62b882d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2290,6 +2290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) goto out_put_session; + svc_reserve(rqstp, buflen); status = nfs_ok; /* Success! bump slot seqid */ -- cgit v0.10.2 From 561f0ed498ca4342573a870779cc645d3fd7dfe7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Jan 2014 16:37:11 -0500 Subject: nfsd4: allow large readdirs Currently we limit readdir results to a single page. This can result in a performance regression compared to NFSv3 when reading large directories. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59c3195..d95d901 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { + u32 maxcount = svc_max_payload(rqstp); u32 rlen = op->u.readdir.rd_maxcount; - if (rlen > PAGE_SIZE) - rlen = PAGE_SIZE; + if (rlen > maxcount) + rlen = maxcount; - return (op_encode_hdr_size + op_encode_verifier_maxsz) - * sizeof(__be32) + rlen; + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5e7bac4..4d79e53 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval) } static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 **p, int buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, + const char *name, int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, - cd->rd_bmval, + nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2638,9 +2637,12 @@ out_put: } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - if (buflen < 6) + __be32 *p; + + p = xdr_reserve_space(xdr, 6); + if (!p) return NULL; *p++ = htonl(2); *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ @@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, { struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); - int buflen; - __be32 *p = cd->buffer; - __be32 *cookiep; + struct xdr_stream *xdr = cd->xdr; + int start_offset = xdr->buf->len; + int cookie_offset; + int entry_bytes; __be32 nfserr = nfserr_toosmall; + __be64 wire_offset; + __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->offset) - xdr_encode_hyper(cd->offset, (u64) offset); + if (cd->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, + &wire_offset, 8); + } - buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); - if (buflen < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto fail; - *p++ = xdr_one; /* mark entry present */ - cookiep = p; + cookie_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 3*4 + namlen); + if (!p) + goto fail; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); + nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); switch (nfserr) { case nfs_ok: break; @@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + p = nfsd4_encode_rdattr_error(xdr, nfserr); if (p == NULL) { nfserr = nfserr_toosmall; goto fail; } } - cd->buflen -= (p - cd->buffer); - cd->buffer = p; - cd->offset = cookiep; + nfserr = nfserr_toosmall; + entry_bytes = xdr->buf->len - start_offset; + if (entry_bytes > cd->rd_maxcount) + goto fail; + cd->rd_maxcount -= entry_bytes; + cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; return 0; fail: + xdr_truncate_encode(xdr, start_offset); cd->common.err = nfserr; return -EINVAL; } @@ -3206,10 +3220,11 @@ static __be32 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; + int bytes_left; loff_t offset; + __be64 wire_offset; struct xdr_stream *xdr = &resp->xdr; int starting_len = xdr->buf->len; - __be32 *page, *tailbase; __be32 *p; if (nfserr) @@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!p) return nfserr_resource; - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; - /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; - tailbase = p; - - maxcount = PAGE_SIZE; - if (maxcount > readdir->rd_maxcount) - maxcount = readdir->rd_maxcount; /* - * Convert from bytes to words, account for the two words already - * written, make sure to leave two words at the end for the next - * pointer and eof field. + * Number of bytes left for directory entries allowing for the + * final 8 bytes of the readdir and a following failed op: + */ + bytes_left = xdr->buf->buflen - xdr->buf->len + - COMPOUND_ERR_SLACK_SPACE - 8; + if (bytes_left < 0) { + nfserr = nfserr_resource; + goto err_no_verf; + } + maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); + /* + * Note the rfc defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier above + * and the 8 bytes encoded at the end of this function: */ - maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) { - nfserr = nfserr_toosmall; + if (maxcount < 16) { + nfserr = nfserr_toosmall; goto err_no_verf; } + maxcount = min_t(int, maxcount-16, bytes_left); - page = page_address(*(resp->rqstp->rq_next_page++)); + readdir->xdr = xdr; + readdir->rd_maxcount = maxcount; readdir->common.err = 0; - readdir->buflen = maxcount; - readdir->buffer = page; - readdir->offset = NULL; + readdir->cookie_offset = 0; offset = readdir->rd_cookie; nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && readdir->common.err == nfserr_toosmall && - readdir->buffer == page) - nfserr = nfserr_toosmall; + xdr->buf->len == starting_len + 8) { + /* nothing encoded; which limit did we hit?: */ + if (maxcount - 16 < bytes_left) + /* It was the fault of rd_maxcount: */ + nfserr = nfserr_toosmall; + else + /* We ran out of buffer space: */ + nfserr = nfserr_resource; + } if (nfserr) goto err_no_verf; - if (readdir->offset) - xdr_encode_hyper(readdir->offset, offset); + if (readdir->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, + &wire_offset, 8); + } - p = readdir->buffer; + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + goto err_no_verf; + } *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xdr.buf->page_len = ((char *)p) - - (char*)page_address(*(resp->rqstp->rq_next_page-1)); - xdr->buf->len += xdr->buf->page_len; - - xdr->iov = xdr->buf->tail; - - xdr->page_ptr++; - xdr->buf->buflen -= PAGE_SIZE; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = tailbase; - resp->xdr.buf->tail[0].iov_len = 0; - resp->xdr.p = resp->xdr.buf->tail[0].iov_base; - resp->xdr.end = resp->xdr.p + - (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4; return 0; err_no_verf: diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 41e5229..18cbb6d 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -287,9 +287,8 @@ struct nfsd4_readdir { struct svc_fh * rd_fhp; /* response */ struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + struct xdr_stream *xdr; + int cookie_offset; }; struct nfsd4_release_lockowner { -- cgit v0.10.2 From 3b299709091befc0e02aa33d55ddd5baef006853 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 21:20:26 -0400 Subject: nfsd4: enforce rd_dircount As long as we're here, let's enforce the protocol's limit on the number of directory entries to return in a readdir. I don't think anyone's ever noticed our lack of enforcement, but maybe there's more of a chance they will now that we allow larger readdirs. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4d79e53..3f2a52c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1033,7 +1033,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read READ_BUF(24); READ64(readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ + READ32(readdir->rd_dircount); READ32(readdir->rd_maxcount); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -2720,6 +2720,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, if (entry_bytes > cd->rd_maxcount) goto fail; cd->rd_maxcount -= entry_bytes; + if (!cd->rd_dircount) + goto fail; + cd->rd_dircount--; cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; -- cgit v0.10.2 From 476a7b1f4b2c9c38255653fa55157565be8b14be Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Jan 2014 17:08:27 -0500 Subject: nfsd4: don't treat readlink like a zero-copy operation There's no advantage to this zero-copy-style readlink encoding, and it unnecessarily limits the kinds of compounds we can handle. (In practice I can't see why a client would want e.g. multiple readlink calls in a comound, but it's probably a spec violation for us not to handle it.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3f2a52c..cda6226 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3160,8 +3160,9 @@ static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { int maxcount; + __be32 wire_count; + int zero = 0; struct xdr_stream *xdr = &resp->xdr; - char *page; int length_offset = xdr->buf->len; __be32 *p; @@ -3171,26 +3172,19 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; - - page = page_address(*(resp->rqstp->rq_next_page++)); - maxcount = PAGE_SIZE; - if (xdr->end - xdr->p < 1) + p = xdr_reserve_space(xdr, maxcount); + if (!p) return nfserr_resource; - /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If * not, one easy fix is: if ->readlink() precisely fills the buffer, * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ - nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, + (char *)p, &maxcount); if (nfserr == nfserr_isdir) nfserr = nfserr_inval; if (nfserr) { @@ -3198,24 +3192,12 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; } - WRITE32(maxcount); - resp->xdr.buf->head[0].iov_len = (char *)p - - (char *)resp->xdr.buf->head[0].iov_base; - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; - xdr->page_ptr += 1; - xdr->buf->buflen -= PAGE_SIZE; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = p; - resp->xdr.buf->tail[0].iov_len = 0; - if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - } + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + maxcount); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); return 0; } -- cgit v0.10.2 From b86cef60dafcbdf5a19adfa990c2c1672222e677 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 23 Mar 2014 12:01:48 -0400 Subject: nfsd4: better estimate of getattr response size We plan to use this estimate to decide whether or not to allow zero-copy reads. Currently we're assuming all getattr's are a page, which can be both too small (ACLs e.g. may be arbitrarily long) and too large (after an upcoming read patch this will unnecessarily prevent zero copy reads in any read compound also containing a getattr). Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d95d901..7507eff 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1465,6 +1465,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFSD4_MAX_SEC_LABEL_LEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1603,6 +1646,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, .op_name = "OP_GETATTR", }, [OP_GETFH] = { -- cgit v0.10.2 From ccae70a9ee415a89b70e97a36886ab55191ebaea Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 23 Mar 2014 09:34:22 -0700 Subject: nfsd4: estimate sequence response size Otherwise a following patch would turn off all 4.1 zero-copy reads. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7507eff..2d786b8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1565,6 +1565,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + op_encode_change_info_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return NFS4_MAX_SESSIONID_LEN + 20; +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); -- cgit v0.10.2 From b0e35fda827e72cf4b065b52c4c472c28c004fca Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Feb 2014 10:36:59 -0500 Subject: nfsd4: turn off zero-copy-read in exotic cases We currently allow only one read per compound, with operations before and after whose responses will require no more than about a page to encode. While we don't expect clients to violate those limits any time soon, this limitation isn't really condoned by the spec, so to future proof the server we should lift the limitation. At the same time we'd like to continue to support zero-copy reads. Supporting multiple zero-copy-reads per compound would require a new data structure to replace struct xdr_buf, which can represent only one set of included pages. So for now we plan to modify encode_read() to support either zero-copy or non-zero-copy reads, and use some heuristics at the start of the compound processing to decide whether a zero-copy read will work. This will allow us to support more exotic compounds without introducing a performance regression in the normal case. Later patches handle those "exotic compounds", this one just makes sure zero-copy is turned off in those cases. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cda6226..f6a5cb7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1612,6 +1612,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) struct nfsd4_op *op; bool cachethis = false; int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ + int readcount = 0; + int readbytes = 0; int i; READ_BUF(4); @@ -1658,7 +1660,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) */ cachethis |= nfsd4_cache_this_op(op); - max_reply += nfsd4_max_reply(argp->rqstp, op); + if (op->opnum == OP_READ) { + readcount++; + readbytes += nfsd4_max_reply(argp->rqstp, op); + } else + max_reply += nfsd4_max_reply(argp->rqstp, op); if (op->status) { argp->opcnt = i+1; @@ -1668,9 +1674,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - svc_reserve(argp->rqstp, max_reply); + svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE) + argp->rqstp->rq_splice_ok = false; + DECODE_TAIL; } @@ -3078,15 +3087,19 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } /* Make sure there will be room for padding if needed: */ if (xdr->end - xdr->p < 1) return nfserr_resource; - if (resp->xdr.buf->page_len) + if (resp->xdr.buf->page_len) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) -- cgit v0.10.2 From 02fe4707740e7c8a3d0ec34ffbf4630d7d41ca68 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 21:32:04 -0400 Subject: nfsd4: nfsd_vfs_read doesn't use file handle parameter Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3aa3852..f3a0f6c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -821,7 +821,7 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, } static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { mm_segment_t oldfs; @@ -981,7 +981,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (ra && ra->p_set) file->f_ra = ra->p_ra; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); /* Write back readahead params */ if (ra) { @@ -1010,7 +1010,7 @@ nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); } else /* Note file may still be NULL in NFSv4 special stateid case: */ err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); out: -- cgit v0.10.2 From dc97618ddda9a23e5211e800f0614e9612178200 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Mar 2014 17:01:51 -0400 Subject: nfsd4: separate splice and readv cases The splice and readv cases are actually quite different--for example the former case ignores the array of vectors we build up for the latter. It is probably clearer to separate the two cases entirely. There's some code duplication between the split out encoders, but this is only temporary and will be fixed by a later patch. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f6a5cb7..bad762b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3069,41 +3069,84 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc return nfserr; } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( + struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) { - u32 eof; - int v; - struct page *page; - unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; - int starting_len = xdr->buf->len; + u32 eof; + int starting_len = xdr->buf->len - 8; int space_left; - long len; + __be32 nfserr; + __be32 tmp; __be32 *p; - if (nfserr) - return nfserr; - - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + /* + * Don't inline pages unless we know there's room for eof, + * count, and possible padding: + */ + if (xdr->end - xdr->p < 3) return nfserr_resource; + + nfserr = nfsd_splice_read(read->rd_rqstp, file, + read->rd_offset, &maxcount); + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + xdr->buf->page_len = 0; + return nfserr; } - /* Make sure there will be room for padding if needed: */ - if (xdr->end - xdr->p < 1) - return nfserr_resource; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); - if (resp->xdr.buf->page_len) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); - return nfserr_resource; + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + + resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; + xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + xdr->iov = xdr->buf->tail; + + /* Use rest of head for padding and remaining ops: */ + resp->xdr.buf->tail[0].iov_base = xdr->p; + resp->xdr.buf->tail[0].iov_len = 0; + if (maxcount&3) { + p = xdr_reserve_space(xdr, 4); + WRITE32(0); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); + xdr->buf->len -= (maxcount&3); } - maxcount = svc_max_payload(resp->rqstp); - if (maxcount > read->rd_length) - maxcount = read->rd_length; + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + xdr->buf->buflen - xdr->buf->len); + xdr->buf->buflen = xdr->buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + + return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int v; + struct page *page; + int starting_len = xdr->buf->len - 8; + int space_left; + long len; + __be32 nfserr; + __be32 tmp; + __be32 *p; len = maxcount; v = 0; @@ -3124,34 +3167,26 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } read->rd_vlen = v; - nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, - &maxcount); - - if (nfserr) { - /* - * nfsd_splice_actor may have already messed with the - * page length; reset it so as not to confuse - * xdr_truncate_encode: - */ - xdr->buf->page_len = 0; - xdr_truncate_encode(xdr, starting_len); + nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, + read->rd_vlen, &maxcount); + if (nfserr) return nfserr; - } + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - WRITE32(eof); - WRITE32(maxcount); - WARN_ON_ONCE(resp->xdr.buf->head[0].iov_len != (char *)p - - (char *)resp->xdr.buf->head[0].iov_base); + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; xdr->page_ptr += v; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_base = xdr->p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); @@ -3167,6 +3202,60 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; + +} + +static __be32 +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) +{ + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_filp; + int starting_len = xdr->buf->len; + struct raparms *ra; + __be32 *p; + __be32 err; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + return nfserr_resource; + } + + if (resp->xdr.buf->page_len) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + return nfserr_resource; + } + + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + if (!read->rd_filp) { + err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, + &file, &ra); + if (err) + goto err_truncate; + } + + if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) + err = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + err = nfsd4_encode_readv(resp, read, file, maxcount); + + if (!read->rd_filp) + nfsd_put_tmp_read_open(file, ra); + +err_truncate: + if (err) + xdr_truncate_encode(xdr, starting_len); + return err; } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f3a0f6c..b7d35a4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -820,41 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { - mm_segment_t oldfs; - __be32 err; - int host_err; - - err = nfserr_perm; - - if (file->f_op->splice_read && rqstp->rq_splice_ok) { - struct splice_desc sd = { - .len = 0, - .total_len = *count, - .pos = offset, - .u.data = rqstp, - }; - - rqstp->rq_next_page = rqstp->rq_respages + 1; - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - } else { - oldfs = get_fs(); - set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); - set_fs(oldfs); - } - if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; - err = 0; fsnotify_access(file); + return 0; } else - err = nfserrno(host_err); - return err; + return nfserrno(host_err); +} + +int nfsd_splice_read(struct svc_rqst *rqstp, + struct file *file, loff_t offset, unsigned long *count) +{ + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + int host_err; + + rqstp->rq_next_page = rqstp->rq_respages + 1; + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + return nfsd_finish_read(file, count, host_err); +} + +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + mm_segment_t oldfs; + int host_err; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + return nfsd_finish_read(file, count, host_err); +} + +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + if (file->f_op->splice_read && rqstp->rq_splice_ok) + return nfsd_splice_read(rqstp, file, offset, count); + else + return nfsd_readv(file, offset, vec, vlen, count); } /* @@ -956,33 +969,28 @@ out_nfserr: return err; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ -__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file **file, struct raparms **ra) { - struct file *file; struct inode *inode; - struct raparms *ra; __be32 err; - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); if (err) return err; - inode = file_inode(file); + inode = file_inode(*file); /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (ra && ra->p_set) - file->f_ra = ra->p_ra; + *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + if (*ra && (*ra)->p_set) + (*file)->f_ra = (*ra)->p_ra; + return nfs_ok; +} +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{ /* Write back readahead params */ if (ra) { struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; @@ -992,8 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ra->p_count--; spin_unlock(&rab->pb_lock); } - nfsd_close(file); +} + +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + struct file *file; + struct raparms *ra; + __be32 err; + + err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + if (err) + return err; + + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + + nfsd_put_tmp_read_open(file, ra); + return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fbe90bd..7441e96 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -70,6 +70,14 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); void nfsd_close(struct file *); +struct raparms; +__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, + struct file **, struct raparms **); +void nfsd_put_tmp_read_open(struct file *, struct raparms *); +int nfsd_splice_read(struct svc_rqst *, + struct file *, loff_t, unsigned long *); +int nfsd_readv(struct file *, loff_t, struct kvec *, int, + unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, -- cgit v0.10.2 From 34a78b488f144e011493fa51f10c01d034d47c8e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 13 May 2014 16:32:04 -0400 Subject: nfsd4: read encoding cleanup Trivial cleanup, no change in functionality. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bad762b..addb93b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3075,6 +3075,7 @@ static __be32 nfsd4_encode_splice_read( struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = xdr->buf; u32 eof; int starting_len = xdr->buf->len - 8; int space_left; @@ -3097,7 +3098,7 @@ static __be32 nfsd4_encode_splice_read( * page length; reset it so as not to confuse * xdr_truncate_encode: */ - xdr->buf->page_len = 0; + buf->page_len = 0; return nfserr; } @@ -3105,29 +3106,29 @@ static __be32 nfsd4_encode_splice_read( read->rd_fhp->fh_dentry->d_inode->i_size); tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + write_bytes_to_xdr_buf(buf, starting_len , &tmp, 4); tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + write_bytes_to_xdr_buf(buf, starting_len + 4, &tmp, 4); - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; + buf->page_len = maxcount; + buf->len += maxcount; xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; - xdr->iov = xdr->buf->tail; + xdr->iov = buf->tail; /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = xdr->p; - resp->xdr.buf->tail[0].iov_len = 0; + buf->tail[0].iov_base = xdr->p; + buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - xdr->buf->len -= (maxcount&3); + buf->tail[0].iov_base += maxcount&3; + buf->tail[0].iov_len = 4 - (maxcount&3); + buf->len -= (maxcount&3); } space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, - xdr->buf->buflen - xdr->buf->len); - xdr->buf->buflen = xdr->buf->len + space_left; + buf->buflen - buf->len); + buf->buflen = buf->len + space_left; xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; -- cgit v0.10.2 From fec25fa4ad728dd9b063313f2a61ff65eae0d571 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 13 May 2014 17:27:03 -0400 Subject: nfsd4: more read encoding cleanup More cleanup, no change in functionality. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index addb93b..92f071b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3077,11 +3077,9 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; u32 eof; - int starting_len = xdr->buf->len - 8; int space_left; __be32 nfserr; - __be32 tmp; - __be32 *p; + __be32 *p = xdr->p - 2; /* * Don't inline pages unless we know there's room for eof, @@ -3105,25 +3103,25 @@ static __be32 nfsd4_encode_splice_read( eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - tmp = htonl(eof); - write_bytes_to_xdr_buf(buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(buf, starting_len + 4, &tmp, 4); + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; - xdr->iov = buf->tail; /* Use rest of head for padding and remaining ops: */ buf->tail[0].iov_base = xdr->p; buf->tail[0].iov_len = 0; + xdr->iov = buf->tail; if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); + int pad = 4 - (maxcount&3); + + *(xdr->p++) = 0; + buf->tail[0].iov_base += maxcount&3; - buf->tail[0].iov_len = 4 - (maxcount&3); - buf->len -= (maxcount&3); + buf->tail[0].iov_len = pad; + buf->len += pad; } space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, -- cgit v0.10.2 From b042098063849794d69b5322fcc6cf9fb5f2586e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Mar 2014 17:44:10 -0400 Subject: nfsd4: allow exotic read compounds I'm not sure why a client would want to stuff multiple reads in a single compound rpc, but it's legal for them to do it, and we should really support it. Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index b930ad0..c49cd7e 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -176,7 +176,5 @@ Nonstandard compound limitations: ca_maxrequestsize request and a ca_maxresponsesize reply, so we may fail to live up to the promise we made in CREATE_SESSION fore channel negotiation. -* No more than one read-like operation allowed per compound; encoding - replies that cross page boundaries (except for read data) not handled. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 92f071b..480f12c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3139,28 +3139,34 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct xdr_stream *xdr = &resp->xdr; u32 eof; int v; - struct page *page; int starting_len = xdr->buf->len - 8; - int space_left; long len; + int thislen; __be32 nfserr; __be32 tmp; __be32 *p; + u32 zzz = 0; + int pad; len = maxcount; v = 0; - while (len) { - int thislen; - page = *(resp->rqstp->rq_next_page); - if (!page) { /* ran out of pages */ - maxcount -= len; - break; - } + thislen = (void *)xdr->end - (void *)xdr->p; + if (len < thislen) + thislen = len; + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; + v++; + len -= thislen; + + while (len) { thislen = min_t(long, len, PAGE_SIZE); - resp->rqstp->rq_vec[v].iov_base = page_address(page); + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; resp->rqstp->rq_vec[v].iov_len = thislen; - resp->rqstp->rq_next_page++; v++; len -= thislen; } @@ -3170,6 +3176,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_vlen, &maxcount); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); @@ -3179,27 +3186,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, tmp = htonl(maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; - xdr->page_ptr += v; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = xdr->p; - resp->xdr.buf->tail[0].iov_len = 0; - if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - xdr->buf->len -= (maxcount&3); - } - - space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, - xdr->buf->buflen - xdr->buf->len); - xdr->buf->buflen = xdr->buf->len + space_left; - xdr->end = (__be32 *)((void *)xdr->end + space_left); - + pad = (maxcount&3) ? 4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &zzz, pad); return 0; } @@ -3224,15 +3213,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; } - - if (resp->xdr.buf->page_len) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { + WARN_ON_ONCE(1); return nfserr_resource; } - xdr_commit_encode(xdr); maxcount = svc_max_payload(resp->rqstp); + if (maxcount > xdr->buf->buflen - xdr->buf->len) + maxcount = xdr->buf->buflen - xdr->buf->len; if (maxcount > read->rd_length) maxcount = read->rd_length; -- cgit v0.10.2 From c8f13d977518e588ac89dcf8e841821569108109 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 8 May 2014 17:38:18 -0400 Subject: nfsd4: really fix nfs4err_resource in 4.1 case encode_getattr, for example, can return nfserr_resource to indicate it ran out of buffer space. That's not a legal error in the 4.1 case. And in the 4.1 case, if we ran out of buffer space, we should have exceeded a session limit too. (Note in 1bc49d83c37cfaf46be357757e592711e67f9809 "nfsd4: fix nfs4err_resource in 4.1 case" we originally tried fixing this error return before fixing the problem that we could error out while we still had lots of available space. The result was to trade one illegal error for another in those cases. We decided that was helpful, so reverted the change in fc208d026be0c7d60db9118583fc62f6ca97743d, and are only reinstating it now that we've elimited almost all of those cases.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 480f12c..7f346d8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3901,6 +3901,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) space_needed = COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, space_needed); } + if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; + + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) + op->status = nfserr_rep_too_big_to_cache; + else + op->status = nfserr_rep_too_big; + } if (op->status == nfserr_resource || op->status == nfserr_rep_too_big || op->status == nfserr_rep_too_big_to_cache) { -- cgit v0.10.2 From c373b0a4289ebf1ca6fbf4614d8b457b5f1b489f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 17:09:18 -0400 Subject: nfsd4: kill WRITE32 These macros just obscure what's going on. Adopt the convention of the client-side code. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7f346d8..1dca513 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,7 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITE32(n) *p++ = htonl(n) #define WRITE64(n) do { \ *p++ = htonl((u32)((n) >> 32)); \ *p++ = htonl((u32)(n)); \ @@ -1774,7 +1773,7 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, p = xdr_reserve_space(xdr, strlen + 4); if (!p) return nfserr_resource; - WRITE32(strlen); + *p++ = cpu_to_be32(strlen); WRITEMEM(str, strlen); count++; } @@ -1857,7 +1856,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); if (!p) goto out_free; - WRITE32(ncomponents); + *p++ = cpu_to_be32(ncomponents); while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; @@ -1870,7 +1869,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, spin_unlock(&dentry->d_lock); goto out_free; } - WRITE32(len); + *p++ = cpu_to_be32(len); WRITEMEM(dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); spin_unlock(&dentry->d_lock); @@ -1919,7 +1918,7 @@ static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(fslocs->locations_count); + *p++ = cpu_to_be32(fslocs->locations_count); for (i=0; ilocations_count; i++) { status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) @@ -1973,8 +1972,8 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, * For now we use a 0 here to indicate the null translation; in * the future we may place a call to translation code here. */ - WRITE32(0); /* lfs */ - WRITE32(0); /* pi */ + *p++ = cpu_to_be32(0); /* lfs */ + *p++ = cpu_to_be32(0); /* pi */ p = xdr_encode_opaque(p, context, len); return 0; } @@ -2123,23 +2122,23 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(bmval0); - WRITE32(bmval1); - WRITE32(bmval2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); } else if (bmval1) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); } else { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE32(1); - WRITE32(bmval0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); } attrlen_offset = xdr->buf->len; @@ -2161,17 +2160,17 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(2); - WRITE32(word0); - WRITE32(word1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); } else { p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(word0); - WRITE32(word1); - WRITE32(word2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(word2); } } if (bmval0 & FATTR4_WORD0_TYPE) { @@ -2183,16 +2182,17 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, status = nfserr_serverfault; goto out; } - WRITE32(dummy); + *p++ = cpu_to_be32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_PERSISTENT); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| + NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { p = xdr_reserve_space(xdr, 8); @@ -2210,19 +2210,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_FSID) { p = xdr_reserve_space(xdr, 16); @@ -2237,10 +2237,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE64((u64)0); break; case FSIDSOURCE_DEV: - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MAJOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: WRITEMEM(exp->ex_uuid, 16); @@ -2251,19 +2251,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(nn->nfsd4_lease); + *p++ = cpu_to_be32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(rdattr_err); + *p++ = cpu_to_be32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; @@ -2273,21 +2273,22 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); goto out_acl; } p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(acl->naces); + *p++ = cpu_to_be32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { p = xdr_reserve_space(xdr, 4*3); if (!p) goto out_resource; - WRITE32(ace->type); - WRITE32(ace->flag); - WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + *p++ = cpu_to_be32(ace->type); + *p++ = cpu_to_be32(ace->flag); + *p++ = cpu_to_be32(ace->access_mask & + NFS4_ACE_MASK_ALL); status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; @@ -2298,38 +2299,38 @@ out_acl: p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(aclsupport ? + *p++ = cpu_to_be32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - WRITE32(fhp->fh_handle.fh_size); + *p++ = cpu_to_be32(fhp->fh_handle.fh_size); WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -2365,7 +2366,7 @@ out_acl: p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { p = xdr_reserve_space(xdr, 8); @@ -2377,13 +2378,13 @@ out_acl: p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(255); + *p++ = cpu_to_be32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(statfs.f_namelen); + *p++ = cpu_to_be32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { p = xdr_reserve_space(xdr, 8); @@ -2401,19 +2402,19 @@ out_acl: p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(stat.mode & S_IALLUGO); + *p++ = cpu_to_be32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(stat.nlink); + *p++ = cpu_to_be32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { status = nfsd4_encode_user(xdr, rqstp, stat.uid); @@ -2429,8 +2430,8 @@ out_acl: p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE32((u32) MAJOR(stat.rdev)); - WRITE32((u32) MINOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { p = xdr_reserve_space(xdr, 8); @@ -2465,29 +2466,29 @@ out_acl: if (!p) goto out_resource; WRITE64((s64)stat.atime.tv_sec); - WRITE32(stat.atime.tv_nsec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(0); - WRITE32(1); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; WRITE64((s64)stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; WRITE64((s64)stat.mtime.tv_sec); - WRITE32(stat.mtime.tv_nsec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { p = xdr_reserve_space(xdr, 8); @@ -2512,10 +2513,10 @@ out_acl: p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } attrlen = htonl(xdr->buf->len - attrlen_offset - 4); @@ -2750,7 +2751,7 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) p = xdr_reserve_space(xdr, sizeof(stateid_t)); if (!p) return nfserr_resource; - WRITE32(sid->si_generation); + *p++ = cpu_to_be32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); return 0; } @@ -2765,8 +2766,8 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(access->ac_supported); - WRITE32(access->ac_resp_access); + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); } return nfserr; } @@ -2781,9 +2782,9 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, if (!p) return nfserr_resource; WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(bcts->dir); + *p++ = cpu_to_be32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ - WRITE32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -2826,9 +2827,9 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!p) return nfserr_resource; write_cinfo(&p, &create->cr_cinfo); - WRITE32(2); - WRITE32(create->cr_bmval[0]); - WRITE32(create->cr_bmval[1]); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(create->cr_bmval[0]); + *p++ = cpu_to_be32(create->cr_bmval[1]); } return nfserr; } @@ -2861,7 +2862,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - WRITE32(len); + *p++ = cpu_to_be32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); } return nfserr; @@ -2893,14 +2894,14 @@ again: } WRITE64(ld->ld_start); WRITE64(ld->ld_length); - WRITE32(ld->ld_type); + *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { WRITEMEM(&ld->ld_clientid, 8); - WRITE32(conf->len); + *p++ = cpu_to_be32(conf->len); WRITEMEM(conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ WRITE64((u64)0); /* clientid */ - WRITE32(0); /* length of owner name */ + *p++ = cpu_to_be32(0); /* length of owner name */ } return nfserr_denied; } @@ -2972,11 +2973,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op if (!p) return nfserr_resource; write_cinfo(&p, &open->op_cinfo); - WRITE32(open->op_rflags); - WRITE32(2); - WRITE32(open->op_bmval[0]); - WRITE32(open->op_bmval[1]); - WRITE32(open->op_delegate_type); + *p++ = cpu_to_be32(open->op_rflags); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(open->op_bmval[0]); + *p++ = cpu_to_be32(open->op_bmval[1]); + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: @@ -2988,15 +2989,15 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - WRITE32(open->op_recall); + *p++ = cpu_to_be32(open->op_recall); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); @@ -3005,22 +3006,22 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); /* * TODO: space_limit's in delegations */ - WRITE32(NFS4_LIMIT_SIZE); - WRITE32(~(u32)0); - WRITE32(~(u32)0); + *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); + *p++ = cpu_to_be32(~(u32)0); + *p++ = cpu_to_be32(~(u32)0); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ switch (open->op_why_no_deleg) { @@ -3029,14 +3030,15 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(open->op_why_no_deleg); - WRITE32(0); /* deleg signaling not supported yet */ + *p++ = cpu_to_be32(open->op_why_no_deleg); + /* deleg signaling not supported yet: */ + *p++ = cpu_to_be32(0); break; default: p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(open->op_why_no_deleg); + *p++ = cpu_to_be32(open->op_why_no_deleg); } break; default: @@ -3310,8 +3312,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; @@ -3460,17 +3462,17 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, XDR_LEN(info.oid.len) + 4 + 4); if (!p) goto out; - WRITE32(RPC_AUTH_GSS); - WRITE32(info.oid.len); + *p++ = cpu_to_be32(RPC_AUTH_GSS); + *p++ = cpu_to_be32(info.oid.len); WRITEMEM(info.oid.data, info.oid.len); - WRITE32(info.qop); - WRITE32(info.service); + *p++ = cpu_to_be32(info.qop); + *p++ = cpu_to_be32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; p = xdr_reserve_space(xdr, 4); if (!p) goto out; - WRITE32(pf); + *p++ = cpu_to_be32(pf); } else { if (report) pr_warn("NFS: SECINFO: security flavor %u " @@ -3520,16 +3522,16 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!p) return nfserr_resource; if (nfserr) { - WRITE32(3); - WRITE32(0); - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } else { - WRITE32(3); - WRITE32(setattr->sa_bmval[0]); - WRITE32(setattr->sa_bmval[1]); - WRITE32(setattr->sa_bmval[2]); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(setattr->sa_bmval[0]); + *p++ = cpu_to_be32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(setattr->sa_bmval[2]); } return nfserr; } @@ -3551,8 +3553,8 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -3567,8 +3569,8 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w p = xdr_reserve_space(xdr, 16); if (!p) return nfserr_resource; - WRITE32(write->wr_bytes_written); - WRITE32(write->wr_how_written); + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); } return nfserr; @@ -3611,10 +3613,10 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; WRITEMEM(&exid->clientid, 8); - WRITE32(exid->seqid); - WRITE32(exid->flags); + *p++ = cpu_to_be32(exid->seqid); + *p++ = cpu_to_be32(exid->flags); - WRITE32(exid->spa_how); + *p++ = cpu_to_be32(exid->spa_how); switch (exid->spa_how) { case SP4_NONE: @@ -3626,11 +3628,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; /* spo_must_enforce bitmap: */ - WRITE32(2); - WRITE32(nfs4_minimal_spo_must_enforce[0]); - WRITE32(nfs4_minimal_spo_must_enforce[1]); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); /* empty spo_must_allow bitmap: */ - WRITE32(0); + *p++ = cpu_to_be32(0); break; default: @@ -3650,15 +3652,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ /* major id */ - WRITE32(major_id_sz); + *p++ = cpu_to_be32(major_id_sz); WRITEMEM(major_id, major_id_sz); /* Server scope */ - WRITE32(server_scope_sz); + *p++ = cpu_to_be32(server_scope_sz); WRITEMEM(server_scope, server_scope_sz); /* Implementation id */ - WRITE32(0); /* zero length nfs_impl_id4 array */ + *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; } @@ -3676,43 +3678,43 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(sess->seqid); - WRITE32(sess->flags); + *p++ = cpu_to_be32(sess->seqid); + *p++ = cpu_to_be32(sess->flags); p = xdr_reserve_space(xdr, 28); if (!p) return nfserr_resource; - WRITE32(0); /* headerpadsz */ - WRITE32(sess->fore_channel.maxreq_sz); - WRITE32(sess->fore_channel.maxresp_sz); - WRITE32(sess->fore_channel.maxresp_cached); - WRITE32(sess->fore_channel.maxops); - WRITE32(sess->fore_channel.maxreqs); - WRITE32(sess->fore_channel.nr_rdma_attrs); + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->fore_channel.maxops); + *p++ = cpu_to_be32(sess->fore_channel.maxreqs); + *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(sess->fore_channel.rdma_attrs); + *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); } p = xdr_reserve_space(xdr, 28); if (!p) return nfserr_resource; - WRITE32(0); /* headerpadsz */ - WRITE32(sess->back_channel.maxreq_sz); - WRITE32(sess->back_channel.maxresp_sz); - WRITE32(sess->back_channel.maxresp_cached); - WRITE32(sess->back_channel.maxops); - WRITE32(sess->back_channel.maxreqs); - WRITE32(sess->back_channel.nr_rdma_attrs); + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->back_channel.maxops); + *p++ = cpu_to_be32(sess->back_channel.maxreqs); + *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(sess->back_channel.rdma_attrs); + *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); } return 0; } @@ -3731,12 +3733,12 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(seq->seqid); - WRITE32(seq->slotid); + *p++ = cpu_to_be32(seq->seqid); + *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ - WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ - WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ - WRITE32(seq->status_flags); + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ + *p++ = cpu_to_be32(seq->status_flags); resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; @@ -3883,7 +3885,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) WARN_ON_ONCE(1); return; } - WRITE32(op->opnum); + *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) @@ -3956,7 +3958,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) WARN_ON_ONCE(1); return; } - WRITE32(op->opnum); + *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ WRITEMEM(rp->rp_buf, rp->rp_buflen); -- cgit v0.10.2 From b64c7f3bdfbb468d9026ca91d55c57675724f516 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 17:11:35 -0400 Subject: nfsd4: kill WRITE64 Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1dca513..094a7c5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,10 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITE64(n) do { \ - *p++ = htonl((u32)((n) >> 32)); \ - *p++ = htonl((u32)(n)); \ -} while (0) #define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ *(p + XDR_QUADLEN(nbytes) -1) = 0; \ memcpy(p, ptr, nbytes); \ @@ -2204,7 +2200,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(stat.size); + p = xdr_encode_hyper(p, stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { p = xdr_reserve_space(xdr, 4); @@ -2229,12 +2225,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { - WRITE64(NFS4_REFERRAL_FSID_MAJOR); - WRITE64(NFS4_REFERRAL_FSID_MINOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); } else switch(fsid_source(fhp)) { case FSIDSOURCE_FSID: - WRITE64((u64)exp->ex_fsid); - WRITE64((u64)0); + p = xdr_encode_hyper(p, (u64)exp->ex_fsid); + p = xdr_encode_hyper(p, (u64)0); break; case FSIDSOURCE_DEV: *p++ = cpu_to_be32(0); @@ -2337,25 +2333,25 @@ out_acl: p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_files); + p = xdr_encode_hyper(p, (u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { status = nfsd4_encode_fs_locations(xdr, rqstp, exp); @@ -2372,7 +2368,7 @@ out_acl: p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); + p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { p = xdr_reserve_space(xdr, 4); @@ -2390,13 +2386,13 @@ out_acl: p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { p = xdr_reserve_space(xdr, 4); @@ -2438,34 +2434,34 @@ out_acl: if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.atime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2480,14 +2476,14 @@ out_acl: p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.ctime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.mtime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { @@ -2501,7 +2497,7 @@ out_acl: if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) get_parent_attributes(exp, &stat); - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, @@ -2892,15 +2888,15 @@ again: } return nfserr_resource; } - WRITE64(ld->ld_start); - WRITE64(ld->ld_length); + p = xdr_encode_hyper(p, ld->ld_start); + p = xdr_encode_hyper(p, ld->ld_length); *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { WRITEMEM(&ld->ld_clientid, 8); *p++ = cpu_to_be32(conf->len); WRITEMEM(conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - WRITE64((u64)0); /* clientid */ + p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ } return nfserr_denied; @@ -3650,7 +3646,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; /* The server_owner struct */ - WRITE64(minor_id); /* Minor id */ + p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ *p++ = cpu_to_be32(major_id_sz); WRITEMEM(major_id, major_id_sz); -- cgit v0.10.2 From 0c0c267ba96f606b541ab8e4bcde54e6b3f0198f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 18:43:16 -0400 Subject: nfsd4: kill WRITEMEM Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 094a7c5..3844dbe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,12 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ - *(p + XDR_QUADLEN(nbytes) -1) = 0; \ - memcpy(p, ptr, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -}} while (0) - static void write32(__be32 **p, u32 n) { *(*p)++ = htonl(n); @@ -1769,8 +1763,7 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, p = xdr_reserve_space(xdr, strlen + 4); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(strlen); - WRITEMEM(str, strlen); + p = xdr_encode_opaque(p, str, strlen); count++; } else @@ -1865,8 +1858,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, spin_unlock(&dentry->d_lock); goto out_free; } - *p++ = cpu_to_be32(len); - WRITEMEM(dentry->d_name.name, len); + p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); spin_unlock(&dentry->d_lock); dput(dentry); @@ -2239,7 +2231,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - WRITEMEM(exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, 16); break; } } @@ -2326,8 +2318,8 @@ out_acl: p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - *p++ = cpu_to_be32(fhp->fh_handle.fh_size); - WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { p = xdr_reserve_space(xdr, 8); @@ -2748,7 +2740,8 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) if (!p) return nfserr_resource; *p++ = cpu_to_be32(sid->si_generation); - WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + p = xdr_encode_opaque_fixed(p, &sid->si_opaque, + sizeof(stateid_opaque_t)); return 0; } @@ -2777,7 +2770,8 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); if (!p) return nfserr_resource; - WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ *p++ = cpu_to_be32(0); @@ -2807,7 +2801,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; - WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -2858,8 +2853,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(len); - WRITEMEM(&fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); } return nfserr; } @@ -2892,9 +2886,8 @@ again: p = xdr_encode_hyper(p, ld->ld_length); *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { - WRITEMEM(&ld->ld_clientid, 8); - *p++ = cpu_to_be32(conf->len); - WRITEMEM(conf->data, conf->len); + p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); + p = xdr_encode_opaque(p, conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ @@ -3459,8 +3452,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, if (!p) goto out; *p++ = cpu_to_be32(RPC_AUTH_GSS); - *p++ = cpu_to_be32(info.oid.len); - WRITEMEM(info.oid.data, info.oid.len); + p = xdr_encode_opaque(p, info.oid.data, info.oid.len); *p++ = cpu_to_be32(info.qop); *p++ = cpu_to_be32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { @@ -3542,8 +3534,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; - WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); + p = xdr_encode_opaque_fixed(p, &scd->se_confirm, + NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { p = xdr_reserve_space(xdr, 8); @@ -3567,7 +3560,8 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w return nfserr_resource; *p++ = cpu_to_be32(write->wr_bytes_written); *p++ = cpu_to_be32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -3608,7 +3602,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; - WRITEMEM(&exid->clientid, 8); + p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); *p++ = cpu_to_be32(exid->seqid); *p++ = cpu_to_be32(exid->flags); @@ -3648,12 +3642,10 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* The server_owner struct */ p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ - *p++ = cpu_to_be32(major_id_sz); - WRITEMEM(major_id, major_id_sz); + p = xdr_encode_opaque(p, major_id, major_id_sz); /* Server scope */ - *p++ = cpu_to_be32(server_scope_sz); - WRITEMEM(server_scope, server_scope_sz); + p = xdr_encode_opaque(p, server_scope, server_scope_sz); /* Implementation id */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ @@ -3673,7 +3665,8 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; - WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, sess->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(sess->seqid); *p++ = cpu_to_be32(sess->flags); @@ -3728,7 +3721,8 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); if (!p) return nfserr_resource; - WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, seq->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(seq->seqid); *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ @@ -3957,7 +3951,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - WRITEMEM(rp->rp_buf, rp->rp_buflen); + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } int -- cgit v0.10.2 From d05d5744ef67879877dbe2e3d0fb9fcc27ee44e5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 18:48:39 -0400 Subject: nfsd4: kill write32, write64 And switch a couple other functions from the encode(&p,...) convention to the p = encode(p,...) convention mostly used elsewhere. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3844dbe..3e347a1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,39 +1683,30 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -static void write32(__be32 **p, u32 n) -{ - *(*p)++ = htonl(n); -} - -static void write64(__be32 **p, u64 n) -{ - write32(p, (n >> 32)); - write32(p, (u32)n); -} - -static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) { if (IS_I_VERSION(inode)) { - write64(p, inode->i_version); + p = xdr_encode_hyper(p, inode->i_version); } else { - write32(p, stat->ctime.tv_sec); - write32(p, stat->ctime.tv_nsec); + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); } + return p; } -static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { - write32(p, c->atomic); + *p++ = cpu_to_be32(c->atomic); if (c->change_supported) { - write64(p, c->before_change); - write64(p, c->after_change); + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); } else { - write32(p, c->before_ctime_sec); - write32(p, c->before_ctime_nsec); - write32(p, c->after_ctime_sec); - write32(p, c->after_ctime_nsec); + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); } + return p; } /* Encode as an array of strings the string given with components @@ -2186,7 +2177,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - write_change(&p, &stat, dentry->d_inode); + p = encode_change(p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { p = xdr_reserve_space(xdr, 8); @@ -2817,7 +2808,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - write_cinfo(&p, &create->cr_cinfo); + p = encode_cinfo(p, &create->cr_cinfo); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(create->cr_bmval[0]); *p++ = cpu_to_be32(create->cr_bmval[1]); @@ -2940,7 +2931,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - write_cinfo(&p, &link->li_cinfo); + p = encode_cinfo(p, &link->li_cinfo); } return nfserr; } @@ -2961,7 +2952,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 40); if (!p) return nfserr_resource; - write_cinfo(&p, &open->op_cinfo); + p = encode_cinfo(p, &open->op_cinfo); *p++ = cpu_to_be32(open->op_rflags); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(open->op_bmval[0]); @@ -3381,7 +3372,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - write_cinfo(&p, &remove->rm_cinfo); + p = encode_cinfo(p, &remove->rm_cinfo); } return nfserr; } @@ -3396,8 +3387,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 40); if (!p) return nfserr_resource; - write_cinfo(&p, &rename->rn_sinfo); - write_cinfo(&p, &rename->rn_tinfo); + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); } return nfserr; } -- cgit v0.10.2 From a5cddc885b99458df963a75abbe0b40cbef56c48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 May 2014 18:10:58 -0400 Subject: nfsd4: better reservation of head space for krb5 RPC_MAX_AUTH_SIZE is scattered around several places. Better to set it once in the auth code, where this kind of estimate should be made. And while we're at it we can leave it zero when we're not using krb5i or krb5p. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2d786b8..16e71d0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1261,13 +1261,13 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->buf = buf; xdr->iov = head; xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; xdr->scratch.iov_len = 0; xdr->page_ptr = buf->pages; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - 2 * RPC_MAX_AUTH_SIZE; + - rqstp->rq_auth_slack; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 62b882d..d0a016a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2288,7 +2288,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, session->se_fchannel.maxresp_sz; status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : nfserr_rep_too_big; - if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; svc_reserve(rqstp, buflen); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3e347a1..470fe89 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1611,7 +1611,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; - int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; int i; @@ -1677,7 +1678,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; - if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE) + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) argp->rqstp->rq_splice_ok = false; DECODE_TAIL; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 85cb647..1bc7cd0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -260,7 +260,10 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ void * rq_auth_data; /* flavor-specific data */ - + int rq_auth_slack; /* extra space xdr code + * should leave in head + * for krb5i, krb5p. + */ int rq_reserved; /* space on socket outq * reserved for this request */ @@ -456,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); */ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; - - if (rqstp->rq_authop->flavour) - added_space = RPC_MAX_AUTH_SIZE; - svc_reserve(rqstp, space + added_space); + svc_reserve(rqstp, space + rqstp->rq_auth_slack); } #endif /* SUNRPC_SVC_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 0f73f45..4ce5ecce 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE; break; case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ @@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2; break; default: goto auth_err; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 2af7b0c..79c0f34 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) } spin_unlock(&authtab_lock); + rqstp->rq_auth_slack = 0; + rqstp->rq_authop = aops; return aops->accept(rqstp, authp); } -- cgit v0.10.2 From a1f05514b016aeaed638dbf677f443af7e7bde4f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 19:57:49 +0800 Subject: NFS4: Avoid NULL reference or double free in nfsd4_fslocs_free() If fsloc_parse() failed at kzalloc(), fs/nfsd/export.c 411 412 fsloc->locations = kzalloc(fsloc->locations_count 413 * sizeof(struct nfsd4_fs_location), GFP_KERNEL); 414 if (!fsloc->locations) 415 return -ENOMEM; svc_export_parse() will call nfsd4_fslocs_free() with fsloc->locations = NULL, so that, "kfree(fsloc->locations[i].path);" will cause a crash. If fsloc_parse() failed after that, fsloc_parse() will call nfsd4_fslocs_free(), and svc_export_parse() will call it again, so that, a double free is caused. This patch checks the fsloc->locations, and set to NULL after it be freed. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8513c59..9a41d3d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -295,13 +295,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) { + struct nfsd4_fs_location *locations = fsloc->locations; int i; + if (!locations) + return; + for (i = 0; i < fsloc->locations_count; i++) { - kfree(fsloc->locations[i].path); - kfree(fsloc->locations[i].hosts); + kfree(locations[i].path); + kfree(locations[i].hosts); } - kfree(fsloc->locations); + + kfree(locations); + fsloc->locations = NULL; } static void svc_export_put(struct kref *ref) -- cgit v0.10.2 From 0d63790c365852a6ce2913632b933633343ae479 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 19:59:06 +0800 Subject: NFSD: Helper function for parsing uuid Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9a41d3d..8771f41 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -480,6 +480,23 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ + int len; + + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != 16) + return -EINVAL; + + *puuid = kmemdup(buf, 16, GFP_KERNEL); + if (*puuid == NULL) + return -ENOMEM; + + return 0; +} + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -558,18 +575,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - else if (strcmp(buf, "uuid") == 0) { - /* expect a 16 byte uuid encoded as \xXXXX... */ - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len != 16) - err = -EINVAL; - else { - exp.ex_uuid = - kmemdup(buf, 16, GFP_KERNEL); - if (exp.ex_uuid == NULL) - err = -ENOMEM; - } - } else if (strcmp(buf, "secinfo") == 0) + else if (strcmp(buf, "uuid") == 0) + err = uuid_parse(&mesg, buf, &exp.ex_uuid); + else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything -- cgit v0.10.2 From 94eb36892d727145794b80dceffc435d1d68edbb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:00:19 +0800 Subject: NFSD: Adds macro EX_UUID_LEN for exports uuid's length Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8771f41..90d37b6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -487,10 +487,10 @@ uuid_parse(char **mesg, char *buf, unsigned char **puuid) /* expect a 16 byte uuid encoded as \xXXXX... */ len = qword_get(mesg, buf, PAGE_SIZE); - if (len != 16) + if (len != EX_UUID_LEN) return -EINVAL; - *puuid = kmemdup(buf, 16, GFP_KERNEL); + *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); if (*puuid == NULL) return -ENOMEM; @@ -663,7 +663,7 @@ static int svc_export_show(struct seq_file *m, if (exp->ex_uuid) { int i; seq_puts(m, ",uuid="); - for (i=0; i<16; i++) { + for (i = 0; i < EX_UUID_LEN; i++) { if ((i&3) == 0 && i) seq_putc(m, ':'); seq_printf(m, "%02x", exp->ex_uuid[i]); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d7939a6..c7d4ed0 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -37,6 +37,7 @@ struct nfsd4_fs_locations { * spkm3i, and spkm3p (and using all 8 at once should be rare). */ #define MAX_SECINFO_LIST 8 +#define EX_UUID_LEN 16 struct exp_flavor_info { u32 pseudoflavor; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 470fe89..70d1472 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2223,7 +2223,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - p = xdr_encode_opaque_fixed(p, exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, + EX_UUID_LEN); break; } } -- cgit v0.10.2 From 0faed901c680b153ca090a766ceda699b55993eb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:53:05 +0800 Subject: NFSD: remove unneeded linux/user_namespace.h include After commit 4c1e1b34d5c8 ("nfsd: Store ex_anon_uid and ex_anon_gid as kuids and kgids") using kuid/kgid for ex_anon_uid/ex_anon_gid, user_namespace.h is not needed. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 2645be4..1042325 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,7 +1,6 @@ /* Copyright (C) 1995, 1996 Olaf Kirch */ #include -#include #include "nfsd.h" #include "auth.h" -- cgit v0.10.2 From 61a27f08a63ee9460653633d8a9cc5a09dcb9aa5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:53:44 +0800 Subject: NFSD: Cleanup unused variable in nfsd_setuser() Commit 8f6c5ffc8987 ("kernel/groups.c: remove return value of set_groups") removed the last use of "ret". Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 1042325..72f4482 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -24,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); - int ret; validate_process_creds(); @@ -85,8 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) return 0; oom: - ret = -ENOMEM; abort_creds(new); - return ret; + return -ENOMEM; } -- cgit v0.10.2 From a30ae94c0797f9de47626eecc43359989447d7a3 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:54:27 +0800 Subject: NFSD: Cleanup unneeded including net/ipv6.h Commit 49b28684fdba ("nfsd: Remove deprecated nfsctl system call and related code") removed the only use of ipv6_addr_set_v4mapped(), so net/ipv6.h is unneeded now. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 90d37b6..1d6d7bd 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -17,8 +17,6 @@ #include #include -#include - #include "nfsd.h" #include "nfsfh.h" #include "netns.h" -- cgit v0.10.2 From e6d615f7428bb9a202f7fab563e917e89169d349 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:55:43 +0800 Subject: NFSD: Remove typedef of svc_client and svc_export in export.c No need for a typedef wrapper for svc_export or svc_client, remove them. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 1d6d7bd..858c536 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -23,9 +23,6 @@ #define NFSDDBG_FACILITY NFSDDBG_EXPORT -typedef struct auth_domain svc_client; -typedef struct svc_export svc_export; - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -783,7 +780,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) static struct svc_expkey * -exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; @@ -805,9 +802,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, return ek; } - -static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, - const struct path *path, struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, + const struct path *path, struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -831,11 +828,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, - struct path *path) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path) { struct dentry *saved = dget(path->dentry); - svc_export *exp = exp_get_by_name(cd, clp, path, NULL); + struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL); while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { struct dentry *parent = dget_parent(path->dentry); @@ -856,7 +853,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(struct net *net, svc_client *clp, char *name, +exp_rootfh(struct net *net, struct auth_domain *clp, char *name, struct knfsd_fh *f, int maxsize) { struct svc_export *exp; -- cgit v0.10.2 From f0db79d54b6c9f612fb2ef4f71ca8340edaf89f1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:56:16 +0800 Subject: NFSD: Add missing comment of "expiry" in expkey_parse() Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 858c536..263d30e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -68,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { - /* client fsidtype fsid [path] */ + /* client fsidtype fsid expiry [path] */ char *buf; int len; struct auth_domain *dom = NULL; -- cgit v0.10.2 From 1f53146da9cb2c941a3928320a6824d3b035455f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 24 May 2014 11:26:49 +0800 Subject: NFSD: Using type of uint32_t for ex_nflavors instead of int ex_nflavors can't be negative number, just defined by uint32_t. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 263d30e..7884051 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -438,13 +438,14 @@ out_free_all: static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { - int listsize, err; struct exp_flavor_info *f; + u32 listsize; + int err; - err = get_int(mesg, &listsize); + err = get_uint(mesg, &listsize); if (err) return err; - if (listsize < 0 || listsize > MAX_SECINFO_LIST) + if (listsize > MAX_SECINFO_LIST) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index c7d4ed0..cfeea85 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -54,7 +54,7 @@ struct svc_export { int ex_fsid; unsigned char * ex_uuid; /* 16 byte fsid */ struct nfsd4_fs_locations ex_fslocs; - int ex_nflavors; + uint32_t ex_nflavors; struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; struct cache_detail *cd; }; -- cgit v0.10.2 From be69da8052af38a9d72ed32fb765fd4446e4091c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 24 May 2014 11:19:57 +0800 Subject: NFSD: Error out when getting more than one fsloc/secinfo/uuid Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 7884051..13b85f9 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -389,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; + /* more than one fsloc */ + if (fsloc->locations) + return -EINVAL; + /* listsize */ err = get_uint(mesg, &fsloc->locations_count); if (err) @@ -442,6 +446,10 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) u32 listsize; int err; + /* more than one secinfo */ + if (exp->ex_nflavors) + return -EINVAL; + err = get_uint(mesg, &listsize); if (err) return err; @@ -481,6 +489,10 @@ uuid_parse(char **mesg, char *buf, unsigned char **puuid) { int len; + /* more than one uuid */ + if (*puuid) + return -EINVAL; + /* expect a 16 byte uuid encoded as \xXXXX... */ len = qword_get(mesg, buf, PAGE_SIZE); if (len != EX_UUID_LEN) -- cgit v0.10.2 From 12337901d654415d9f764b5f5ba50052e9700f37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 10:46:13 +0200 Subject: nfsd: getattr for FATTR4_WORD0_FILES_AVAIL needs the statfs buffer Note nobody's ever noticed because the typical client probably never requests FILES_AVAIL without also requesting something else on the list. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 70d1472..3976dc6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2049,8 +2049,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); -- cgit v0.10.2 From b52bd7bccca57b2bbe837d14a0bf3e45279459a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 10:46:32 +0200 Subject: nfsd: remove unused function nfsd_read_file Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b7d35a4..140c496 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1026,26 +1026,6 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* As above, but use the provided file descriptor. */ -__be32 -nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) -{ - __be32 err; - - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); - } else /* Note file may still be NULL in NFSv4 special stateid case: */ - err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); -out: - return err; -} - /* * Write data to a file. * The stable flag requests synchronous writes. diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 7441e96..91b6ae3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -80,8 +80,6 @@ int nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, - loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, -- cgit v0.10.2 From a48fd0f9f77b6e144813230c26621c00aac92ce8 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 29 May 2014 12:18:52 +0800 Subject: SUNRPC/NFSD: Remove using of dprintk with KERN_WARNING When debugging, rpc prints messages from dprintk(KERN_WARNING ...) with "^A4" prefixed, [ 2780.339988] ^A4nfsd: connect from unprivileged port: 127.0.0.1, port=35316 Trond tells, > dprintk != printk. We have NEVER supported dprintk(KERN_WARNING...) This patch removes using of dprintk with KERN_WARNING. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a337106..ec83934 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, /* Check if the request originated from a secure port. */ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk(KERN_WARNING - "nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); return nfserr_perm; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f3b8eb3..b507cd3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -849,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) * tell us anything. For now just warn about unpriv connections. */ if (!svc_port_is_privileged(sin)) { - dprintk(KERN_WARNING - "%s: connect from unprivileged port: %s\n", + dprintk("%s: connect from unprivileged port: %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); } -- cgit v0.10.2 From da2ebce6a0f64cc01bd00aba998c0a4fa7c09843 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:25 -0400 Subject: nfsd: make nfsd4_encode_fattr static sparse says: CHECK fs/nfsd/nfs4xdr.c fs/nfsd/nfs4xdr.c:2043:1: warning: symbol 'nfsd4_encode_fattr' was not declared. Should it be static? Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3976dc6..a7268b4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2000,7 +2000,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. */ -__be32 +static __be32 nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *bmval, -- cgit v0.10.2 From a832e7ae8b4f71fc522089d83fd445693aa7dbc5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:26 -0400 Subject: nfsd: fix laundromat next-run-time calculation The laundromat uses two variables to calculate when it should next run, but one is completely ignored at the end of the run. Merge the two and rename the variable to be more descriptive of what it does. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0a016a..01a0e43 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3397,8 +3397,7 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, clientid_val = nn->nfsd4_lease; - time_t u, test_val = nn->nfsd4_lease; + time_t t, new_timeo = nn->nfsd4_lease; nfs4_lock_state(); @@ -3410,8 +3409,7 @@ nfs4_laundromat(struct nfsd_net *nn) clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (clientid_val > t) - clientid_val = t; + new_timeo = min(new_timeo, t); break; } if (mark_client_expired_locked(clp)) { @@ -3434,9 +3432,8 @@ nfs4_laundromat(struct nfsd_net *nn) if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { - u = dp->dl_time - cutoff; - if (test_val > u) - test_val = u; + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; } list_move(&dp->dl_recall_lru, &reaplist); @@ -3446,21 +3443,18 @@ nfs4_laundromat(struct nfsd_net *nn) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); revoke_delegation(dp); } - test_val = nn->nfsd4_lease; list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { - u = oo->oo_time - cutoff; - if (test_val > u) - test_val = u; + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; } release_openowner(oo); } - if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) - clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); - return clientid_val; + return new_timeo; } static struct workqueue_struct *laundry_wq; -- cgit v0.10.2 From 931ee56c67573eb4e51c8a4e78598d965b8b059e Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:27 -0400 Subject: nfsd4: use recall_lock for delegation hashing This fixes a bug in the handling of the fi_delegations list. nfs4_setlease does not hold the recall_lock when adding to it. The client_mutex is held, which prevents against concurrent list changes, but nfsd_break_deleg_cb does not hold while walking it. New delegations could theoretically creep onto the list while we're walking it there. Signed-off-by: Benny Halevy Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 01a0e43..05ca64c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -433,12 +433,21 @@ static void unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + lockdep_assert_held(&recall_lock); + + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); +} + /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); + list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); @@ -3065,11 +3074,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); if (status) goto out_free; - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_lock(&recall_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&recall_lock); return 0; out_free: locks_free_lock(fl); @@ -3090,9 +3100,8 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) spin_unlock(&recall_lock); return -EAGAIN; } - list_add(&dp->dl_perfile, &fp->fi_delegations); + hash_delegation_locked(dp, fp); spin_unlock(&recall_lock); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; } @@ -4903,6 +4912,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct nfs4_delegation *dp, *next; u64 count = 0; + lockdep_assert_held(&recall_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) list_move(&dp->dl_recall_lru, victims); -- cgit v0.10.2 From ba5378b66f5eb20e464796c5b088c0961fb5f618 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:28 -0400 Subject: nfsd: fix setting of NFS4_OO_CONFIRMED in nfsd4_open In the NFS4_OPEN_CLAIM_PREVIOUS case, we should only mark it confirmed if the nfs4_check_open_reclaim check succeeds. In the NFS4_OPEN_CLAIM_DELEG_PREV_FH and NFS4_OPEN_CLAIM_DELEGATE_PREV cases, I see no point in declaring the openowner confirmed when the operation is going to fail anyway, and doing so might allow the client to game things such that it wouldn't need to confirm a subsequent open with the same owner. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 16e71d0..8fc901a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -430,12 +430,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion, nn); if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, cstate, open); @@ -445,7 +445,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; -- cgit v0.10.2 From 7025005d5e7715efa24c115c45f4a6b4f1a545de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:29 -0400 Subject: nfsd: remove unneeded zeroing of fields in nfsd4_proc_compound The memset of resp in svc_process_common should ensure that these are already zeroed by the time they get here. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8fc901a..f053c5a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1290,11 +1290,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; cstate->minorversion = args->minorversion; - cstate->replay_owner = NULL; - cstate->session = NULL; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); /* -- cgit v0.10.2 From cdc975050077d707d5315e51c2b05a763d4895e5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:30 -0400 Subject: nfsd4: rename recall_lock to state_lock ...as the name is a bit more descriptive and we've started using it for other purposes. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 05ca64c..63e7645 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -81,7 +81,7 @@ static DEFINE_MUTEX(client_mutex); * effort to decrease the scope of the client_mutex, this spinlock may * eventually cover more: */ -static DEFINE_SPINLOCK(recall_lock); +static DEFINE_SPINLOCK(state_lock); static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; @@ -235,9 +235,9 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { - if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del(&fi->fi_hash); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); iput(fi->fi_inode); nfsd4_free_file(fi); } @@ -436,7 +436,7 @@ static void unhash_stid(struct nfs4_stid *s) static void hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { - lockdep_assert_held(&recall_lock); + lockdep_assert_held(&state_lock); list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); @@ -446,11 +446,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) static void unhash_delegation(struct nfs4_delegation *dp) { - spin_lock(&recall_lock); + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); if (dp->dl_file) { nfs4_put_deleg_lease(dp->dl_file); put_nfs4_file(dp->dl_file); @@ -1161,13 +1161,13 @@ destroy_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); @@ -2539,9 +2539,9 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&recall_lock); + spin_lock(&state_lock); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } void @@ -2712,15 +2712,15 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - spin_lock(&recall_lock); + spin_lock(&state_lock); hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return fp; } } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return NULL; } @@ -2757,6 +2757,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&state_lock); /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2793,11 +2794,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); fp->fi_had_conflict = true; list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } static @@ -3077,9 +3078,9 @@ static int nfs4_setlease(struct nfs4_delegation *dp) fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - spin_lock(&recall_lock); + spin_lock(&state_lock); hash_delegation_locked(dp, fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return 0; out_free: locks_free_lock(fl); @@ -3094,14 +3095,14 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) dp->dl_file = fp; if (!fp->fi_lease) return nfs4_setlease(dp); - spin_lock(&recall_lock); + spin_lock(&state_lock); atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return -EAGAIN; } hash_delegation_locked(dp, fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return 0; } @@ -3435,7 +3436,7 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); expire_client(clp); } - spin_lock(&recall_lock); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) @@ -3447,7 +3448,7 @@ nfs4_laundromat(struct nfsd_net *nn) } list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); revoke_delegation(dp); @@ -4912,7 +4913,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct nfs4_delegation *dp, *next; u64 count = 0; - lockdep_assert_held(&recall_lock); + lockdep_assert_held(&state_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) list_move(&dp->dl_recall_lru, victims); @@ -4928,9 +4929,9 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) revoke_delegation(dp); @@ -4944,11 +4945,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return count; } @@ -4957,9 +4958,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) { u64 count = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, NULL); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); nfsd_print_count(clp, count, "delegations"); return count; @@ -5170,12 +5171,12 @@ nfs4_state_shutdown_net(struct net *net) nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); -- cgit v0.10.2 From 3fb87d13ce29637e0ae005fa66a4f917cce6fede Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:31 -0400 Subject: nfsd4: hash deleg stateid only on successful nfs4_set_delegation We don't want the stateid to be found in the hash table before the delegation is granted. Currently this is protected by the client_mutex, but we want to break that up and this is a necessary step toward that goal. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 63e7645..e5197d9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -375,7 +375,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -438,6 +437,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); + dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -- cgit v0.10.2 From 05638dc73af2586517468b1159d4b76e90607359 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:05:47 -0400 Subject: nfsd4: simplify server xdr->next_page use The rpc code makes available to the NFS server an array of pages to encod into. The server represents its reply as an xdr buf, with the head pointing into the first page in that array, the pages ** array starting just after that, and the tail (if any) sharing any leftover space in the page used by the head. While encoding, we use xdr_stream->page_ptr to keep track of which page we're currently using. Currently we set xdr_stream->page_ptr to buf->pages, which makes the head a weird exception to the rule that page_ptr always points to the page we're currently encoding into. So, instead set it to buf->pages - 1 (the page actually containing the head), and remove the need for a little unintuitive logic in xdr_get_next_encode_buffer() and xdr_truncate_encode. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f053c5a..6851b00 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1264,7 +1264,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; xdr->scratch.iov_len = 0; - xdr->page_ptr = buf->pages; + xdr->page_ptr = buf->pages - 1; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - rqstp->rq_auth_slack; } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 3992844..23fb4e7 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -523,10 +523,9 @@ __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) frag2bytes = nbytes - frag1bytes; if (xdr->iov) xdr->iov->iov_len += frag1bytes; - else { + else xdr->buf->page_len += frag1bytes; - xdr->page_ptr++; - } + xdr->page_ptr++; xdr->iov = NULL; /* * If the last encode didn't end exactly on a page boundary, the @@ -638,8 +637,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) /* xdr->iov should already be NULL */ return; } - if (fraglen) + if (fraglen) { xdr->end = head->iov_base + head->iov_len; + xdr->page_ptr--; + } /* (otherwise assume xdr->end is already set) */ head->iov_len = len; buf->len = len; -- cgit v0.10.2 From 06553991e7757c668efb3bce9dcc740f31aead60 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:32:51 -0400 Subject: nfsd4: kill READ32 While we're here, let's kill off a couple of the read-side macros. Leaving the more complicated ones alone for now. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a7268b4..a23fa00 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -98,7 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ32(x) (x) = ntohl(*p++) #define READ64(x) do { \ (x) = (u64)ntohl(*p++) << 32; \ (x) |= ntohl(*p++); \ @@ -248,17 +247,17 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[2] = 0; READ_BUF(4); - READ32(bmlen); + bmlen = be32_to_cpup(p++); if (bmlen > 1000) goto xdr_error; READ_BUF(bmlen << 2); if (bmlen > 0) - READ32(bmval[0]); + bmval[0] = be32_to_cpup(p++); if (bmlen > 1) - READ32(bmval[1]); + bmval[1] = be32_to_cpup(p++); if (bmlen > 2) - READ32(bmval[2]); + bmval[2] = be32_to_cpup(p++); DECODE_TAIL; } @@ -278,7 +277,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return status; READ_BUF(4); - READ32(expected_len); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); @@ -291,7 +290,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct nfs4_ace *ace; READ_BUF(4); len += 4; - READ32(nace); + nace = be32_to_cpup(p++); if (nace > NFS4_ACL_MAX) return nfserr_fbig; @@ -305,10 +304,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace->type); - READ32(ace->flag); - READ32(ace->access_mask); - READ32(dummy32); + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); @@ -330,14 +329,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; - READ32(iattr->ia_mode); + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -348,7 +347,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -359,7 +358,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore @@ -367,7 +366,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(12); len += 12; READ64(iattr->ia_atime.tv_sec); - READ32(iattr->ia_atime.tv_nsec); + iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -382,7 +381,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore @@ -390,7 +389,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(12); len += 12; READ64(iattr->ia_mtime.tv_sec); - READ32(iattr->ia_mtime.tv_nsec); + iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -408,13 +407,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { READ_BUF(4); len += 4; - READ32(dummy32); /* lfs: we don't use it */ + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ READ_BUF(4); len += 4; - READ32(dummy32); /* pi: we don't use it either */ + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) return nfserr_badlabel; @@ -445,7 +444,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(sid->si_generation); + sid->si_generation = be32_to_cpup(p++); COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -457,7 +456,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_HEAD; READ_BUF(4); - READ32(access->ac_req_access); + access->ac_req_access = be32_to_cpup(p++); DECODE_TAIL; } @@ -472,7 +471,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); - READ32(nr_secflavs); + nr_secflavs = be32_to_cpup(p++); if (nr_secflavs) cbs->flavor = (u32)(-1); else @@ -480,7 +479,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); switch (dummy) { case RPC_AUTH_NULL: /* Nothing to read */ @@ -490,21 +489,21 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ case RPC_AUTH_UNIX: READ_BUF(8); /* stamp */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* machine name */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); SAVEMEM(machine_name, dummy); /* uid, gid */ READ_BUF(8); - READ32(uid); - READ32(gid); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); /* more gids */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); if (cbs->flavor == (u32)(-1)) { kuid_t kuid = make_kuid(&init_user_ns, uid); @@ -524,14 +523,14 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ "not supported!\n"); READ_BUF(8); /* gcbp_service */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* gcbp_handle_from_server */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* gcbp_handle_from_client */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); break; default: @@ -547,7 +546,7 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru DECODE_HEAD; READ_BUF(4); - READ32(bc->bc_cb_program); + bc->bc_cb_program = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); DECODE_TAIL; @@ -559,7 +558,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(bcts->dir); + bcts->dir = be32_to_cpup(p++); /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker * could help us figure out we should be using it. */ DECODE_TAIL; @@ -571,7 +570,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) DECODE_HEAD; READ_BUF(4); - READ32(close->cl_seqid); + close->cl_seqid = be32_to_cpup(p++); return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; @@ -585,7 +584,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit READ_BUF(12); READ64(commit->co_offset); - READ32(commit->co_count); + commit->co_count = be32_to_cpup(p++); DECODE_TAIL; } @@ -596,19 +595,19 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_HEAD; READ_BUF(4); - READ32(create->cr_type); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: READ_BUF(4); - READ32(create->cr_linklen); + create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); SAVEMEM(create->cr_linkname, create->cr_linklen); break; case NF4BLK: case NF4CHR: READ_BUF(8); - READ32(create->cr_specdata1); - READ32(create->cr_specdata2); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -618,7 +617,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } READ_BUF(4); - READ32(create->cr_namelen); + create->cr_namelen = be32_to_cpup(p++); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); if ((status = check_filename(create->cr_name, create->cr_namelen))) @@ -650,7 +649,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_HEAD; READ_BUF(4); - READ32(link->li_namelen); + link->li_namelen = be32_to_cpup(p++); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); if ((status = check_filename(link->li_name, link->li_namelen))) @@ -668,24 +667,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ READ_BUF(28); - READ32(lock->lk_type); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(lock->lk_reclaim); + lock->lk_reclaim = be32_to_cpup(p++); READ64(lock->lk_offset); READ64(lock->lk_length); - READ32(lock->lk_is_new); + lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { READ_BUF(4); - READ32(lock->lk_new_open_seqid); + lock->lk_new_open_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); if (status) return status; READ_BUF(8 + sizeof(clientid_t)); - READ32(lock->lk_new_lock_seqid); + lock->lk_new_lock_seqid = be32_to_cpup(p++); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - READ32(lock->lk_new_owner.len); + lock->lk_new_owner.len = be32_to_cpup(p++); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { @@ -693,7 +692,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if (status) return status; READ_BUF(4); - READ32(lock->lk_old_lock_seqid); + lock->lk_old_lock_seqid = be32_to_cpup(p++); } DECODE_TAIL; @@ -705,13 +704,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_HEAD; READ_BUF(32); - READ32(lockt->lt_type); + lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; READ64(lockt->lt_offset); READ64(lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); - READ32(lockt->lt_owner.len); + lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -724,10 +723,10 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) DECODE_HEAD; READ_BUF(8); - READ32(locku->lu_type); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(locku->lu_seqid); + locku->lu_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &locku->lu_stateid); if (status) return status; @@ -744,7 +743,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_HEAD; READ_BUF(4); - READ32(lookup->lo_len); + lookup->lo_len = be32_to_cpup(p++); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); if ((status = check_filename(lookup->lo_name, lookup->lo_len))) @@ -759,7 +758,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh u32 w; READ_BUF(4); - READ32(w); + w = be32_to_cpup(p++); *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -811,7 +810,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) __be32 *p; READ_BUF(4); - READ32(*x); + *x = be32_to_cpup(p++); /* Note: unlinke access bits, deny bits may be zero. */ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; @@ -825,7 +824,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne __be32 *p; READ_BUF(4); - READ32(o->len); + o->len = be32_to_cpup(p++); if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) return nfserr_bad_xdr; @@ -850,7 +849,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); - READ32(open->op_seqid); + open->op_seqid = be32_to_cpup(p++); /* decode, yet ignore deleg_when until supported */ status = nfsd4_decode_share_access(argp, &open->op_share_access, &open->op_deleg_want, &dummy); @@ -865,13 +864,13 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) if (status) goto xdr_error; READ_BUF(4); - READ32(open->op_create); + open->op_create = be32_to_cpup(p++); switch (open->op_create) { case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: READ_BUF(4); - READ32(open->op_createmode); + open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: @@ -904,12 +903,12 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* open_claim */ READ_BUF(4); - READ32(open->op_claim_type); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -917,14 +916,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) break; case NFS4_OPEN_CLAIM_PREVIOUS: READ_BUF(4); - READ32(open->op_delegate_type); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -962,7 +961,7 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con if (status) return status; READ_BUF(4); - READ32(open_conf->oc_seqid); + open_conf->oc_seqid = be32_to_cpup(p++); DECODE_TAIL; } @@ -976,7 +975,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d if (status) return status; READ_BUF(4); - READ32(open_down->od_seqid); + open_down->od_seqid = be32_to_cpup(p++); status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) @@ -993,7 +992,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_HEAD; READ_BUF(4); - READ32(putfh->pf_fhlen); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) goto xdr_error; READ_BUF(putfh->pf_fhlen); @@ -1020,7 +1019,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) return status; READ_BUF(12); READ64(read->rd_offset); - READ32(read->rd_length); + read->rd_length = be32_to_cpup(p++); DECODE_TAIL; } @@ -1033,8 +1032,8 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read READ_BUF(24); READ64(readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); - READ32(readdir->rd_maxcount); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -1047,7 +1046,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_HEAD; READ_BUF(4); - READ32(remove->rm_namelen); + remove->rm_namelen = be32_to_cpup(p++); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); if ((status = check_filename(remove->rm_name, remove->rm_namelen))) @@ -1062,10 +1061,10 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_HEAD; READ_BUF(4); - READ32(rename->rn_snamelen); + rename->rn_snamelen = be32_to_cpup(p++); READ_BUF(rename->rn_snamelen + 4); SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ32(rename->rn_tnamelen); + rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) @@ -1097,7 +1096,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(secinfo->si_namelen); + secinfo->si_namelen = be32_to_cpup(p++); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); status = check_filename(secinfo->si_name, secinfo->si_namelen); @@ -1113,7 +1112,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(sin->sin_style); + sin->sin_style = be32_to_cpup(p++); DECODE_TAIL; } @@ -1144,16 +1143,16 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient if (status) return nfserr_bad_xdr; READ_BUF(8); - READ32(setclientid->se_callback_prog); - READ32(setclientid->se_callback_netid_len); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_netid_len + 4); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ32(setclientid->se_callback_addr_len); + setclientid->se_callback_addr_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_addr_len + 4); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ32(setclientid->se_callback_ident); + setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; } @@ -1186,7 +1185,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify * nfsd4_proc_verify */ READ_BUF(4); - READ32(verify->ve_attrlen); + verify->ve_attrlen = be32_to_cpup(p++); READ_BUF(verify->ve_attrlen); SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -1205,10 +1204,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) return status; READ_BUF(16); READ64(write->wr_offset); - READ32(write->wr_stable_how); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; - READ32(write->wr_buflen); + write->wr_buflen = be32_to_cpup(p++); /* Sorry .. no magic macros for this.. * * READ_BUF(write->wr_buflen); @@ -1254,7 +1253,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - READ32(rlockowner->rl_owner.len); + rlockowner->rl_owner.len = be32_to_cpup(p++); READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); @@ -1278,63 +1277,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; READ_BUF(4); - READ32(exid->flags); + exid->flags = be32_to_cpup(p++); /* Ignore state_protect4_a */ READ_BUF(4); - READ32(exid->spa_how); + exid->spa_how = be32_to_cpup(p++); switch (exid->spa_how) { case SP4_NONE: break; case SP4_MACH_CRED: /* spo_must_enforce */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* spo_must_allow */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; break; case SP4_SSV: /* ssp_ops */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* ssp_hash_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_encr_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_window and ssp_num_gss_handles */ READ_BUF(8); - READ32(dummy); - READ32(dummy); + dummy = be32_to_cpup(p++); + dummy = be32_to_cpup(p++); break; default: goto xdr_error; @@ -1342,7 +1341,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ - READ32(dummy); + dummy = be32_to_cpup(p++); if (dummy > 1) goto xdr_error; @@ -1350,13 +1349,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, if (dummy == 1) { /* nii_domain */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* nii_name */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); @@ -1376,21 +1375,21 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, READ_BUF(16); COPYMEM(&sess->clientid, 8); - READ32(sess->seqid); - READ32(sess->flags); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); /* Fore channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->fore_channel.maxreq_sz); - READ32(sess->fore_channel.maxresp_sz); - READ32(sess->fore_channel.maxresp_cached); - READ32(sess->fore_channel.maxops); - READ32(sess->fore_channel.maxreqs); - READ32(sess->fore_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->fore_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->fore_channel.rdma_attrs); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->fore_channel.nr_rdma_attrs > 1) { dprintk("Too many fore channel attr bitmaps!\n"); goto xdr_error; @@ -1398,23 +1397,23 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, /* Back channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->back_channel.maxreq_sz); - READ32(sess->back_channel.maxresp_sz); - READ32(sess->back_channel.maxresp_cached); - READ32(sess->back_channel.maxops); - READ32(sess->back_channel.maxreqs); - READ32(sess->back_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); + sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->back_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->back_channel.rdma_attrs); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->back_channel.nr_rdma_attrs > 1) { dprintk("Too many back channel attr bitmaps!\n"); goto xdr_error; } READ_BUF(4); - READ32(sess->callback_prog); + sess->callback_prog = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1437,7 +1436,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(free_stateid->fr_stateid.si_generation); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -1451,10 +1450,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(seq->seqid); - READ32(seq->slotid); - READ32(seq->maxslots); - READ32(seq->cachethis); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); DECODE_TAIL; } @@ -1511,7 +1510,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str DECODE_HEAD; READ_BUF(4); - READ32(rc->rca_one_fs); + rc->rca_one_fs = be32_to_cpup(p++); DECODE_TAIL; } @@ -1618,11 +1617,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) int i; READ_BUF(4); - READ32(argp->taglen); + argp->taglen = be32_to_cpup(p++); READ_BUF(argp->taglen + 8); SAVEMEM(argp->tag, argp->taglen); - READ32(argp->minorversion); - READ32(argp->opcnt); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) @@ -1647,7 +1646,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->replay = NULL; READ_BUF(4); - READ32(op->opnum); + op->opnum = be32_to_cpup(p++); if (nfsd4_opnum_in_range(argp, op)) op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); -- cgit v0.10.2 From 542d1ab3c7ce53be7d7122a83d016304af4e6345 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:45:31 -0400 Subject: nfsd4: kill READ64 Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a23fa00..2d305a1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -98,10 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) #define READMEM(x,nbytes) do { \ x = (char *)p; \ p += XDR_QUADLEN(nbytes); \ @@ -269,6 +265,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, { int expected_len, len = 0; u32 dummy32; + u64 sec; char *buf; DECODE_HEAD; @@ -282,7 +279,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); len += 8; - READ64(iattr->ia_size); + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { @@ -365,7 +362,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_atime.tv_sec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_atime.tv_sec = (time_t)sec; iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -388,7 +386,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_mtime.tv_sec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_mtime.tv_sec = sec; iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -583,7 +582,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_HEAD; READ_BUF(12); - READ64(commit->co_offset); + p = xdr_decode_hyper(p, &commit->co_offset); commit->co_count = be32_to_cpup(p++); DECODE_TAIL; @@ -671,8 +670,8 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; lock->lk_reclaim = be32_to_cpup(p++); - READ64(lock->lk_offset); - READ64(lock->lk_length); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { @@ -707,8 +706,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; - READ64(lockt->lt_offset); - READ64(lockt->lt_length); + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); @@ -731,8 +730,8 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) if (status) return status; READ_BUF(16); - READ64(locku->lu_offset); - READ64(locku->lu_length); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); DECODE_TAIL; } @@ -1018,7 +1017,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) if (status) return status; READ_BUF(12); - READ64(read->rd_offset); + p = xdr_decode_hyper(p, &read->rd_offset); read->rd_length = be32_to_cpup(p++); DECODE_TAIL; @@ -1030,7 +1029,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_HEAD; READ_BUF(24); - READ64(readdir->rd_cookie); + p = xdr_decode_hyper(p, &readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); readdir->rd_dircount = be32_to_cpup(p++); readdir->rd_maxcount = be32_to_cpup(p++); @@ -1203,7 +1202,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if (status) return status; READ_BUF(16); - READ64(write->wr_offset); + p = xdr_decode_hyper(p, &write->wr_offset); write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; -- cgit v0.10.2 From 999e568354b8c797f344a2154761dd94cc84e4ac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jun 2014 17:33:35 -0400 Subject: nfs4: remove unused CHANGE_SECURITY_LABEL This constant has the wrong value. And we don't use it. And it's been removed from the 4.2 spec anyway. Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 397be39..7f55fed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 12c2cb9..a1e3064 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -399,8 +399,6 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \ - (1UL << 17) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) -- cgit v0.10.2 From 1b19453d1c6abcfa7c312ba6c9f11a277568fc94 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jun 2014 09:45:00 -0400 Subject: nfsd: don't halt scanning the DRC LRU list when there's an RC_INPROG entry Currently, the DRC cache pruner will stop scanning the list when it hits an entry that is RC_INPROG. It's possible however for a call to take a *very* long time. In that case, we don't want it to block other entries from being pruned if they are expired or we need to trim the cache to get back under the limit. Fix the DRC cache pruner to just ignore RC_INPROG entries. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index f8f060f..6040da8 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); } -static inline bool -nfsd_cache_entry_expired(struct svc_cacherep *rp) -{ - return rp->c_state != RC_INPROG && - time_after(jiffies, rp->c_timestamp + RC_EXPIRE); -} - /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -242,8 +235,14 @@ prune_cache_entries(void) long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { - if (!nfsd_cache_entry_expired(rp) && - num_drc_entries <= max_drc_entries) + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); freed++; -- cgit v0.10.2 From 0bf4828983dff062cd502f27ab8644b32774e72e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 28 May 2014 15:12:01 -0500 Subject: svcrdma: refactor marshalling logic This patch refactors the NFSRDMA server marshalling logic to remove the intermediary map structures. It also fixes an existing bug where the NFSRDMA server was not minding the device fast register page list length limitations. Signed-off-by: Tom Tucker Signed-off-by: Steve Wise diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 0b8e3e6..5cf99a0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr { struct list_head frmr_list; }; struct svc_rdma_req_map { - struct svc_rdma_fastreg_mr *frmr; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; + unsigned long lkey[RPCSVC_MAXPAGES]; }; }; -#define RDMACTXT_F_FAST_UNREG 1 #define RDMACTXT_F_LAST_CTXT 2 #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8d904e4..52d9f2c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* Set up the XDR head */ rqstp->rq_arg.head[0].iov_base = page_address(page); - rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); + rqstp->rq_arg.head[0].iov_len = + min_t(size_t, byte_count, ctxt->sge[0].length); rqstp->rq_arg.len = byte_count; rqstp->rq_arg.buflen = byte_count; @@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, page = ctxt->pages[sge_no]; put_page(rqstp->rq_pages[sge_no]); rqstp->rq_pages[sge_no] = page; - bc -= min(bc, ctxt->sge[sge_no].length); + bc -= min_t(u32, bc, ctxt->sge[sge_no].length); rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; sge_no++; } @@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -/* Encode a read-chunk-list as an array of IB SGE - * - * Assumptions: - * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - * - */ -static int map_read_chunks(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) +static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - int sge_no; - int sge_bytes; - int page_off; - int page_no; - int ch_bytes; - int ch_no; - struct rpcrdma_read_chunk *ch; + if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == + RDMA_TRANSPORT_IWARP) + return 1; + else + return min_t(int, sge_count, xprt->sc_max_sge); +} - sge_no = 0; - page_no = 0; - page_off = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch_no = 0; - ch_bytes = ntohl(ch->rc_target.rs_length); - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = ch_bytes; - head->arg.len = rqstp->rq_arg.len + ch_bytes; - head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; - head->count++; - chl_map->ch[0].start = 0; - while (byte_count) { - rpl_map->sge[sge_no].iov_base = - page_address(rqstp->rq_arg.pages[page_no]) + page_off; - sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); - rpl_map->sge[sge_no].iov_len = sge_bytes; - /* - * Don't bump head->count here because the same page - * may be used by multiple SGE. - */ - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; +typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last); + +/* Issue an RDMA_READ using the local lkey to map the data sink */ +static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) +{ + struct ib_send_wr read_wr; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; + + ctxt->direction = DMA_FROM_DEVICE; + ctxt->read_hdr = head; + pages_needed = + min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; + ctxt->sge[pno].addr = + ib_dma_map_page(xprt->sc_cm_id->device, + head->arg.pages[pg_no], pg_off, + PAGE_SIZE - pg_off, + DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[pno].addr); + if (ret) + goto err; + atomic_inc(&xprt->sc_dma_used); - byte_count -= sge_bytes; - ch_bytes -= sge_bytes; - sge_no++; - /* - * If all bytes for this chunk have been mapped to an - * SGE, move to the next SGE - */ - if (ch_bytes == 0) { - chl_map->ch[ch_no].count = - sge_no - chl_map->ch[ch_no].start; - ch_no++; - ch++; - chl_map->ch[ch_no].start = sge_no; - ch_bytes = ntohl(ch->rc_target.rs_length); - /* If bytes remaining account for next chunk */ - if (byte_count) { - head->arg.page_len += ch_bytes; - head->arg.len += ch_bytes; - head->arg.buflen += ch_bytes; - } + /* The lkey here is either a local dma lkey or a dma_mr lkey */ + ctxt->sge[pno].lkey = xprt->sc_dma_lkey; + ctxt->sge[pno].length = len; + ctxt->count++; + + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; } - /* - * If this SGE consumed all of the page, move to the - * next page - */ - if ((sge_bytes + page_off) == PAGE_SIZE) { - page_no++; - page_off = 0; - /* - * If there are still bytes left to map, bump - * the page count - */ - if (byte_count) - head->count++; - } else - page_off += sge_bytes; + rs_length -= len; } - BUG_ON(byte_count != 0); - return sge_no; + + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.wr_id = (unsigned long)ctxt; + read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; + } + + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + return ret; } -/* Map a read-chunk-list to an XDR and fast register the page-list. - * - * Assumptions: - * - chunk[0] position points to pages[0] at an offset of 0 - * - pages[] will be made physically contiguous by creating a one-off memory - * region using the fastreg verb. - * - byte_count is # of bytes in read-chunk-list - * - ch_count is # of chunks in read-chunk-list - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - */ -static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, +/* Issue an RDMA_READ using an FRMR to map the data sink */ +static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) { - int page_no; - int ch_no; - u32 offset; - struct rpcrdma_read_chunk *ch; - struct svc_rdma_fastreg_mr *frmr; - int ret = 0; + struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; + struct ib_send_wr fastreg_wr; + u8 key; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; - frmr = svc_rdma_get_frmr(xprt); if (IS_ERR(frmr)) return -ENOMEM; - head->frmr = frmr; - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = byte_count; - head->arg.len = rqstp->rq_arg.len + byte_count; - head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = frmr; + pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); - /* Fast register the page list */ - frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = byte_count; - frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - frmr->page_list->page_list[page_no] = + frmr->map_len = pages_needed << PAGE_SHIFT; + frmr->page_list_len = pages_needed; + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + frmr->page_list->page_list[pno] = ib_dma_map_page(xprt->sc_cm_id->device, - rqstp->rq_arg.pages[page_no], 0, + head->arg.pages[pg_no], 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[pno]); + if (ret) + goto err; atomic_inc(&xprt->sc_dma_used); - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - } - head->count += page_no; - - /* rq_respages points one past arg pages */ - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - /* Create the reply and chunk maps */ - offset = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - for (ch_no = 0; ch_no < ch_count; ch_no++) { - int len = ntohl(ch->rc_target.rs_length); - rpl_map->sge[ch_no].iov_base = frmr->kva + offset; - rpl_map->sge[ch_no].iov_len = len; - chl_map->ch[ch_no].count = 1; - chl_map->ch[ch_no].start = ch_no; - offset += len; - ch++; + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; + } + rs_length -= len; } - ret = svc_rdma_fastreg(xprt, frmr); - if (ret) - goto fatal_err; - - return ch_no; - - fatal_err: - printk("svcrdma: error fast registering xdr for xprt %p", xprt); - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - -static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt, - struct svc_rdma_fastreg_mr *frmr, - struct kvec *vec, - u64 *sgl_offset, - int count) -{ - int i; - unsigned long off; + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->count = count; - ctxt->direction = DMA_FROM_DEVICE; - for (i = 0; i < count; i++) { - ctxt->sge[i].length = 0; /* in case map fails */ - if (!frmr) { - BUG_ON(!virt_to_page(vec[i].iov_base)); - off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; - ctxt->sge[i].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(vec[i].iov_base), - off, - vec[i].iov_len, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - ctxt->sge[i].addr)) - return -EINVAL; - ctxt->sge[i].lkey = xprt->sc_dma_lkey; - atomic_inc(&xprt->sc_dma_used); - } else { - ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; - ctxt->sge[i].lkey = frmr->mr->lkey; - } - ctxt->sge[i].length = vec[i].iov_len; - *sgl_offset = *sgl_offset + vec[i].iov_len; + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].lkey = frmr->mr->lkey; + ctxt->sge[0].length = read; + ctxt->count = 1; + ctxt->read_hdr = head; + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + fastreg_wr.next = &read_wr; + + /* Prepare RDMA_READ */ + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = 1; + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr_id = (unsigned long)ctxt; + read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + } else { + read_wr.opcode = IB_WR_RDMA_READ; + read_wr.next = &inv_wr; + /* Prepare invalidate */ + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = (unsigned long)ctxt; + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = frmr->mr->lkey; + } + ctxt->wr_op = read_wr.opcode; + + /* Post the chain */ + ret = svc_rdma_send(xprt, &fastreg_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; } - return 0; -} -static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) -{ - if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == - RDMA_TRANSPORT_IWARP) && - sge_count > 1) - return 1; - else - return min_t(int, sge_count, xprt->sc_max_sge); + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + svc_rdma_put_frmr(xprt, frmr); + return ret; } -/* - * Use RDMA_READ to read data from the advertised client buffer into the - * XDR stream starting at rq_arg.head[0].iov_base. - * Each chunk in the array - * contains the following fields: - * discrim - '1', This isn't used for data placement - * position - The xdr stream offset (the same for every chunk) - * handle - RMR for client memory region - * length - data transfer length - * offset - 64 bit tagged offset in remote memory region - * - * On our side, we need to read into a pagelist. The first page immediately - * follows the RPC header. - * - * This function returns: - * 0 - No error and no read-list found. - * - * 1 - Successful read-list processing. The data is not yet in - * the pagelist and therefore the RPC request must be deferred. The - * I/O completion will enqueue the transport again and - * svc_rdma_recvfrom will complete the request. - * - * <0 - Error processing/posting read-list. - * - * NOTE: The ctxt must not be touched after the last WR has been posted - * because the I/O completion processing may occur on another - * processor and free / modify the context. Ne touche pas! - */ -static int rdma_read_xdr(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rmsgp, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *hdr_ctxt) +static int rdma_read_chunks(struct svcxprt_rdma *xprt, + struct rpcrdma_msg *rmsgp, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head) { - struct ib_send_wr read_wr; - struct ib_send_wr inv_wr; - int err = 0; - int ch_no; - int ch_count; - int byte_count; - int sge_count; - u64 sgl_offset; + int page_no, ch_count, ret; struct rpcrdma_read_chunk *ch; - struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_req_map *rpl_map; - struct svc_rdma_req_map *chl_map; + u32 page_offset, byte_count; + u64 rs_offset; + rdma_reader_fn reader; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); @@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); + /* The request is completed when the RDMA_READs complete. The + * head context keeps all the pages that comprise the + * request. + */ + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; + head->arg.page_base = 0; + head->arg.page_len = 0; + head->arg.len = rqstp->rq_arg.len; + head->arg.buflen = rqstp->rq_arg.buflen; - if (!xprt->sc_frmr_pg_list_len) - sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); + /* Use FRMR if supported */ + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) + reader = rdma_read_chunk_frmr; else - sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); - if (sge_count < 0) { - err = -EIO; - goto out; - } - - sgl_offset = 0; - ch_no = 0; + reader = rdma_read_chunk_lcl; + page_no = 0; page_offset = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++, ch_no++) { - u64 rs_offset; -next_sge: - ctxt = svc_rdma_get_context(xprt); - ctxt->direction = DMA_FROM_DEVICE; - ctxt->frmr = hdr_ctxt->frmr; - ctxt->read_hdr = NULL; - clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + ch->rc_discrim != 0; ch++) { - /* Prepare READ WR */ - memset(&read_wr, 0, sizeof read_wr); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = - rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); - err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, - &rpl_map->sge[chl_map->ch[ch_no].start], - &sgl_offset, - read_wr.num_sge); - if (err) { - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; - } - if (((ch+1)->rc_discrim == 0) && - (read_wr.num_sge == chl_map->ch[ch_no].count)) { - /* - * Mark the last RDMA_READ with a bit to - * indicate all RPC data has been fetched from - * the client and the RPC needs to be enqueued. - */ - set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - if (hdr_ctxt->frmr) { - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - /* - * Invalidate the local MR used to map the data - * sink. - */ - if (xprt->sc_dev_caps & - SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = - IB_WR_RDMA_READ_WITH_INV; - ctxt->wr_op = read_wr.opcode; - read_wr.ex.invalidate_rkey = - ctxt->frmr->mr->lkey; - } else { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - hdr_ctxt->frmr->mr->lkey; - read_wr.next = &inv_wr; - } - } - ctxt->read_hdr = hdr_ctxt; - } - /* Post the read */ - err = svc_rdma_send(xprt, &read_wr); - if (err) { - printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", - err); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; + byte_count = ntohl(ch->rc_target.rs_length); + + while (byte_count > 0) { + ret = reader(xprt, rqstp, head, + &page_no, &page_offset, + ntohl(ch->rc_target.rs_handle), + byte_count, rs_offset, + ((ch+1)->rc_discrim == 0) /* last */ + ); + if (ret < 0) + goto err; + byte_count -= ret; + rs_offset += ret; + head->arg.buflen += ret; } - atomic_inc(&rdma_stat_read); - - if (read_wr.num_sge < chl_map->ch[ch_no].count) { - chl_map->ch[ch_no].count -= read_wr.num_sge; - chl_map->ch[ch_no].start += read_wr.num_sge; - goto next_sge; - } - sgl_offset = 0; - err = 1; } - - out: - svc_rdma_put_req_map(rpl_map); - svc_rdma_put_req_map(chl_map); - + ret = 1; + err: /* Detach arg pages. svc_recv will replenish them */ - for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) - rqstp->rq_pages[ch_no] = NULL; + for (page_no = 0; + &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) + rqstp->rq_pages[page_no] = NULL; - return err; + return ret; } static int rdma_read_complete(struct svc_rqst *rqstp, @@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_op_ctxt, dto_q); list_del_init(&ctxt->dto_q); - } - if (ctxt) { spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); return rdma_read_complete(rqstp, ctxt); - } - - if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { + } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, struct svc_rdma_op_ctxt, dto_q); @@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) goto close_out; - BUG_ON(ret); goto out; } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", @@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } /* Read read-list data. */ - ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); + ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); if (ret > 0) { /* read-list posted, defer until data received from client. */ goto defer; - } - if (ret < 0) { + } else if (ret < 0) { /* Post of read-list failed, free context. */ svc_rdma_put_context(ctxt, 1); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7e024a5..49fd21a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,152 +50,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -/* Encode an XDR as an array of IB SGE - * - * Assumptions: - * - head[0] is physically contiguous. - * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * SGE[0] reserved for RCPRDMA header - * SGE[1] data from xdr->head[] - * SGE[2..sge_count-2] data from xdr->pages[] - * SGE[sge_count-1] data from xdr->tail. - * - * The max SGE we need is the length of the XDR / pagesize + one for - * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES - * reserves a page for both the request and the reply header, and this - * array is only concerned with the reply we are assured that we have - * on extra page for the RPCRMDA header. - */ -static int fast_reg_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) -{ - int sge_no; - u32 sge_bytes; - u32 page_bytes; - u32 page_off; - int page_no = 0; - u8 *frva; - struct svc_rdma_fastreg_mr *frmr; - - frmr = svc_rdma_get_frmr(xprt); - if (IS_ERR(frmr)) - return -ENOMEM; - vec->frmr = frmr; - - /* Skip the RPCRDMA header */ - sge_no = 1; - - /* Map the head. */ - frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); - vec->sge[sge_no].iov_base = xdr->head[0].iov_base; - vec->sge[sge_no].iov_len = xdr->head[0].iov_len; - vec->count = 2; - sge_no++; - - /* Map the XDR head */ - frmr->kva = frva; - frmr->direction = DMA_TO_DEVICE; - frmr->access_flags = 0; - frmr->map_len = PAGE_SIZE; - frmr->page_list_len = 1; - page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(xdr->head[0].iov_base), - page_off, - PAGE_SIZE - page_off, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - - /* Map the XDR page list */ - page_off = xdr->page_base; - page_bytes = xdr->page_len + page_off; - if (!page_bytes) - goto encode_tail; - - /* Map the pages */ - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - vec->sge[sge_no].iov_len = page_bytes; - sge_no++; - while (page_bytes) { - struct page *page; - - page = xdr->pages[page_no++]; - sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); - page_bytes -= sge_bytes; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - page, page_off, - sge_bytes, DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - - atomic_inc(&xprt->sc_dma_used); - page_off = 0; /* reset for next time through loop */ - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - vec->count++; - - encode_tail: - /* Map tail */ - if (0 == xdr->tail[0].iov_len) - goto done; - - vec->count++; - vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; - - if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == - ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { - /* - * If head and tail use the same page, we don't need - * to map it again. - */ - vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; - } else { - void *va; - - /* Map another page for the tail */ - page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; - va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), - page_off, - PAGE_SIZE, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - - done: - if (svc_rdma_fastreg(xprt, frmr)) - goto fatal_err; - - return 0; - - fatal_err: - printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); - vec->frmr = NULL; - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) @@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, BUG_ON(xdr->len != (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); - if (xprt->sc_frmr_pg_list_len) - return fast_reg_xdr(xprt, xdr, vec); - /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, } /* Assumptions: - * - We are using FRMR - * - or - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_bytes = min_t(size_t, bc, vec->sge[xdr_sge_no].iov_len-sge_off); sge[sge_no].length = sge_bytes; - if (!vec->frmr) { - sge[sge_no].addr = - dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - sge[sge_no].addr)) - goto err; - atomic_inc(&xprt->sc_dma_used); - sge[sge_no].lkey = xprt->sc_dma_lkey; - } else { - sge[sge_no].addr = (unsigned long) - vec->sge[xdr_sge_no].iov_base + sge_off; - sge[sge_no].lkey = vec->frmr->mr->lkey; - } + sge[sge_no].addr = + dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + sge[sge_no].addr)) + goto err; + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].lkey = xprt->sc_dma_lkey; ctxt->count++; - ctxt->frmr = vec->frmr; sge_off = 0; sge_no++; xdr_sge_no++; @@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, return 0; err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(xprt, vec->frmr); svc_rdma_put_context(ctxt, 0); /* Fatal error, close transport */ return -EIO; @@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; @@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ nchunks = ntohl(arg_ary->wc_nchunks); @@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; - struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; @@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; - ctxt->frmr = vec->frmr; - if (vec->frmr) - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - else - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].lkey = rdma->sc_dma_lkey; @@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma, int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; - if (!vec->frmr) { - ctxt->sge[sge_no].addr = - dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(rdma->sc_cm_id->device, - ctxt->sge[sge_no].addr)) - goto err; - atomic_inc(&rdma->sc_dma_used); - ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; - } else { - ctxt->sge[sge_no].addr = (unsigned long) - vec->sge[sge_no].iov_base; - ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; - } + ctxt->sge[sge_no].addr = + dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(rdma->sc_cm_id->device, + ctxt->sge[sge_no].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } BUG_ON(byte_count != 0); @@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } rqstp->rq_next_page = rqstp->rq_respages + 1; + BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; @@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - if (vec->frmr) { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - vec->frmr->mr->lkey; - send_wr.next = &inv_wr; - } ret = svc_rdma_send(rdma, &send_wr); if (ret) @@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma, err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 02db8d9..e7323fb 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -162,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; - map->frmr = NULL; return map; } @@ -338,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 0); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: + svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); spin_lock_bh(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); list_add_tail(&read_hdr->dto_q, @@ -365,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt, break; default: + BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -380,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt, static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; - struct ib_wc wc; + struct ib_wc wc_a[6]; + struct ib_wc *wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; + memset(wc_a, 0, sizeof(wc_a)); + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - if (wc.status != IB_WC_SUCCESS) - /* Close the transport */ - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { + int i; - /* Decrement used SQ WR count */ - atomic_dec(&xprt->sc_sq_count); - wake_up(&xprt->sc_send_wait); + for (i = 0; i < ret; i++) { + wc = &wc_a[i]; + if (wc->status != IB_WC_SUCCESS) { + dprintk("svcrdma: sq wc err status %d\n", + wc->status); - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - if (ctxt) - process_context(xprt, ctxt); + /* Close the transport */ + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + } - svc_xprt_put(&xprt->sc_xprt); + /* Decrement used SQ WR count */ + atomic_dec(&xprt->sc_sq_count); + wake_up(&xprt->sc_send_wait); + + ctxt = (struct svc_rdma_op_ctxt *) + (unsigned long)wc->wr_id; + if (ctxt) + process_context(xprt, ctxt); + + svc_xprt_put(&xprt->sc_xprt); + } } if (ctxt) @@ -995,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) need_dma_mr = 0; break; case RDMA_TRANSPORT_IB: - if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else if (!(devattr.device_cap_flags & + IB_DEVICE_LOCAL_DMA_LKEY)) { need_dma_mr = 1; dma_mr_acc = IB_ACCESS_LOCAL_WRITE; } else @@ -1192,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) container_of(xprt, struct svcxprt_rdma, sc_xprt); /* - * If there are fewer SQ WR available than required to send a - * simple response, return false. - */ - if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) - return 0; - - /* - * ...or there are already waiters on the SQ, + * If there are already waiters on the SQ, * return false. */ if (waitqueue_active(&rdma->sc_send_wait)) -- cgit v0.10.2 From 83710fc753d2ae158aa3cb7a7966d9c1bd05b792 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 5 Jun 2014 09:54:31 -0500 Subject: svcrdma: Fence LOCAL_INV work requests Fencing forces the invalidate to only happen after all prior send work requests have been completed. Signed-off-by: Steve Wise Reported by : Devesh Sharma Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 52d9f2c..8f92a61 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -338,7 +338,7 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } ctxt->wr_op = read_wr.opcode; -- cgit v0.10.2 From 48385408b45523d9a432c66292d47ef43efcbb94 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 May 2014 11:14:26 -0400 Subject: nfsd4: fix FREE_STATEID lockowner leak 27b11428b7de ("nfsd4: remove lockowner when removing lock stateid") introduced a memory leak. Cc: stable@vger.kernel.org Reported-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e5197d9..c0d45ce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3743,7 +3743,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) * correspondance, and we have to delete the lockowner when we * delete the lock stateid: */ - unhash_lockowner(lo); + release_lockowner(lo); return nfs_ok; } -- cgit v0.10.2