From 84822d0b3bc5a74a4290727dd1ab4fc7dcd6a348 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Dec 2012 17:57:50 -0500 Subject: nfsd4: simplify nfsd4_encode_fattr interface slightly It seems slightly simpler to make nfsd4_encode_fattr rather than its callers responsible for advancing the write pointer on success. (Also: the count == 0 check in the verify case looks superfluous. Running out of buffer space is really the only reason fattr encoding should fail with eresource.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d1c5db..ae73175 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -993,14 +993,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; + p = buf; status = nfsd4_encode_fattr(&cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, &p, + count, verify->ve_bmval, rqstp, 0); /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0dc1158..fcb5bed 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2006,12 +2006,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. + * countp is the buffer size in _words_ */ __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, + struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2020,12 +2019,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh tempfh; struct kstatfs statfs; - int buflen = *countp << 2; + int buflen = count << 2; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; + __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2431,7 +2430,7 @@ out_acl: } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + *buffer = p; status = nfs_ok; out: @@ -2443,7 +2442,6 @@ out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; out_serverfault: @@ -2462,7 +2460,7 @@ static inline int attributes_need_mount(u32 *bmval) static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) + const char *name, int namlen, __be32 **p, int buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2568,10 +2566,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; @@ -2698,10 +2695,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + &resp->p, buflen, getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0889bfb..546f898 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, + struct dentry *dentry, __be32 **buffer, int countp, u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, -- cgit v0.10.2 From 74b70dded311fa0e6e7529514b29bbb8e6bb1f3e Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 24 Dec 2012 18:11:27 +0800 Subject: nfsd: Pass correct slot number to nfsd4_put_drc_mem() In alloc_session(), numslots is the correct slot number used by the session. But the slot number passed to nfsd4_put_drc_mem() is the one from nfs client. Signed-off-by: Yanchuan Nian Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ac8ed96..29924a0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -905,7 +905,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, new = __alloc_session(slotsize, numslots); if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + nfsd4_put_drc_mem(slotsize, numslots); return NULL; } init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); -- cgit v0.10.2 From 266533c6df7a4a4e2ebd0bfdd272f7eb7cf4b81f Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 24 Dec 2012 18:11:45 +0800 Subject: nfsd: Don't unlock the state while it's not locked In the procedure of CREATE_SESSION, the state is locked after alloc_conn_from_crses(). If the allocation fails, the function goes to "out_free_session", and then "out" where there is an unlock function. Signed-off-by: Yanchuan Nian Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 29924a0..cc41bf4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1844,11 +1844,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); -out: nfs4_unlock_state(); +out: dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: + nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); -- cgit v0.10.2 From ec1686761753ead775e9474a6265323a4c555bc6 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Fri, 4 Jan 2013 19:45:35 +0800 Subject: nfsd: Remove write permission from file content The write function doesn't be implemented in file content, and it's meaningless to write data into this file directly. Remove write permission from it. Signed-off-by: Yanchuan Nian Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9afa439..9f84703 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1614,7 +1614,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("content", S_IFREG|S_IRUSR, cd->u.procfs.proc_ent, &content_file_operations_procfs, cd); cd->u.procfs.content_ent = p; -- cgit v0.10.2 From 624ab4644819948e9dc87c114201e98f2e52490f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 8 Jan 2013 16:22:15 -0500 Subject: svcrpc: silence "unused variable" warning in !RPC_DEBUG case Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbf12ac..b9ba2a8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1042,6 +1042,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ +#ifdef RPC_DEBUG static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1058,6 +1059,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) va_end(args); } +#else +static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} +#endif /* * Common routine for processing the RPC request. -- cgit v0.10.2 From 35525b79786b2ba58ef13822198ce22c497bc7a2 Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Mon, 7 Jan 2013 00:12:15 +0200 Subject: sunrpc: Fix lockd sleeping until timeout There is a race in enqueueing thread to a pool and waking up a thread. lockd doesn't wake up on reception of lock granted callback if svc_wake_up() is called before lockd's thread is added to a pool. Signed-off-by: Andriy Skulysh Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 676ddf5..1f0216b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ + int sp_task_pending;/* has pending task */ } ____cacheline_aligned_in_smp; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8e47fa..5a9d40c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv) rqstp->rq_xprt = NULL; */ wake_up(&rqstp->rq_wait); - } + } else + pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); } } @@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; + pool->sp_task_pending = 0; } else { + if (pool->sp_task_pending) { + pool->sp_task_pending = 0; + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EAGAIN); + } /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); -- cgit v0.10.2 From bca0ec6511bb96bcb6cb247fd4100a4ea1d1e4f5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 9 Jan 2013 12:38:34 +0300 Subject: nfsd: fix unused "nn" variable warning in free_client() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_LOCKDEP is disabled, then there would be a warning like this: CC [M] fs/nfsd/nfs4state.o fs/nfsd/nfs4state.c: In function ‘free_client’: fs/nfsd/nfs4state.c:1051:19: warning: unused variable ‘nn’ [-Wunused-variable] So, let's add "maybe_unused" tag to this variable. Reported-by: Toralf Förster Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc41bf4..4db46aa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1048,7 +1048,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { -- cgit v0.10.2 From ff89be87c70247ffe3a72271e02eb7765cdd12c4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 23 Jan 2013 18:25:01 -0500 Subject: nfsd4: require version 4 when enabling or disabling minorversion The current code will allow silly things like: echo "+2 +3 +4 +7.1">/proc/fs/nfsd/versions Reported-by: Fan Chaoting Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7493428..65889ec 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -534,7 +534,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) -- cgit v0.10.2 From 885c91f74662404dc179b8775494df383479311c Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 29 Jan 2013 13:16:01 +0800 Subject: nfsd: Fix memleak in svc_export_put In func svc_export_parse, the uuid which used kmemdup to alloc will be changed in func export_update.So the later kfree don't free this memory. And it can't be free in func svc_export_parse because other place still used.So put this operation in func svc_export_put. Signed-off-by: Jianpeng Ma Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a3946cf..45159ee 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -315,6 +315,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } -- cgit v0.10.2 From 7b9e8522a65886d8ae168547a67c3617b6ba83f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:07 -0500 Subject: nfsd: fix IPv6 address handling in the DRC Currently, it only stores the first 16 bytes of any address. struct sockaddr_in6 is 28 bytes however, so we're currently ignoring the last 12 bytes of the address. Expand the c_addr field to a sockaddr_in6, and cast it to a sockaddr_in as necessary. Also fix the comparitor to use the existing RPC helpers for this. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d3..2cac76c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,7 +24,7 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34..5dd9ec2 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,6 +9,7 @@ */ #include +#include #include "nfsd.h" #include "cache.h" @@ -146,7 +147,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { + rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && + rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) { nfsdstats.rchits++; goto found_entry; } @@ -183,7 +185,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; rp->c_timestamp = jiffies; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b8..47354a2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -263,7 +263,9 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, * @sap1: first sockaddr * @sap2: second sockaddr * - * Just compares the family and address portion. Ignores port, scope, etc. + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * * Returns true if the addrs are equal, false if they aren't. */ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, -- cgit v0.10.2 From 6dc8889589678c228a675b9f01721318a31a8f5b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:08 -0500 Subject: nfsd: remove unneeded spinlock in nfsd_cache_update The locking rules for cache entries say that locking the cache_lock isn't needed if you're just touching the current entry. Earlier in this function we set rp->c_state to RC_UNUSED without any locking, so I believe it's ok to do the same here. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5dd9ec2..972c14a 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -286,9 +286,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) cachv = &rp->c_replvec; cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); return; } cachv->iov_len = len << 2; -- cgit v0.10.2 From 09662d58d5a2d75c8c29558dda4fc5134ef14b25 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:09 -0500 Subject: nfsd: get rid of RC_INTR The reply cache code never returns this status. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 2cac76c..f8c6df8 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -50,8 +50,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cee62ab..40cb1cb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -652,7 +652,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. */ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: -- cgit v0.10.2 From 8a8bc40d9ba0890f88dbf7a7c8fa81ddc77c08e3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:10 -0500 Subject: nfsd: create a dedicated slabcache for DRC entries Currently we use kmalloc() which wastes a little bit of memory on each allocation since it's a power of 2 allocator. Since we're allocating a 1024 of these now, and may need even more later, let's create a new slabcache for them. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 972c14a..4aad9e4 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -26,6 +26,7 @@ static struct hlist_head * cache_hash; static struct list_head lru_head; static int cache_disabled = 1; +static struct kmem_cache *drc_slab; /* * Calculate the hash index from an XID. @@ -51,10 +52,15 @@ int nfsd_reply_cache_init(void) struct svc_cacherep *rp; int i; + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; + INIT_LIST_HEAD(&lru_head); i = CACHESIZE; while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (!rp) goto out_nomem; list_add(&rp->c_lru, &lru_head); @@ -85,13 +91,18 @@ void nfsd_reply_cache_shutdown(void) if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); list_del(&rp->c_lru); - kfree(rp); + kmem_cache_free(drc_slab, rp); } cache_disabled = 1; kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* -- cgit v0.10.2 From f09841fdfad8dd95593725dfa70388f55b170947 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:11 -0500 Subject: nfsd: add alloc and free functions for DRC entries Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 4aad9e4..363bc61 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -47,10 +47,34 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); */ static DEFINE_SPINLOCK(cache_lock); -int nfsd_reply_cache_init(void) +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; + + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { + rp->c_state = RC_UNUSED; + rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); + INIT_HLIST_NODE(&rp->c_hash); + } + return rp; +} + +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + list_del(&rp->c_lru); + kmem_cache_free(drc_slab, rp); +} + +int nfsd_reply_cache_init(void) +{ int i; + struct svc_cacherep *rp; drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); @@ -60,13 +84,10 @@ int nfsd_reply_cache_init(void) INIT_LIST_HEAD(&lru_head); i = CACHESIZE; while (i) { - rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + rp = nfsd_reply_cache_alloc(); if (!rp) goto out_nomem; list_add(&rp->c_lru, &lru_head); - rp->c_state = RC_UNUSED; - rp->c_type = RC_NOCACHE; - INIT_HLIST_NODE(&rp->c_hash); i--; } @@ -88,10 +109,7 @@ void nfsd_reply_cache_shutdown(void) while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kmem_cache_free(drc_slab, rp); + nfsd_reply_cache_free_locked(rp); } cache_disabled = 1; -- cgit v0.10.2 From 25e6b8b0e1a247747db5275b1b6b362f5acf2245 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:12 -0500 Subject: nfsd: remove redundant test from nfsd_reply_cache_free Entries can only get a c_type of RC_REPLBUFF iff they are RC_DONE. Therefore the test for RC_DONE isn't necessary here. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 363bc61..2cdc4be 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -65,7 +65,7 @@ nfsd_reply_cache_alloc(void) static void nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) + if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); list_del(&rp->c_lru); kmem_cache_free(drc_slab, rp); -- cgit v0.10.2 From d1a0774de6cb908f5ba7806d09aaf86bb03fa182 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:13 -0500 Subject: nfsd: clean up and clarify the cache expiration code Add a preprocessor constant for the expiry time of cache entries, and move the test for an expired entry into a function. Note that the current code does not test for RC_INPROG. It just assumes that it won't take more than 2 minutes to fill out an in-progress entry. I'm not sure how valid that assumption is though, so let's just ensure that we never consider an RC_INPROG entry to be expired. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index f8c6df8..9c7232b 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -70,6 +70,9 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cdc4be..634b856 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -142,6 +142,13 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } +static inline bool +nfsd_cache_entry_expired(struct svc_cacherep *rp) +{ + return rp->c_state != RC_INPROG && + time_after(jiffies, rp->c_timestamp + RC_EXPIRE); +} + /* * Try to find an entry matching the current call in the cache. When none * is found, we grab the oldest unlocked entry off the LRU list. @@ -175,7 +182,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && + !nfsd_cache_entry_expired(rp) && rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) { nfsdstats.rchits++; -- cgit v0.10.2 From a4a3ec3291249c7db0e03d8b8188ef259b2873da Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:14 -0500 Subject: nfsd: break out hashtable search into separate function Later, we'll need more than one call site for this, so break it out into a new function. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 634b856..b89e7c8 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -150,6 +150,35 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) } /* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp) +{ + struct svc_cacherep *rp; + struct hlist_node *hn; + struct hlist_head *rh; + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + + rh = &cache_hash[request_hash(xid)]; + hlist_for_each_entry(rp, hn, rh, c_hash) { + if (rp->c_state != RC_UNUSED && + xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + !nfsd_cache_entry_expired(rp) && + rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && + rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) + return rp; + } + return NULL; +} + +/* * Try to find an entry matching the current call in the cache. When none * is found, we grab the oldest unlocked entry off the LRU list. * Note that no operation within the loop may sleep. @@ -157,8 +186,6 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; struct svc_cacherep *rp; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, @@ -177,17 +204,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - !nfsd_cache_entry_expired(rp) && - rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && - rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) { - nfsdstats.rchits++; - goto found_entry; - } + rp = nfsd_cache_search(rqstp); + if (rp) { + nfsdstats.rchits++; + goto found_entry; } nfsdstats.rcmisses++; -- cgit v0.10.2 From 2eeb9b2abc83a7fd02063c7558b8e729f5a3ae00 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 2 Feb 2013 05:55:08 -0500 Subject: nfsd: initialize the exp->ex_uuid field in svc_export_init commit 885c91f7466 in Bruce's tree was causing oopses for me: general protection fault: 0000 [#1] SMP Modules linked in: nfsd(OF) nfs_acl(OF) auth_rpcgss(OF) lockd(OF) sunrpc(OF) kvm_amd kvm microcode i2c_piix4 virtio_net virtio_balloon cirrus drm_kms_helper ttm drm virtio_blk i2c_core CPU 0 Pid: 564, comm: exportfs Tainted: GF O 3.8.0-0.rc5.git2.1.fc19.x86_64 #1 Bochs Bochs RIP: 0010:[] [] kfree+0x49/0x280 RSP: 0018:ffff88007a3d7c50 EFLAGS: 00010203 RAX: 01adaf8dadadad80 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000000000001 RDX: ffffffff7fffffff RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6b6b RBP: ffff88007a3d7c80 R08: 6b6b6b6b6b6b6b6b R09: 0000000000000000 R10: 0000000000000018 R11: 0000000000000000 R12: ffff88006a117b50 R13: ffffffffa01a589c R14: ffff8800631b0f50 R15: 01ad998dadadad80 FS: 00007fcaa3616740(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f5d84b6fdd8 CR3: 0000000064db4000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process exportfs (pid: 564, threadinfo ffff88007a3d6000, task ffff88006af28000) Stack: ffff88007a3d7c80 ffff88006a117b68 ffff88006a117b50 0000000000000000 ffff8800631b0f50 ffff88006a117b50 ffff88007a3d7ca0 ffffffffa01a589c ffff880036be1148 ffff88007a3d7cf8 ffff88007a3d7e28 ffffffffa01a6a98 Call Trace: [] svc_export_put+0x5c/0x70 [nfsd] [] svc_export_parse+0x328/0x7e0 [nfsd] [] cache_do_downcall+0x57/0x70 [sunrpc] [] cache_downcall+0x7e/0x100 [sunrpc] [] cache_write_procfs+0x58/0x90 [sunrpc] [] ? cache_downcall+0x100/0x100 [sunrpc] [] proc_reg_write+0x75/0xb0 [] vfs_write+0x9f/0x170 [] sys_write+0x49/0xa0 [] system_call_fastpath+0x16/0x1b Code: 66 66 66 90 48 83 fb 10 0f 86 c3 00 00 00 48 89 df 49 bf 00 00 00 00 00 ea ff ff e8 f2 12 ea ff 48 c1 e8 0c 48 c1 e0 06 49 01 c7 <49> 8b 07 f6 c4 80 0f 85 1d 02 00 00 49 8b 07 a8 80 0f 84 ee 01 RIP [] kfree+0x49/0x280 RSP I think Majianpeng's patch is correct, but incomplete. In order for it to be safe to free the ex_uuid unconditionally in svc_export_put, we need to make sure it's initialized to NULL in the init routine. Cc: majianpeng Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 45159ee..8e9df45 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -671,6 +671,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; new->cd = item->cd; } -- cgit v0.10.2 From 56c2548b2d24de440868885e4c36c985eca2d083 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:00 -0500 Subject: nfsd: always move DRC entries to the end of LRU list when updating timestamp ...otherwise, we end up with the list ordering wrong. Currently, it's not a problem since we skip RC_INPROG entries, but keeping the ordering strict will be necessary for a later patch that adds a cache cleaner. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index b89e7c8..9d80dfa 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -129,6 +129,7 @@ void nfsd_reply_cache_shutdown(void) static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); } @@ -245,9 +246,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -262,7 +263,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) found_entry: /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -354,7 +354,6 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } -- cgit v0.10.2 From 0ee0bf7ee5b55f232b645c4af0b0c37d4e115a32 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:01 -0500 Subject: nfsd: track the number of DRC entries in the cache Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 9d80dfa..c0c5847 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -27,6 +27,7 @@ static struct hlist_head * cache_hash; static struct list_head lru_head; static int cache_disabled = 1; static struct kmem_cache *drc_slab; +static unsigned int num_drc_entries; /* * Calculate the hash index from an XID. @@ -68,6 +69,7 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); list_del(&rp->c_lru); + --num_drc_entries; kmem_cache_free(drc_slab, rp); } @@ -83,10 +85,12 @@ int nfsd_reply_cache_init(void) INIT_LIST_HEAD(&lru_head); i = CACHESIZE; + num_drc_entries = 0; while (i) { rp = nfsd_reply_cache_alloc(); if (!rp) goto out_nomem; + ++num_drc_entries; list_add(&rp->c_lru, &lru_head); i--; } -- cgit v0.10.2 From 0338dd157282c19696d3c32614a748d0ba814b12 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:02 -0500 Subject: nfsd: dynamically allocate DRC entries The existing code keeps a fixed-size cache of 1024 entries. This is much too small for a busy server, and wastes memory on an idle one. This patch changes the code to dynamically allocate and free these cache entries. A cap on the number of entries is retained, but it's much larger than the existing value and now scales with the amount of low memory in the machine. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index c0c5847..d213e6e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -10,17 +10,13 @@ #include #include +#include #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 - */ -#define CACHESIZE 1024 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + #define HASHSIZE 64 static struct hlist_head * cache_hash; @@ -28,6 +24,7 @@ static struct list_head lru_head; static int cache_disabled = 1; static struct kmem_cache *drc_slab; static unsigned int num_drc_entries; +static unsigned int max_drc_entries; /* * Calculate the hash index from an XID. @@ -48,6 +45,34 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); */ static DEFINE_SPINLOCK(cache_lock); +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(void) { @@ -68,6 +93,7 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); + hlist_del(&rp->c_hash); list_del(&rp->c_lru); --num_drc_entries; kmem_cache_free(drc_slab, rp); @@ -75,30 +101,18 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) int nfsd_reply_cache_init(void) { - int i; - struct svc_cacherep *rp; - drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); if (!drc_slab) goto out_nomem; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - num_drc_entries = 0; - while (i) { - rp = nfsd_reply_cache_alloc(); - if (!rp) - goto out_nomem; - ++num_drc_entries; - list_add(&rp->c_lru, &lru_head); - i--; - } - - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; cache_disabled = 0; return 0; out_nomem: @@ -191,7 +205,7 @@ nfsd_cache_search(struct svc_rqst *rqstp) int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, @@ -210,38 +224,48 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rtn = RC_DOIT; rp = nfsd_cache_search(rqstp); - if (rp) { - nfsdstats.rchits++; + if (rp) goto found_entry; + + /* Try to use the first entry on the LRU */ + if (!list_empty(&lru_head)) { + rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); + if (nfsd_cache_entry_expired(rp) || + num_drc_entries >= max_drc_entries) + goto setup_entry; } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } + spin_unlock(&cache_lock); + rp = nfsd_reply_cache_alloc(); + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + return RC_DOIT; } + spin_lock(&cache_lock); + ++num_drc_entries; + + /* + * Must search again just in case someone inserted one + * after we dropped the lock above. + */ + found = nfsd_cache_search(rqstp); + if (found) { + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; } - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; - - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } - goto out; - } + /* + * We're keeping the one we just allocated. Are we now over the + * limit? Prune one off the tip of the LRU in trade for the one we + * just allocated if so. + */ + if (num_drc_entries >= max_drc_entries) + nfsd_reply_cache_free_locked(list_first_entry(&lru_head, + struct svc_cacherep, c_lru)); +setup_entry: + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; @@ -265,6 +289,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; lru_put_end(rp); @@ -295,7 +320,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; -- cgit v0.10.2 From 13cc8a78e89db0469e67ac9b3ae466b661af93fa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:03 -0500 Subject: nfsd: remove the cache_disabled flag With the change to dynamically allocate entries, the cache is never disabled on the fly. Remove this flag. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index d213e6e..69d29d4 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -21,7 +21,6 @@ static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; static struct kmem_cache *drc_slab; static unsigned int num_drc_entries; static unsigned int max_drc_entries; @@ -113,7 +112,6 @@ int nfsd_reply_cache_init(void) INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); num_drc_entries = 0; - cache_disabled = 0; return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -130,8 +128,6 @@ void nfsd_reply_cache_shutdown(void) nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; @@ -215,7 +211,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) int rtn; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; return RC_DOIT; } @@ -345,11 +341,11 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); -- cgit v0.10.2 From 2c6b691c05bf77c4bc7c9f1a9b6d93a160928421 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:04 -0500 Subject: nfsd: when updating an entry with RC_NOCACHE, just free it There's no need to keep entries around that we're declaring RC_NOCACHE. Ditto if there's a problem with the entry. With this change too, there's no need to test for RC_UNUSED in the search function. If the entry's in the hash table then it's either INPROG or DONE. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 69d29d4..e8ea785 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -98,6 +98,14 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) kmem_cache_free(drc_slab, rp); } +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + int nfsd_reply_cache_init(void) { drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), @@ -182,8 +190,7 @@ nfsd_cache_search(struct svc_rqst *rqstp) rh = &cache_hash[request_hash(xid)]; hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && + if (xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && !nfsd_cache_entry_expired(rp) && rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && @@ -353,7 +360,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -367,12 +374,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) cachv = &rp->c_replvec; cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); if (!cachv->iov_base) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } cachv->iov_len = len << 2; memcpy(cachv->iov_base, statp, len << 2); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); lru_put_end(rp); -- cgit v0.10.2 From aca8a23de60c705e2458b2c6731ad59aa0717f83 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:05 -0500 Subject: nfsd: add recurring workqueue job to clean the cache It's not sufficient to only clean the cache when requests come in. What if we have a flurry of activity and then the server goes idle? Add a workqueue job that will clean the cache every RC_EXPIRE period. Care is taken to only run this when we expect to have entries expiring. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e8ea785..d7b088b 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -36,6 +36,7 @@ static inline u32 request_hash(u32 xid) } static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); /* * locking for the reply cache: @@ -43,6 +44,7 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); /* * Put a cap on the size of the DRC based on the amount of available @@ -131,6 +133,8 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); nfsd_reply_cache_free_locked(rp); @@ -146,13 +150,15 @@ void nfsd_reply_cache_shutdown(void) } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -173,6 +179,42 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) } /* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static void +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + if (!nfsd_cache_entry_expired(rp) && + num_drc_entries <= max_drc_entries) + break; + nfsd_reply_cache_free_locked(rp); + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +/* * Search the request hash for an entry that matches the given rqstp. * Must be called with cache_lock held. Returns the found entry or * NULL on failure. @@ -192,7 +234,6 @@ nfsd_cache_search(struct svc_rqst *rqstp) hlist_for_each_entry(rp, hn, rh, c_hash) { if (xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && - !nfsd_cache_entry_expired(rp) && rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) return rp; @@ -234,8 +275,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) if (!list_empty(&lru_head)) { rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); if (nfsd_cache_entry_expired(rp) || - num_drc_entries >= max_drc_entries) + num_drc_entries >= max_drc_entries) { + lru_put_end(rp); + prune_cache_entries(); goto setup_entry; + } } spin_unlock(&cache_lock); -- cgit v0.10.2 From b4e7f2c9450968303d24b48fec9da8abebcc0036 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:18:06 -0500 Subject: nfsd: register a shrinker for DRC cache entries Since we dynamically allocate them now, allow the system to call us up to release them if it gets low on memory. Since these entries aren't replaceable, only free ones that are expired or that are over the cap. The the seeks value is set to '1' however to indicate that freeing the these entries is low-cost. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index d7b088b..d16a5d6 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -37,6 +37,13 @@ static inline u32 request_hash(u32 xid) static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static void cache_cleaner_func(struct work_struct *unused); +static int nfsd_reply_cache_shrink(struct shrinker *shrink, + struct shrink_control *sc); + +struct shrinker nfsd_reply_cache_shrinker = { + .shrink = nfsd_reply_cache_shrink, + .seeks = 1, +}; /* * locking for the reply cache: @@ -110,6 +117,7 @@ nfsd_reply_cache_free(struct svc_cacherep *rp) int nfsd_reply_cache_init(void) { + register_shrinker(&nfsd_reply_cache_shrinker); drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); if (!drc_slab) @@ -133,6 +141,7 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); cancel_delayed_work_sync(&cache_cleaner); while (!list_empty(&lru_head)) { @@ -214,6 +223,20 @@ cache_cleaner_func(struct work_struct *unused) spin_unlock(&cache_lock); } +static int +nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned int num; + + spin_lock(&cache_lock); + if (sc->nr_to_scan) + prune_cache_entries(); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + /* * Search the request hash for an entry that matches the given rqstp. * Must be called with cache_lock held. Returns the found entry or -- cgit v0.10.2 From 2d32b29a1c2830f7c42caa8258c714acd983961f Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 29 Jan 2013 13:16:06 +0800 Subject: nfsd: Fix memleak When free nfs-client, it must free the ->cl_stateids. Cc: stable@kernel.org Signed-off-by: Jianpeng Ma Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4db46aa..a6637de 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1060,6 +1060,8 @@ free_client(struct nfs4_client *clp) } free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_remove_all(&clp->cl_stateids); + idr_destroy(&clp->cl_stateids); kfree(clp); } -- cgit v0.10.2 From 3abdb6071250b896b9d5b6e0c310d6e95666b4d7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 3 Feb 2013 12:23:01 -0500 Subject: nfsd4: simplify idr allocation We don't really need to preallocate at all; just allocate and initialize everything at once, but leave the sc_type field initially 0 to prevent finding the stateid till it's fully initialized. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a6637de..27c77a0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -261,33 +261,46 @@ static inline int get_new_stid(struct nfs4_stid *stid) return new_stid; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - stateid_t *s = &stid->sc_stateid; + struct idr *stateids = &cl->cl_stateids; + static int min_stateid = 0; + struct nfs4_stid *stid; int new_id; - stid->sc_type = type; + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + goto out_free; + if (idr_get_new_above(stateids, stid, min_stateid, &new_id)) + goto out_free; stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - s->si_generation = 0; -} + stid->sc_stateid.si_generation = 0; -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) -{ - struct idr *stateids = &cl->cl_stateids; - - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): */ - return kmem_cache_alloc(slab, GFP_KERNEL); + + min_stateid = new_id+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return stid; +out_free: + kfree(stid); + return NULL; } static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) @@ -316,7 +329,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -337,13 +350,21 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } +void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + free_stid(&dp->dl_stid, deleg_slab); num_delegations--; } } @@ -360,9 +381,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; - - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + s->sc_type = 0; } /* Called under the state lock. */ @@ -519,7 +538,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + free_stid(&stp->st_stid, stateid_slab); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -1260,7 +1279,12 @@ static void gen_confirm(struct nfs4_client *clp) static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -2446,9 +2470,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4034,7 +4057,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; -- cgit v0.10.2 From 155a345a52e6cda18946efe2529d99d5040fad6d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:10:08 -0500 Subject: sunrpc: copy scope ID in __rpc_copy_addr6 When copying an address, we should also copy the scopeid in the event that this is a link-local address and the scope matters. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 47354a2..6a7c261 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,6 +242,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, dsin6->sin6_family = ssin6->sin6_family; dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; return true; } #else /* !(IS_ENABLED(CONFIG_IPV6) */ -- cgit v0.10.2 From 5976687a2b3d1969f02aba16b80ad3ed79be6ad3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 12:50:00 -0500 Subject: sunrpc: move address copy/cmp/convert routines and prototypes from clnt.h to addr.h These routines are used by server and client code, so having them in a separate header would be best. Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ca0a080..4885b53 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0e17090..764c4d2 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3c2cfc6..1812f02 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0deb5f6..8064435 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index ca4b11e..62c8c41 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include #include +#include #include #include "dns_resolve.h" @@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #include #include #include +#include #include #include #include diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index acc3472..e5f2fad 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b720064..1fe284f 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "internal.h" #include "nfs4session.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 1e09eb7..0dd7660 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "internal.h" diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c25cadf8..3250b41 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index e761ee9..247c00c 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "state.h" diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 27c77a0..60a2282 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index d16a5d6..f754469 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include "nfsd.h" diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 65889ec..29c3f0d 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 0000000..07d8e53 --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include +#include +#include +#include + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; +} +#else /* !(IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6a7c261..4a4abde 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -165,160 +165,5 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -static inline bool __rpc_copy_addr4(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in *ssin = (struct sockaddr_in *) src; - struct sockaddr_in *dsin = (struct sockaddr_in *) dst; - - dsin->sin_family = ssin->sin_family; - dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; - return true; -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - - if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) - return false; - else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) - return sin1->sin6_scope_id == sin2->sin6_scope_id; - - return true; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; - struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; - - dsin6->sin6_family = ssin6->sin6_family; - dsin6->sin6_addr = ssin6->sin6_addr; - dsin6->sin6_scope_id = ssin6->sin6_scope_id; - return true; -} -#else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return false; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - return false; -} -#endif /* !(IS_ENABLED(CONFIG_IPV6) */ - -/** - * rpc_cmp_addr - compare the address portion of two sockaddrs. - * @sap1: first sockaddr - * @sap2: second sockaddr - * - * Just compares the family and address portion. Ignores port, but - * compares the scope if it's a link-local address. - * - * Returns true if the addrs are equal, false if they aren't. - */ -static inline bool rpc_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); - case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); - } - } - return false; -} - -/** - * rpc_copy_addr - copy the address portion of one sockaddr to another - * @dst: destination sockaddr - * @src: source sockaddr - * - * Just copies the address portion and family. Ignores port, scope, etc. - * Caller is responsible for making certain that dst is large enough to hold - * the address in src. Returns true if address family is supported. Returns - * false otherwise. - */ -static inline bool rpc_copy_addr(struct sockaddr *dst, - const struct sockaddr *src) -{ - switch (src->sa_family) { - case AF_INET: - return __rpc_copy_addr4(dst, src); - case AF_INET6: - return __rpc_copy_addr6(dst, src); - } - return false; -} - -/** - * rpc_get_scope_id - return scopeid for a given sockaddr - * @sa: sockaddr to get scopeid from - * - * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if - * not an AF_INET6 address. - */ -static inline u32 rpc_get_scope_id(const struct sockaddr *sa) -{ - if (sa->sa_family != AF_INET6) - return 0; - - return ((struct sockaddr_in6 *) sa)->sin6_scope_id; -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d11418f..a622ad6 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -17,7 +17,8 @@ */ #include -#include +#include +#include #include #include diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 822f020..c8193ce 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 795a0f4..3df764d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -26,6 +26,7 @@ #include #include +#include #include #include diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 4d01292..ce34c8e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #define RPCDBG_FACILITY RPCDBG_AUTH -#include #include "netns.h" diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c9aa7a3..66591dc 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "xprt_rdma.h" diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 68b0a81..bbc0915 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From de0b65ca55dc62b6b477f6e02088df2281da7b51 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 12:51:17 -0500 Subject: sunrpc: fix comment in struct xdr_buf definition ...these pages aren't necessarily contiguous. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 6398899..224d060 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -56,7 +56,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ - struct page ** pages; /* Array of contiguous pages */ + struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ -- cgit v0.10.2 From 4c190e2f913f038c9c91ee63b59cd037260ba353 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2013 08:28:55 -0500 Subject: sunrpc: trim off trailing checksum before returning decrypted or integrity authenticated buffer When GSSAPI integrity signatures are in use, or when we're using GSSAPI privacy with the v2 token format, there is a trailing checksum on the xdr_buf that is returned. It's checked during the authentication stage, and afterward nothing cares about it. Ordinarily, it's not a problem since the XDR code generally ignores it, but it will be when we try to compute a checksum over the buffer to help prevent XID collisions in the duplicate reply cache. Fix the code to trim off the checksums after verifying them. Note that in unwrap_integ_data, we must avoid trying to reverify the checksum if the request was deferred since it will no longer be present when it's revisited. Signed-off-by: Jeff Layton diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 224d060..15f9204 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c452..88edec9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + /* Trim off the checksum blob */ + xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73e9573..a5b41e2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -817,13 +817,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) * The server uses base of head iovec as read pointer, while the * client uses separate pointer. */ static int -unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { int stat = -EINVAL; u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + /* Did we already verify the signature on the original pass through? */ + if (rqstp->rq_deferred) + return 0; + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) return stat; @@ -846,6 +850,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; + /* trim off the mic at the end before returning */ + xdr_buf_trim(buf, mic.len + 4); stat = 0; out: kfree(mic.data); @@ -1190,7 +1196,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - if (unwrap_integ_data(&rqstp->rq_arg, + if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; break; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 5605563..75edcfa 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; -- cgit v0.10.2 From 01a7decf75930925322c5efc87af0b5e58eb8650 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 11:57:27 -0500 Subject: nfsd: keep a checksum of the first 256 bytes of request Now that we're allowing more DRC entries, it becomes a lot easier to hit problems with XID collisions. In order to mitigate those, calculate a checksum of up to the first 256 bytes of each request coming in and store that in the cache entry, along with the total length of the request. This initially used crc32, but Chuck Lever and Jim Rees pointed out that crc32 is probably more heavyweight than we really need for generating these checksums, and recommended looking at using the same routines that are used to generate checksums for IP packets. On an x86_64 KVM guest measurements with ftrace showed ~800ns to use csum_partial vs ~1750ns for crc32. The difference probably isn't terribly significant, but for now we may as well use csum_partial. Signed-off-by: Jeff Layton Stones-thrown-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 9c7232b..87fd141 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -29,6 +29,8 @@ struct svc_cacherep { u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -73,6 +75,9 @@ enum { /* Cache entries expire after this time period */ #define RC_EXPIRE (120 * HZ) +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index f754469..40db57e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "nfsd.h" #include "cache.h" @@ -130,6 +131,7 @@ int nfsd_reply_cache_init(void) INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); num_drc_entries = 0; + return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -238,12 +240,45 @@ nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) } /* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min(PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +/* * Search the request hash for an entry that matches the given rqstp. * Must be called with cache_lock held. Returns the found entry or * NULL on failure. */ static struct svc_cacherep * -nfsd_cache_search(struct svc_rqst *rqstp) +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) { struct svc_cacherep *rp; struct hlist_node *hn; @@ -257,6 +292,7 @@ nfsd_cache_search(struct svc_rqst *rqstp) hlist_for_each_entry(rp, hn, rh, c_hash) { if (xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && + rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) return rp; @@ -277,6 +313,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; int rtn; @@ -287,10 +324,12 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return RC_DOIT; } + csum = nfsd_cache_csum(rqstp); + spin_lock(&cache_lock); rtn = RC_DOIT; - rp = nfsd_cache_search(rqstp); + rp = nfsd_cache_search(rqstp, csum); if (rp) goto found_entry; @@ -318,7 +357,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Must search again just in case someone inserted one * after we dropped the lock above. */ - found = nfsd_cache_search(rqstp); + found = nfsd_cache_search(rqstp, csum); if (found) { nfsd_reply_cache_free_locked(rp); rp = found; @@ -344,6 +383,8 @@ setup_entry: rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); lru_put_end(rp); -- cgit v0.10.2 From e56a316214d0f1e2446fa7a717309f9414564d9d Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Mon, 11 Feb 2013 16:21:42 -0500 Subject: nfsd4: free_stid can be static Reported-by: Fengguang Wu diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 60a2282..c1a6ddf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -350,7 +350,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } -void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) { struct idr *stateids = &s->sc_client->cl_stateids; -- cgit v0.10.2 From 483479c26a65e5f0cc95e9324f313bc95c7dc6fd Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:35 +0300 Subject: NFS: use SUNRPC cache creation and destruction helper for DNS cache This cache was the first containerized and doesn't use net-aware cache creation and destruction helpers. This is a cleanup patch which just makes code looks clearer and reduce amount of lines of code. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 62c8c41..d5ce5f4 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -353,48 +353,39 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, } EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); +static struct cache_detail nfs_dns_resolve_template = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + + int nfs_dns_resolver_cache_init(struct net *net) { - int err = -ENOMEM; + int err; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); + nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net); + if (IS_ERR(nn->nfs_dns_resolve)) + return PTR_ERR(nn->nfs_dns_resolve); + + nfs_cache_init(nn->nfs_dns_resolve); + err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; - nn->nfs_dns_resolve = cd; return 0; err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: + nfs_cache_destroy(nn->nfs_dns_resolve); + cache_destroy_net(nn->nfs_dns_resolve, net); return err; } @@ -405,8 +396,7 @@ void nfs_dns_resolver_cache_destroy(struct net *net) nfs_cache_unregister_net(net, cd); nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); + cache_destroy_net(nn->nfs_dns_resolve, net); } static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, -- cgit v0.10.2 From 462b8f6bf1d3f5feb7a346394036dbc1df3a8ed5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:40 +0300 Subject: NFS: simplify and clean cache library This is a cleanup patch. Such helpers like nfs_cache_init() and nfs_cache_destroy() are redundant, because they are just a wrappers around sunrpc_init_cache_detail() and sunrpc_destroy_cache_detail() respectively. So let's remove them completely and move corresponding logic to nfs_cache_register_net() and nfs_cache_unregister_net() respectively (since they are called together anyway). Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 862a2f1..5f7b053 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) struct super_block *pipefs_sb; int ret = 0; + sunrpc_init_cache_detail(cd); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { ret = nfs_cache_register_sb(pipefs_sb, cd); rpc_put_sb_net(net); + if (ret) + sunrpc_destroy_cache_detail(cd); } return ret; } @@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - sunrpc_init_cache_detail(cd); -} - -void nfs_cache_destroy(struct cache_detail *cd) -{ sunrpc_destroy_cache_detail(cd); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 317db95..4116d2c 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); extern int nfs_cache_register_sb(struct super_block *sb, diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index d5ce5f4..9cbc98a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -377,14 +377,12 @@ int nfs_dns_resolver_cache_init(struct net *net) if (IS_ERR(nn->nfs_dns_resolve)) return PTR_ERR(nn->nfs_dns_resolve); - nfs_cache_init(nn->nfs_dns_resolve); err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; return 0; err_reg: - nfs_cache_destroy(nn->nfs_dns_resolve); cache_destroy_net(nn->nfs_dns_resolve, net); return err; } @@ -392,10 +390,8 @@ err_reg: void nfs_dns_resolver_cache_destroy(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); + nfs_cache_unregister_net(net, nn->nfs_dns_resolve); cache_destroy_net(nn->nfs_dns_resolve, net); } -- cgit v0.10.2 From 73fb847a44224d5708550e4be7baba9da75e00af Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:45 +0300 Subject: SUNRPC: introduce cache_detail->cache_request callback This callback will allow to simplify upcalls in further patches in this series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 9cbc98a..55bc5d1 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); return ret; } @@ -359,6 +359,7 @@ static struct cache_detail nfs_dns_resolve_template = { .name = "dns_resolve", .cache_put = nfs_dns_ent_put, .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, .cache_parse = nfs_dns_parse, .cache_show = nfs_dns_show, .match = nfs_dns_match, diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8e9df45..0e16c7f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -69,7 +69,7 @@ static void expkey_request(struct cache_detail *cd, static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, @@ -246,6 +246,7 @@ static struct cache_detail svc_expkey_cache_template = { .name = "nfsd.fh", .cache_put = expkey_put, .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -340,7 +341,7 @@ static void svc_export_request(struct cache_detail *cd, static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct svc_export *svc_export_update(struct svc_export *new, @@ -714,6 +715,7 @@ static struct cache_detail svc_export_cache_template = { .name = "nfsd.export", .cache_put = svc_export_put, .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a1f10c0..9033dfd 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -142,7 +142,7 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, static int idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) { - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); + return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); } static int @@ -193,6 +193,7 @@ static struct cache_detail idtoname_cache_template = { .name = "nfs4.idtoname", .cache_put = ent_put, .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -323,7 +324,7 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, static int nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) { - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); + return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); } static int @@ -366,6 +367,7 @@ static struct cache_detail nametoid_cache_template = { .name = "nfs4.nametoid", .cache_put = ent_put, .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4..4f1c858 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index a5b41e2..1b0df53 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -184,7 +184,7 @@ static void rsi_request(struct cache_detail *cd, static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } @@ -276,6 +276,7 @@ static struct cache_detail rsi_cache_template = { .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index ce34c8e..18b8742 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -159,7 +159,7 @@ static void ip_map_request(struct cache_detail *cd, static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); @@ -472,7 +472,7 @@ static void unix_gid_request(struct cache_detail *cd, static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); @@ -578,6 +578,7 @@ static struct cache_detail unix_gid_cache_template = { .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -875,6 +876,7 @@ static struct cache_detail ip_map_cache_template = { .name = "auth.unix.ip", .cache_put = ip_map_put, .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, -- cgit v0.10.2 From 2d4383383b0b04ca380b67aa2d7397d0b399dcbf Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:50 +0300 Subject: SUNRPC: rework cache upcall logic For most of SUNRPC caches (except NFS DNS cache) cache_detail->cache_upcall is redundant since all that it's implementations are doing is calling sunrpc_cache_pipe_upcall() with proper function address argument. Cache request function address is now stored on cache_detail structure and thus all the code can be simplified. Now, for those cache details, which doesn't have cache_upcall callback (the only one, which still has is nfs_dns_resolve_template) sunrpc_cache_pipe_upcall will be called instead. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 0e16c7f..15ebf91 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -67,11 +67,6 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); -} - static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); @@ -245,7 +240,6 @@ static struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, @@ -339,11 +333,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -714,7 +703,6 @@ static struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 9033dfd..d9402ea 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -140,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -192,7 +186,6 @@ static struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, @@ -322,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -366,7 +353,6 @@ static struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1b0df53..eb2b1f7 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -182,12 +182,6 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); -} - - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -275,7 +269,6 @@ static struct cache_detail rsi_cache_template = { .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9f84703..51853d8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); + if (cd->cache_upcall) + return cd->cache_upcall(cd, h); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -1152,6 +1152,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, char *bp; int len; + if (!detail->cache_request) + return -EINVAL; + if (!cache_listeners_exist(detail)) { warn_no_listener(detail); return -EINVAL; @@ -1605,7 +1608,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (p == NULL) goto out_nomem; - if (cd->cache_upcall || cd->cache_parse) { + if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->u.procfs.proc_ent, &cache_file_operations_procfs, cd); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 18b8742..5085804 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); -} - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -470,11 +465,6 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); -} - static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); static int unix_gid_parse(struct cache_detail *cd, @@ -577,7 +567,6 @@ static struct cache_detail unix_gid_cache_template = { .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, @@ -875,7 +864,6 @@ static struct cache_detail ip_map_cache_template = { .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, -- cgit v0.10.2 From 21cd1254d3402a72927ed744e8ac1a7cf532f1ea Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:55 +0300 Subject: SUNRPC: remove "cache_request" argument in sunrpc_cache_pipe_upcall() function Passing this pointer is redundant since it's stored on cache_detail structure, which is also passed to sunrpc_cache_pipe_upcall () function. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 55bc5d1..9455270 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); + ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 4f1c858..303399b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -161,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 51853d8..8ffec5a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -198,7 +198,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { if (cd->cache_upcall) return cd->cache_upcall(cd, h); - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -1140,11 +1140,7 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; @@ -1172,7 +1168,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, bp = buf; len = PAGE_SIZE; - cache_request(detail, h, &bp, &len); + detail->cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); -- cgit v0.10.2 From d94af6dea9cd680fb795dbc409a7360f1c63dc34 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:03:03 +0300 Subject: SUNRPC: move cache_detail->cache_request callback call to cache_read() The reason to move cache_request() callback call from sunrpc_cache_pipe_upcall() to cache_read() is that this garantees, that cache access will be done userspace process context (only userspace process have proper root context). This is required for NFSd support in container: svc_export_request() (which is cache_request callback) calls d_path(), which, in turn, traverse dentry up to current->fs->root. Kernel threads always have global root, while container have be in "root jail" - i.e. have it's own nested root. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8ffec5a..5502471 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -750,6 +750,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; +static int cache_request(struct cache_detail *detail, + struct cache_request *crq) +{ + char *bp = crq->buf; + int len = PAGE_SIZE; + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) + return -EAGAIN; + return PAGE_SIZE - len; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { @@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers++; spin_unlock(&queue_lock); + if (rq->len == 0) { + err = cache_request(cd, rq); + if (err < 0) + goto out; + rq->len = err; + } + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); @@ -1145,8 +1164,6 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) char *buf; struct cache_request *crq; - char *bp; - int len; if (!detail->cache_request) return -EINVAL; @@ -1166,19 +1183,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) return -EAGAIN; } - bp = buf; len = PAGE_SIZE; - - detail->cache_request(detail, h, &bp, &len); - - if (len < 0) { - kfree(buf); - kfree(crq); - return -EAGAIN; - } crq->q.reader = 0; crq->item = cache_get(h); crq->buf = buf; - crq->len = PAGE_SIZE - len; + crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); list_add_tail(&crq->q.list, &detail->queue); -- cgit v0.10.2 From 1ac8362977b9ec75779170ac3074c7b36ab19b82 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 14 Feb 2013 16:45:13 -0500 Subject: nfsd: fix comments on nfsd_cache_lookup Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 40db57e..2f9c2d2 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -302,8 +302,10 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int nfsd_cache_lookup(struct svc_rqst *rqstp) @@ -344,6 +346,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) } } + /* Drop the lock and allocate a new entry */ spin_unlock(&cache_lock); rp = nfsd_reply_cache_alloc(); if (!rp) { -- cgit v0.10.2 From 11f779421a39b86da8a523d97e5fd3477878d44f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Feb 2013 15:56:12 +0300 Subject: nfsd: containerize NFSd filesystem This patch makes NFSD file system superblock to be created per net. This makes possible to get proper network namespace from superblock instead of using hard-coded "init_net". Note: NFSd fs super-block holds network namespace. This garantees, that network namespace won't disappear from underneath of it. This, obviously, means, that in case of kill of a container's "init" (which is not a mount namespace, but network namespace creator) netowrk namespace won't be destroyed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 29c3f0d..f6d448e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -220,6 +220,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -232,7 +233,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -317,6 +318,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -352,7 +354,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(&init_net, dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -396,7 +398,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size > 0) { int newthreads; @@ -447,7 +449,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -510,7 +512,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { @@ -792,7 +794,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, net); @@ -827,7 +829,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size > 0) { @@ -923,7 +925,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -939,7 +942,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -995,7 +999,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1037,20 +1042,35 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; #ifdef CONFIG_PROC_FS diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 40cb1cb..6cee5db 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -702,8 +702,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv == NULL) { @@ -720,7 +719,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ -- cgit v0.10.2 From 96d851c4d28de8cc83fe2bd5c6bc2eb8f253a6c5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Feb 2013 15:56:17 +0300 Subject: nfsd: use proper net while reading "exports" file Functuon "exports_open" is used for both "/proc/fs/nfs/exports" and "/proc/fs/nfsd/exports" files. Now NFSd filesystem is containerised, so proper net can be taken from superblock for "/proc/fs/nfsd/exports" reader. But for "/proc/fs/nfsd/exports" only current->nsproxy->net_ns can be used. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f6d448e..8ead2c2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -125,11 +125,11 @@ static const struct file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) @@ -140,8 +140,26 @@ static int exports_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations exports_operations = { - .open = exports_open, +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); +} + +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -1018,7 +1036,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1081,7 +1099,8 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); + entry = proc_create("exports", 0, entry, + &exports_proc_operations); if (!entry) return -ENOMEM; return 0; -- cgit v0.10.2 From 71a50306934f416e74ba27cbfb88855c22251525 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Feb 2013 15:56:22 +0300 Subject: nfsd: disable usermode helper client tracker in container This tracker uses khelper kthread to execute binaries. Execution itself is done from kthread context - i.e. global root is used. This is not suitable for containers with own root. So, disable this tracker for a while. Note: one of possible solutions can be pass "init" callback to khelper, which will swap root to desired one. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ba6fdd4..e0ae1cf 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1185,6 +1185,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen) static int nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) { + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } return nfsd4_umh_cltrack_upcall("init", NULL, NULL); } -- cgit v0.10.2 From deb4534f4f3be7aea7d9d24c3b0d58f370cbf9ef Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Feb 2013 15:56:27 +0300 Subject: nfsd: enable NFSv4 state in containers Currently, NFSd is ready to operate in network namespace based containers. So let's drop check for "init_net" and make it able to fly. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c1a6ddf..f194f86 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4939,16 +4939,6 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - /* - * FIXME: For now, we hang most of the pernet global stuff off of - * init_net until nfsd is fully containerized. Eventually, we'll - * need to pass a net pointer into this function, take a reference - * to that instead and then do most of the rest of this on a per-net - * basis. - */ - if (net != &init_net) - return -EINVAL; - ret = nfs4_state_create_net(net); if (ret) return ret; -- cgit v0.10.2 From f25cc71e634edcf8a15bc60a48f2b5f3ec9fbb1d Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 13 Feb 2013 08:40:16 -0700 Subject: lockd: nlmclnt_reclaim(): avoid stack overflow Even though nlmclnt_reclaim() is only one call into the stack frame, 928 bytes on the stack seems like a lot. Recode to dynamically allocate the request structure once from within the reclaimer task, then pass this pointer into nlmclnt_reclaim() for reuse on subsequent calls. smatch analysis: fs/lockd/clntproc.c:620 nlmclnt_reclaim() warn: 'reqst' puts 928 bytes on stack Also remove redundant assignment of 0 after memset. Cc: Trond Myklebust Signed-off-by: Tim Gardner Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 4885b53..6cd673d 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -220,10 +220,19 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; + struct nlm_rqst *req; struct file_lock *fl, *next; u32 nsmstate; struct net *net = host->net; + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + printk(KERN_ERR "lockd: reclaimer unable to alloc memory." + " Locks for %s won't be reclaimed!\n", + host->h_name); + return 0; + } + allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -253,7 +262,7 @@ restart: */ if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) != 0) + if (nlmclnt_reclaim(host, fl, req) != 0) continue; list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { @@ -279,5 +288,6 @@ restart: /* Release host handle after use */ nlmclnt_release_host(host); lockd_down(net); + kfree(req); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 54f9e6c..b43114c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -615,17 +615,15 @@ out_unlock: * RECLAIM: Try to reclaim a lock */ int -nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl, + struct nlm_rqst *req) { - struct nlm_rqst reqst, *req; int status; - req = &reqst; memset(req, 0, sizeof(*req)); locks_init_lock(&req->a_args.lock.fl); locks_init_lock(&req->a_res.lock.fl); req->a_host = host; - req->a_flags = 0; /* Set up the argument struct */ nlmclnt_setlockargs(req, fl); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f5a051a..a395f1e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* -- cgit v0.10.2 From e75bafbff2270993926abcc31358361db74a9bc2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 10 Feb 2013 11:33:48 -0500 Subject: svcrpc: make svc_age_temp_xprts enqueue under sv_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit svc_age_temp_xprts expires xprts in a two-step process: first it takes the sv_lock and moves the xprts to expire off their server-wide list (sv_tempsocks or sv_permsocks) to a local list. Then it drops the sv_lock and enqueues and puts each one. I see no reason for this: svc_xprt_enqueue() will take sp_lock, but the sv_lock and sp_lock are not otherwise nested anywhere (and documentation at the top of this file claims it's correct to nest these with sp_lock inside.) Cc: stable@kernel.org Tested-by: Jason Tibbitts Tested-by: Paweł Sikora Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5a9d40c..11a33c8 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -863,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure) struct svc_serv *serv = (struct svc_serv *)closure; struct svc_xprt *xprt; struct list_head *le, *next; - LIST_HEAD(to_be_aged); dprintk("svc_age_temp_xprts\n"); @@ -884,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure) if (atomic_read(&xprt->xpt_ref.refcount) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; - svc_xprt_get(xprt); - list_move(le, &to_be_aged); + list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_DETACHED, &xprt->xpt_flags); - } - spin_unlock_bh(&serv->sv_lock); - - while (!list_empty(&to_be_aged)) { - le = to_be_aged.next; - /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ - list_del_init(le); - xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); } + spin_unlock_bh(&serv->sv_lock); mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } -- cgit v0.10.2 From cc630d9f476445927fca599f81182c7f06f79058 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 10 Feb 2013 16:08:11 -0500 Subject: svcrpc: fix rpc server shutdown races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite server shutdown to remove the assumption that there are no longer any threads running (no longer true, for example, when shutting down the service in one network namespace while it's still running in others). Do that by doing what we'd do in normal circumstances: just CLOSE each socket, then enqueue it. Since there may not be threads to handle the resulting queued xprts, also run a simplified version of the svc_recv() loop run by a server to clean up any closed xprts afterwards. Cc: stable@kernel.org Tested-by: Jason Tibbitts Tested-by: Paweł Sikora Acked-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b9ba2a8..89a588b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_shutdown_net(struct svc_serv *serv, struct net *net) { - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ svc_close_net(serv, net); if (serv->sv_shutdown) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 11a33c8..80a6640 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -955,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + int ret = 0; spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; + ret++; set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } spin_unlock(&serv->sv_lock); + return ret; } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -984,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) if (xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); + spin_unlock_bh(&pool->sp_lock); + return xprt; } spin_unlock_bh(&pool->sp_lock); } + return NULL; } -static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - LIST_HEAD(victims); - - spin_lock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - list_move(&xprt->xpt_list, &victims); - } - spin_unlock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + while ((xprt = svc_dequeue_net(serv, net))) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_delete_xprt(xprt); + } } +/* + * Server threads may still be running (especially in the case where the + * service is still running in other network namespaces). + * + * So we shut down sockets the same way we would on a running server, by + * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do + * the close. In the case there are no such other threads, + * threads running, svc_clean_up_xprts() does a simple version of a + * server's main event loop, and in the case where there are other + * threads, we may need to wait a little while and then check again to + * see if they're done. + */ void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(serv, &serv->sv_tempsocks, net); - svc_close_list(serv, &serv->sv_permsocks, net); + int delay = 0; - svc_clear_pools(serv, net); - /* - * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the - * sp_lock and checking XPT_BUSY. - */ - svc_clear_list(serv, &serv->sv_tempsocks, net); - svc_clear_list(serv, &serv->sv_permsocks, net); + while (svc_close_list(serv, &serv->sv_permsocks, net) + + svc_close_list(serv, &serv->sv_tempsocks, net)) { + + svc_clean_up_xprts(serv, net); + msleep(delay++); + } } /* -- cgit v0.10.2 From 56edc86b5a72bdbc86358e57fb09165136baf0b8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 15 Feb 2013 13:36:34 -0500 Subject: nfsd: fix compiler warning about ambiguous types in nfsd_cache_csum kbuild test robot says: tree: git://linux-nfs.org/~bfields/linux.git for-3.9 head: deb4534f4f3be7aea7d9d24c3b0d58f370cbf9ef commit: 01a7decf75930925322c5efc87af0b5e58eb8650 [32/44] nfsd: keep a checksum of the first 256 bytes of request config: i386-randconfig-x088 (attached as .config) All warnings: fs/nfsd/nfscache.c: In function 'nfsd_cache_csum': >> fs/nfsd/nfscache.c:266:9: warning: comparison of distinct pointer types lacks a cast [enabled by default] vim +266 fs/nfsd/nfscache.c 250 __wsum csum; 251 struct xdr_buf *buf = &rqstp->rq_arg; 252 const unsigned char *p = buf->head[0].iov_base; 253 size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, 254 RC_CSUMLEN); 255 size_t len = min(buf->head[0].iov_len, csum_len); 256 257 /* rq_arg.head first */ 258 csum = csum_partial(p, len, 0); 259 csum_len -= len; 260 261 /* Continue into page array */ 262 idx = buf->page_base / PAGE_SIZE; 263 base = buf->page_base & ~PAGE_MASK; 264 while (csum_len) { 265 p = page_address(buf->pages[idx]) + base; > 266 len = min(PAGE_SIZE - base, csum_len); 267 csum = csum_partial(p, len, csum); 268 csum_len -= len; 269 base = 0; 270 ++idx; 271 } 272 return csum; 273 } 274 Signed-off-by: Jeff Layton Reported-by: kbuild test robot Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2f9c2d2..ca43664 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -263,7 +263,7 @@ nfsd_cache_csum(struct svc_rqst *rqstp) base = buf->page_base & ~PAGE_MASK; while (csum_len) { p = page_address(buf->pages[idx]) + base; - len = min(PAGE_SIZE - base, csum_len); + len = min_t(size_t, PAGE_SIZE - base, csum_len); csum = csum_partial(p, len, csum); csum_len -= len; base = 0; -- cgit v0.10.2 From dc107402ae06286a9ed33c32daf3f35514a7cb8d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Feb 2013 17:52:19 -0500 Subject: SUNRPC: make AF_LOCAL connect synchronous It doesn't appear that anyone actually needs to connect asynchronously. Also, using a workqueue for the connect means we lose the namespace information from the original process. This is a problem since there's no way to explicitly pass in a filesystem namespace for resolution of an AF_LOCAL address. Acked-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bbc0915..4dc8eb2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1866,13 +1866,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, * @xprt: RPC transport to connect * @transport: socket transport to connect * @create_sock: function to create a socket of the correct type - * - * Invoked by a work queue tasklet. */ -static void xs_local_setup_socket(struct work_struct *work) +static int xs_local_setup_socket(struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; @@ -1917,6 +1913,31 @@ out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); current->flags &= ~PF_FSTRANS; + return status; +} + +static void xs_local_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret; + + if (RPC_IS_ASYNC(task)) { + /* + * We want the AF_LOCAL connect to be resolved in the + * filesystem namespace of the process making the rpc + * call. Thus we connect synchronously. + * + * If we want to support asynchronous AF_LOCAL calls, + * we'll need to figure out how to pass a namespace to + * connect. + */ + rpc_exit(task, -ENOTCONN); + return; + } + ret = xs_local_setup_socket(transport); + if (ret && !RPC_IS_SOFTCONN(task)) + msleep_interruptible(15000); } #ifdef CONFIG_SUNRPC_SWAP @@ -2454,7 +2475,7 @@ static struct rpc_xprt_ops xs_local_ops = { .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, - .connect = xs_connect, + .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_local_send_request, @@ -2627,8 +2648,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) goto out_err; } xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_local_setup_socket); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); break; default: -- cgit v0.10.2