From 1447d25eb3a7bbe5bf5e4e7489f09be13e1ec73a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 8 Feb 2008 13:03:37 +1100 Subject: knfsd: Remove NLM_HOST_MAX and associated logic. Lockd caches information about hosts that have recently held locks to expedite the taking of further locks. It periodically discards this information for hosts that have not been used for a few minutes. lockd currently has a value NLM_HOST_MAX, and changes the 'garbage collection' behaviour when the number of hosts exceeds this threshold. However its behaviour is strange, and likely not what was intended. When the number of hosts exceeds the max, it scans *less* often (every 2 minutes vs every minute) and allows unused host information to remain around longer (5 minutes instead of 2). Having this limit is of dubious value anyway, and we have not suffered from the code not getting the limit right, so remove the limit altogether. We go with the larger values (discard 5 minute old hosts every 2 minutes) as they are probably safer. Maybe the periodic garbage collection should be replace to with 'shrinker' handler so we just respond to memory pressure.... Acked-by: Jeff Layton Signed-off-by: Neil Brown Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f1ef49f..c3f1194 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -19,12 +19,11 @@ #define NLMDBG_FACILITY NLMDBG_HOSTCACHE -#define NLM_HOST_MAX 64 #define NLM_HOST_NRHASH 32 #define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) #define NLM_HOST_REBIND (60 * HZ) -#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) -#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) +#define NLM_HOST_EXPIRE (300 * HZ) +#define NLM_HOST_COLLECT (120 * HZ) static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; @@ -142,9 +141,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, INIT_LIST_HEAD(&host->h_granted); INIT_LIST_HEAD(&host->h_reclaim); - if (++nrhosts > NLM_HOST_MAX) - next_gc = 0; - + nrhosts++; out: mutex_unlock(&nlm_host_mutex); return host; -- cgit v0.10.2 From 23d42ee278de1552d67daef5774ba59ff30925db Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Feb 2008 16:34:53 -0500 Subject: SUNRPC: export svc_sock_update_bufs Needed since the plan is to not have a svc_create_thread helper and to have current users of that function just call kthread_run directly. Signed-off-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c475977..a45c378 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1101,6 +1101,7 @@ void svc_sock_update_bufs(struct svc_serv *serv) } spin_unlock_bh(&serv->sv_lock); } +EXPORT_SYMBOL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct -- cgit v0.10.2 From 7086721f9c8b59331e164e534f588e075cfd9d3f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Feb 2008 16:34:54 -0500 Subject: SUNRPC: have svc_recv() check kthread_should_stop() When using kthreads that call into svc_recv, we want to make sure that they do not block there for a long time when we're trying to take down the kthread. This patch changes svc_recv() to check kthread_should_stop() at the same places that it checks to see if it's signalled(). Also check just before svc_recv() tries to schedule(). By making sure that we check it just after setting the task state we can avoid having to use any locking or signalling to ensure it doesn't block for a long time. There's still a chance of a 500ms sleep if alloc_page() fails, but that should be a rare occurrence and isn't a terribly long time in the context of a kthread being taken down. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 332eb47..2e5d43c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -587,6 +588,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) struct page *p = alloc_page(GFP_KERNEL); if (!p) { int j = msecs_to_jiffies(500); + if (kthread_should_stop()) + return -EINTR; schedule_timeout_uninterruptible(j); } rqstp->rq_pages[i] = p; @@ -607,7 +610,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); - if (signalled()) + if (signalled() || kthread_should_stop()) return -EINTR; spin_lock_bh(&pool->sp_lock); @@ -626,6 +629,20 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + + /* + * checking kthread_should_stop() here allows us to avoid + * locking and signalling when stopping kthreads that call + * svc_recv. If the thread has already been woken up, then + * we can exit here without sleeping. If not, then it + * it'll be woken up quickly during the schedule_timeout + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + spin_unlock_bh(&pool->sp_lock); + return -EINTR; + } + add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); @@ -641,7 +658,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_thread_dequeue(pool, rqstp); spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); - return signalled()? -EINTR : -EAGAIN; + if (signalled() || kthread_should_stop()) + return -EINTR; + else + return -EAGAIN; } } spin_unlock_bh(&pool->sp_lock); -- cgit v0.10.2 From d751a7cd0695554498f25d3026ca6710dbb3698f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Feb 2008 16:34:55 -0500 Subject: NLM: Convert lockd to use kthreads Have lockd_up start lockd using kthread_run. With this change, lockd_down now blocks until lockd actually exits, so there's no longer need for the waitqueue code at the end of lockd_down. This also means that only one lockd can be running at a time which simplifies the code within lockd's main loop. This also adds a check for kthread_should_stop in the main loop of nlmsvc_retry_blocked and after that function returns. There's no sense continuing to retry blocks if lockd is coming down anyway. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1ed8bd4..66b5c98 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -48,14 +49,11 @@ EXPORT_SYMBOL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; -static pid_t nlmsvc_pid; +static struct task_struct *nlmsvc_task; static struct svc_serv *nlmsvc_serv; int nlmsvc_grace_period; unsigned long nlmsvc_timeout; -static DECLARE_COMPLETION(lockd_start_done); -static DECLARE_WAIT_QUEUE_HEAD(lockd_exit); - /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 @@ -111,35 +109,30 @@ static inline void clear_grace_period(void) /* * This is the lockd kernel thread */ -static void -lockd(struct svc_rqst *rqstp) +static int +lockd(void *vrqstp) { int err = 0; + struct svc_rqst *rqstp = vrqstp; unsigned long grace_period_expire; - /* Lock module and set up kernel thread */ - /* lockd_up is waiting for us to startup, so will - * be holding a reference to this module, so it - * is safe to just claim another reference - */ - __module_get(THIS_MODULE); - lock_kernel(); - - /* - * Let our maker know we're running. - */ - nlmsvc_pid = current->pid; - nlmsvc_serv = rqstp->rq_server; - complete(&lockd_start_done); - - daemonize("lockd"); + /* try_to_freeze() is called from svc_recv() */ set_freezable(); - /* Process request with signals blocked, but allow SIGKILL. */ + /* Allow SIGKILL to tell lockd to drop all of its locks */ allow_signal(SIGKILL); dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); + /* + * FIXME: it would be nice if lockd didn't spend its entire life + * running under the BKL. At the very least, it would be good to + * have someone clarify what it's intended to protect here. I've + * seen some handwavy posts about posix locking needing to be + * done under the BKL, but it's far from clear. + */ + lock_kernel(); + if (!nlm_timeout) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; @@ -148,10 +141,9 @@ lockd(struct svc_rqst *rqstp) /* * The main request loop. We don't terminate until the last - * NFS mount or NFS daemon has gone away, and we've been sent a - * signal, or else another process has taken over our job. + * NFS mount or NFS daemon has gone away. */ - while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { + while (!kthread_should_stop()) { long timeout = MAX_SCHEDULE_TIMEOUT; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); @@ -161,6 +153,7 @@ lockd(struct svc_rqst *rqstp) nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); } + continue; } /* @@ -195,28 +188,19 @@ lockd(struct svc_rqst *rqstp) } flush_signals(current); + if (nlmsvc_ops) + nlmsvc_invalidate_all(); + nlm_shutdown_hosts(); - /* - * Check whether there's a new lockd process before - * shutting down the hosts and clearing the slot. - */ - if (!nlmsvc_pid || current->pid == nlmsvc_pid) { - if (nlmsvc_ops) - nlmsvc_invalidate_all(); - nlm_shutdown_hosts(); - nlmsvc_pid = 0; - nlmsvc_serv = NULL; - } else - printk(KERN_DEBUG - "lockd: new process, skipping host shutdown\n"); - wake_up(&lockd_exit); + unlock_kernel(); + + nlmsvc_task = NULL; + nlmsvc_serv = NULL; /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* Release module */ - unlock_kernel(); - module_put_and_exit(0); + return 0; } /* @@ -261,14 +245,15 @@ static int make_socks(struct svc_serv *serv, int proto) int lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ { - struct svc_serv * serv; - int error = 0; + struct svc_serv *serv; + struct svc_rqst *rqstp; + int error = 0; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ - if (nlmsvc_pid) { + if (nlmsvc_serv) { if (proto) error = make_socks(nlmsvc_serv, proto); goto out; @@ -295,13 +280,28 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ /* * Create the kernel thread and wait for it to start. */ - error = svc_create_thread(lockd, serv); - if (error) { + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) { + error = PTR_ERR(rqstp); printk(KERN_WARNING - "lockd_up: create thread failed, error=%d\n", error); + "lockd_up: svc_rqst allocation failed, error=%d\n", + error); + goto destroy_and_out; + } + + svc_sock_update_bufs(serv); + nlmsvc_serv = rqstp->rq_server; + + nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name); + if (IS_ERR(nlmsvc_task)) { + error = PTR_ERR(nlmsvc_task); + nlmsvc_task = NULL; + nlmsvc_serv = NULL; + printk(KERN_WARNING + "lockd_up: kthread_run failed, error=%d\n", error); + svc_exit_thread(rqstp); goto destroy_and_out; } - wait_for_completion(&lockd_start_done); /* * Note: svc_serv structures have an initial use count of 1, @@ -323,37 +323,21 @@ EXPORT_SYMBOL(lockd_up); void lockd_down(void) { - static int warned; - mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { if (--nlmsvc_users) goto out; - } else - printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid); - - if (!nlmsvc_pid) { - if (warned++ == 0) - printk(KERN_WARNING "lockd_down: no lockd running.\n"); - goto out; + } else { + printk(KERN_ERR "lockd_down: no users! task=%p\n", + nlmsvc_task); + BUG(); } - warned = 0; - kill_proc(nlmsvc_pid, SIGKILL, 1); - /* - * Wait for the lockd process to exit, but since we're holding - * the lockd semaphore, we can't wait around forever ... - */ - clear_thread_flag(TIF_SIGPENDING); - interruptible_sleep_on_timeout(&lockd_exit, HZ); - if (nlmsvc_pid) { - printk(KERN_WARNING - "lockd_down: lockd failed to exit, clearing pid\n"); - nlmsvc_pid = 0; + if (!nlmsvc_task) { + printk(KERN_ERR "lockd_down: no lockd running.\n"); + BUG(); } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + kthread_stop(nlmsvc_task); out: mutex_unlock(&nlmsvc_mutex); } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fe9bdb4..4da7c4c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -29,6 +29,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_SVCLOCK @@ -887,7 +888,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; - while (!list_empty(&nlm_blocked)) { + while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); if (block->b_when == NLM_NEVER) -- cgit v0.10.2 From f15364bd4cf8799a7677b6daeed7b67d9139d974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Charbon?= Date: Fri, 18 Jan 2008 15:50:56 +0100 Subject: IPv6 support for NFS server export caches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds IPv6 support to the interfaces that are used to express nfsd exports. All addressed are stored internally as IPv6; backwards compatibility is maintained using mapped addresses. Thanks to Bruce Fields, Brian Haley, Neil Brown and Hideaki Joshifuji for comments Signed-off-by: Aurelien Charbon Cc: Neil Brown Cc: Brian Haley Cc: YOSHIFUJI Hideaki / 吉藤英明 Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8a6f7c9..33bfcf0 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -35,6 +35,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_EXPORT @@ -1548,6 +1549,7 @@ exp_addclient(struct nfsctl_client *ncp) { struct auth_domain *dom; int i, err; + struct in6_addr addr6; /* First, consistency check. */ err = -EINVAL; @@ -1566,9 +1568,10 @@ exp_addclient(struct nfsctl_client *ncp) goto out_unlock; /* Insert client into hashtable. */ - for (i = 0; i < ncp->cl_naddr; i++) - auth_unix_add_addr(ncp->cl_addrlist[i], dom); - + for (i = 0; i < ncp->cl_naddr; i++) { + ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); + auth_unix_add_addr(&addr6, dom); + } auth_unix_forget_old(dom); auth_domain_put(dom); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8516137..9f038a4a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -37,6 +37,7 @@ #include #include +#include /* * We have a single directory with 9 nodes in it. @@ -222,6 +223,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) struct auth_domain *clp; int err = 0; struct knfsd_fh *res; + struct in6_addr in6; if (size < sizeof(*data)) return -EINVAL; @@ -236,7 +238,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) res = (struct knfsd_fh*)buf; exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) + + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); + + clp = auth_unix_lookup(&in6); + if (!clp) err = -EPERM; else { err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); @@ -257,6 +263,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) int err = 0; struct knfsd_fh fh; char *res; + struct in6_addr in6; if (size < sizeof(*data)) return -EINVAL; @@ -271,7 +278,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) res = buf; sin = (struct sockaddr_in *)&data->gd_addr; exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) + + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); + + clp = auth_unix_lookup(&in6); + if (!clp) err = -EPERM; else { err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 22e1ef8..d39dbdc 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -24,6 +24,7 @@ struct svc_cred { }; struct svc_rqst; /* forward decl */ +struct in6_addr; /* Authentication is done in the context of a domain. * @@ -120,10 +121,10 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); +extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct in_addr addr); +extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(void *); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 49c4898..e0a612b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -383,6 +383,15 @@ static inline int ipv6_addr_orchid(const struct in6_addr *a) == htonl(0x20010010)); } +static inline void ipv6_addr_set_v4mapped(const __be32 addr, + struct in6_addr *v4mapped) +{ + ipv6_addr_set(v4mapped, + 0, 0, + htonl(0x0000FFFF), + addr); +} + /* * find the first different bit between two addresses * length of address must be a multiple of 32bits diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3c64051..3f30ee6 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -11,7 +11,8 @@ #include #include #include - +#include +#include #define RPCDBG_FACILITY RPCDBG_AUTH @@ -85,7 +86,7 @@ static void svcauth_unix_domain_release(struct auth_domain *dom) struct ip_map { struct cache_head h; char m_class[8]; /* e.g. "nfsd" */ - struct in_addr m_addr; + struct in6_addr m_addr; struct unix_domain *m_client; int m_add_change; }; @@ -113,12 +114,19 @@ static inline int hash_ip(__be32 ip) return (hash ^ (hash>>8)) & 0xff; } #endif +static inline int hash_ip6(struct in6_addr ip) +{ + return (hash_ip(ip.s6_addr32[0]) ^ + hash_ip(ip.s6_addr32[1]) ^ + hash_ip(ip.s6_addr32[2]) ^ + hash_ip(ip.s6_addr32[3])); +} static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); return strcmp(orig->m_class, new->m_class) == 0 - && orig->m_addr.s_addr == new->m_addr.s_addr; + && ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { @@ -126,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) struct ip_map *item = container_of(citem, struct ip_map, h); strcpy(new->m_class, item->m_class); - new->m_addr.s_addr = item->m_addr.s_addr; + ipv6_addr_copy(&new->m_addr, &item->m_addr); } static void update(struct cache_head *cnew, struct cache_head *citem) { @@ -150,22 +158,24 @@ static void ip_map_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { - char text_addr[20]; + char text_addr[40]; struct ip_map *im = container_of(h, struct ip_map, h); - __be32 addr = im->m_addr.s_addr; - - snprintf(text_addr, 20, "%u.%u.%u.%u", - ntohl(addr) >> 24 & 0xff, - ntohl(addr) >> 16 & 0xff, - ntohl(addr) >> 8 & 0xff, - ntohl(addr) >> 0 & 0xff); + if (ipv6_addr_v4mapped(&(im->m_addr))) { + snprintf(text_addr, 20, NIPQUAD_FMT, + ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); + } else { + snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr)); + } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); (*bpp)[-1] = '\n'; } -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, @@ -176,10 +186,10 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1,b2,b3,b4; + int b1, b2, b3, b4, b5, b6, b7, b8; char c; char class[8]; - struct in_addr addr; + struct in6_addr addr; int err; struct ip_map *ipmp; @@ -198,7 +208,23 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) + if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) { + addr.s6_addr32[0] = 0; + addr.s6_addr32[1] = 0; + addr.s6_addr32[2] = htonl(0xffff); + addr.s6_addr32[3] = + htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); + } else if (sscanf(buf, NIP6_FMT "%c", + &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { + addr.s6_addr16[0] = htons(b1); + addr.s6_addr16[1] = htons(b2); + addr.s6_addr16[2] = htons(b3); + addr.s6_addr16[3] = htons(b4); + addr.s6_addr16[4] = htons(b5); + addr.s6_addr16[5] = htons(b6); + addr.s6_addr16[6] = htons(b7); + addr.s6_addr16[7] = htons(b8); + } else return -EINVAL; expiry = get_expiry(&mesg); @@ -216,10 +242,7 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - addr.s_addr = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - - ipmp = ip_map_lookup(class,addr); + ipmp = ip_map_lookup(class, &addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), @@ -239,7 +262,7 @@ static int ip_map_show(struct seq_file *m, struct cache_head *h) { struct ip_map *im; - struct in_addr addr; + struct in6_addr addr; char *dom = "-no-domain-"; if (h == NULL) { @@ -248,20 +271,24 @@ static int ip_map_show(struct seq_file *m, } im = container_of(h, struct ip_map, h); /* class addr domain */ - addr = im->m_addr; + ipv6_addr_copy(&addr, &im->m_addr); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) dom = im->m_client->h.name; - seq_printf(m, "%s %d.%d.%d.%d %s\n", - im->m_class, - ntohl(addr.s_addr) >> 24 & 0xff, - ntohl(addr.s_addr) >> 16 & 0xff, - ntohl(addr.s_addr) >> 8 & 0xff, - ntohl(addr.s_addr) >> 0 & 0xff, - dom - ); + if (ipv6_addr_v4mapped(&addr)) { + seq_printf(m, "%s" NIPQUAD_FMT "%s\n", + im->m_class, + ntohl(addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(addr.s6_addr32[3]) >> 0 & 0xff, + dom); + } else { + seq_printf(m, "%s" NIP6_FMT "%s\n", + im->m_class, NIP6(addr), dom); + } return 0; } @@ -281,16 +308,16 @@ struct cache_detail ip_map_cache = { .alloc = ip_map_alloc, }; -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); - ip.m_addr = addr; + ipv6_addr_copy(&ip.m_addr, addr); ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip(addr.s_addr)); + hash_ip6(*addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -319,14 +346,14 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ch = sunrpc_cache_update(&ip_map_cache, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip(ipm->m_addr.s_addr)); + hash_ip6(ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, &ip_map_cache); return 0; } -int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) +int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; struct ip_map *ipmp; @@ -355,7 +382,7 @@ int auth_unix_forget_old(struct auth_domain *dom) } EXPORT_SYMBOL(auth_unix_forget_old); -struct auth_domain *auth_unix_lookup(struct in_addr addr) +struct auth_domain *auth_unix_lookup(struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; @@ -650,9 +677,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, int svcauth_unix_set_client(struct svc_rqst *rqstp) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; + switch (rqstp->rq_addr.ss_family) { + case AF_INET: + sin = svc_addr_in(rqstp); + sin6 = &sin6_storage; + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + htonl(0x0000FFFF), sin->sin_addr.s_addr); + break; + case AF_INET6: + sin6 = svc_addr_in6(rqstp); + break; + default: + BUG(); + } + rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) return SVC_OK; @@ -660,7 +702,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ipm = ip_map_cached_get(rqstp); if (ipm == NULL) ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, - sin->sin_addr); + &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; -- cgit v0.10.2 From 065f30ec14b1460c695b371bc44e068832a60d9b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 19 Jan 2008 13:58:23 -0500 Subject: nfs: remove unnecessary NFS_NEED_* defines Thanks to Robert Day for pointing out that these two defines are unused. Signed-off-by: J. Bruce Fields Cc: Trond Myklebust Trond Myklebust Cc: Neil Brown Cc: "Robert P. J. Day" diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 83e865a..412738d 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -10,7 +10,6 @@ * nfs symlink handling code */ -#define NFS_NEED_XDR_TYPES #include #include #include diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h index 7f11fa5..539f3b5 100644 --- a/include/linux/nfs3.h +++ b/include/linux/nfs3.h @@ -96,7 +96,7 @@ struct nfs3_fh { #define MOUNTPROC3_UMNTALL 4 -#if defined(__KERNEL__) || defined(NFS_NEED_KERNEL_TYPES) +#if defined(__KERNEL__) /* Number of 32bit words in post_op_attr */ #define NFS3_POST_OP_ATTR_WORDS 22 -- cgit v0.10.2 From f3362737be14668f4e8f5c8d082eb131aafc1353 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 26 Jan 2008 14:58:45 -0500 Subject: nfsd4: remove unnecessary CHECK_FH check in preprocess_seqid_op Every caller sets this flag, so it's meaningless. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81a75f3..b73e96d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2127,7 +2127,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei } } - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { + if (nfs4_check_fh(current_fh, stp)) { dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); return nfserr_bad_stateid; } @@ -2194,7 +2194,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, oc->oc_seqid, &oc->oc_req_stateid, - CHECK_FH | CONFIRM | OPEN_STATE, + CONFIRM | OPEN_STATE, &oc->oc_stateowner, &stp, NULL))) goto out; @@ -2265,7 +2265,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, od->od_seqid, &od->od_stateid, - CHECK_FH | OPEN_STATE, + OPEN_STATE, &od->od_stateowner, &stp, NULL))) goto out; @@ -2318,7 +2318,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, close->cl_seqid, &close->cl_stateid, - CHECK_FH | OPEN_STATE | CLOSE_STATE, + OPEN_STATE | CLOSE_STATE, &close->cl_stateowner, &stp, NULL))) goto out; status = nfs_ok; @@ -2623,7 +2623,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(&cstate->current_fh, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - CHECK_FH | OPEN_STATE, + OPEN_STATE, &lock->lk_replay_owner, &open_stp, lock); if (status) @@ -2650,7 +2650,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(&cstate->current_fh, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, - CHECK_FH | LOCK_STATE, + LOCK_STATE, &lock->lk_replay_owner, &lock_stp, lock); if (status) goto out; @@ -2847,7 +2847,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, locku->lu_seqid, &locku->lu_stateid, - CHECK_FH | LOCK_STATE, + LOCK_STATE, &locku->lu_stateowner, &stp, NULL))) goto out; -- cgit v0.10.2 From 0836f587258c2a24bfdc8810ad2327e7f354b6c7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 26 Jan 2008 19:08:12 -0500 Subject: nfsd4: simplify stateid sequencing checks Pull this common code into a separate function. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b73e96d..b83b58d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1975,6 +1975,26 @@ io_during_grace_disallowed(struct inode *inode, int flags) && mandatory_lock(inode); } +static int check_stateid_generation(stateid_t *in, stateid_t *ref) +{ + /* If the client sends us a stateid from the future, it's buggy: */ + if (in->si_generation > ref->si_generation) + return nfserr_bad_stateid; + /* + * The following, however, can happen. For example, if the + * client sends an open and some IO at the same time, the open + * may bump si_generation while the IO is still in flight. + * Thanks to hard links and renames, the client never knows what + * file an open will affect. So it could avoid that situation + * only by serializing all opens and IO from the same open + * owner. To recover from the old_stateid error, the client + * will just have to retry the IO: + */ + if (in->si_generation < ref->si_generation) + return nfserr_old_stateid; + return nfs_ok; +} + /* * Checks for stateid operations */ @@ -2023,12 +2043,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl goto out; stidp = &stp->st_stateid; } - if (stateid->si_generation > stidp->si_generation) - goto out; - - /* OLD STATEID */ - status = nfserr_old_stateid; - if (stateid->si_generation < stidp->si_generation) + status = check_stateid_generation(stateid, stidp); + if (status) goto out; if (stp) { if ((status = nfs4_check_openmode(stp,flags))) @@ -2065,6 +2081,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; + __be32 status; dprintk("NFSD: preprocess_seqid_op: seqid=%d " "stateid = (%08x/%08x/%08x/%08x)\n", seqid, @@ -2150,15 +2167,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei " confirmed yet!\n"); return nfserr_bad_stateid; } - if (stateid->si_generation > stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: future stateid?!\n"); - return nfserr_bad_stateid; - } - - if (stateid->si_generation < stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: old stateid!\n"); - return nfserr_old_stateid; - } + status = check_stateid_generation(stateid, &stp->st_stateid); + if (status) + return status; renew_client(sop->so_client); return nfs_ok; -- cgit v0.10.2 From 6a85fa3adddd3a74bd5b94c4b72668d307b88377 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 26 Jan 2008 23:36:48 -0500 Subject: nfsd4: kill unnecessary check in preprocess_stateid_op This condition is always true. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b83b58d..a40d1ec 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2052,7 +2052,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl renew_client(stp->st_stateowner->so_client); if (filpp) *filpp = stp->st_vfs_file; - } else if (dp) { + } else { if ((status = nfs4_check_delegmode(dp, flags))) goto out; renew_client(dp->dl_client); -- cgit v0.10.2 From 67eb6ff610d50da231a37beb634d6dea4b5025ab Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 31 Jan 2008 16:14:54 -0500 Subject: svcrpc: move unused field from cache_deferred_req This field is set once and never used; probably some artifact of an earlier implementation idea. Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03547d6..2d8b211 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -120,7 +120,6 @@ struct cache_deferred_req { struct list_head hash; /* on hash chain */ struct list_head recent; /* on fifo */ struct cache_head *item; /* cache item we wait on */ - time_t recv_time; void *owner; /* we might need to discard all defered requests * owned by someone */ void (*revisit)(struct cache_deferred_req *req, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b5f2786..d75530f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -571,7 +571,6 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) return -ETIMEDOUT; dreq->item = item; - dreq->recv_time = get_seconds(); spin_lock(&cache_defer_lock); -- cgit v0.10.2 From c0ce6ec87c59d7a29438717b1f72f83fb408f416 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 Feb 2008 15:48:47 -0500 Subject: nfsd: clarify readdir/mountpoint-crossing code The code here is difficult to understand; attempt to clarify somewhat by pulling out one of the more mystifying conditionals into a separate function. While we're here, also add lease_time to the list of attributes that we don't really need to cross a mountpoint to fetch. Signed-off-by: J. Bruce Fields Cc: Peter Staubach diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0e6a179..1ba7ad9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1867,6 +1867,15 @@ out_serverfault: goto out; } +static inline int attributes_need_mount(u32 *bmval) +{ + if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) + return 1; + if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) + return 1; + return 0; +} + static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, const char *name, int namlen, __be32 *p, int *buflen) @@ -1888,9 +1897,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, * we will not follow the cross mount and will fill the attribtutes * directly from the mountpoint dentry. */ - if (d_mountpoint(dentry) && - (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 && - (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0) + if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) ignore_crossmnt = 1; else if (d_mountpoint(dentry)) { int err; -- cgit v0.10.2 From 5ea0dd61f221ba2701314a85e998b8202412553d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2008 17:11:39 -0500 Subject: NFSD: Remove NFSD_TCP kernel build option Likewise, distros usually leave CONFIG_NFSD_TCP enabled. TCP support in the Linux NFS server is stable enough that we can leave it on always. CONFIG_NFSD_TCP adds about 10 lines of code, and defaults to "Y" anyway. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 8b18a87..1200277 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1696,7 +1696,6 @@ config NFSD select EXPORTFS select NFSD_V2_ACL if NFSD_V3_ACL select NFS_ACL_SUPPORT if NFSD_V2_ACL - select NFSD_TCP if NFSD_V4 select CRYPTO_MD5 if NFSD_V4 select CRYPTO if NFSD_V4 select FS_POSIX_ACL if NFSD_V4 @@ -1755,15 +1754,6 @@ config NFSD_V4 should only be used if you are interested in helping to test NFSv4. If unsure, say N. -config NFSD_TCP - bool "Provide NFS server over TCP support" - depends on NFSD - default y - help - If you want your NFS server to support TCP connections, say Y here. - TCP connections usually perform better than the default UDP when - the network is lossy or congested. If unsure, say Y. - config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9647b0f..941041f 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -244,7 +244,6 @@ static int nfsd_init_socks(int port) if (error < 0) return error; -#ifdef CONFIG_NFSD_TCP error = lockd_up(IPPROTO_TCP); if (error >= 0) { error = svc_create_xprt(nfsd_serv, "tcp", port, @@ -254,7 +253,6 @@ static int nfsd_init_socks(int port) } if (error < 0) return error; -#endif return 0; } -- cgit v0.10.2 From d24455b5ffe02a652e8cb1ed2d3570a512c898f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2008 17:11:54 -0500 Subject: NFSD: Update help text for CONFIG_NFSD Clean up: refresh the help text for Kconfig items related to the NFS server. Remove obsolete URLs, and make the language consistent among the options. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 1200277..a1dccb6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1702,56 +1702,74 @@ config NFSD select PROC_FS if NFSD_V4 select PROC_FS if SUNRPC_GSS help - If you want your Linux box to act as an NFS *server*, so that other - computers on your local network which support NFS can access certain - directories on your box transparently, you have two options: you can - use the self-contained user space program nfsd, in which case you - should say N here, or you can say Y and use the kernel based NFS - server. The advantage of the kernel based solution is that it is - faster. + Choose Y here if you want to allow other computers to access + files residing on this system using Sun's Network File System + protocol. To compile the NFS server support as a module, + choose M here: the module will be called nfsd. - In either case, you will need support software; the respective - locations are given in the file in the - NFS section. + You may choose to use a user-space NFS server instead, in which + case you can choose N here. - If you say Y here, you will get support for version 2 of the NFS - protocol (NFSv2). If you also want NFSv3, say Y to the next question - as well. + To export local file systems using NFS, you also need to install + user space programs which can be found in the Linux nfs-utils + package, available from http://linux-nfs.org/. More detail about + the Linux NFS server implementation is available via the + exports(5) man page. - Please read the NFS-HOWTO, available from - . + Below you can choose which versions of the NFS protocol are + available to clients mounting the NFS server on this system. + Support for NFS version 2 (RFC 1094) is always available when + CONFIG_NFSD is selected. - To compile the NFS server support as a module, choose M here: the - module will be called nfsd. If unsure, say N. + If unsure, say N. config NFSD_V2_ACL bool depends on NFSD config NFSD_V3 - bool "Provide NFSv3 server support" + bool "NFS server support for NFS version 3" depends on NFSD help - If you would like to include the NFSv3 server as well as the NFSv2 - server, say Y here. If unsure, say Y. + This option enables support in your system's NFS server for + version 3 of the NFS protocol (RFC 1813). + + If unsure, say Y. config NFSD_V3_ACL - bool "Provide server support for the NFSv3 ACL protocol extension" + bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD_V3 help - Implement the NFSv3 ACL protocol extension for manipulating POSIX - Access Control Lists on exported file systems. NFS clients should - be compiled with the NFSv3 ACL protocol extension; see the - CONFIG_NFS_V3_ACL option. If unsure, say N. + Solaris NFS servers support an auxiliary NFSv3 ACL protocol that + never became an official part of the NFS version 3 protocol. + This protocol extension allows applications on NFS clients to + manipulate POSIX Access Control Lists on files residing on NFS + servers. NFS servers enforce POSIX ACLs on local files whether + this protocol is available or not. + + This option enables support in your system's NFS server for the + NFSv3 ACL protocol extension allowing NFS clients to manipulate + POSIX ACLs on files exported by your system's NFS server. NFS + clients which support the Solaris NFSv3 ACL protocol can then + access and modify ACLs on your NFS server. + + To store ACLs on your NFS server, you also need to enable ACL- + related CONFIG options for your local file systems of choice. + + If unsure, say N. config NFSD_V4 - bool "Provide NFSv4 server support (EXPERIMENTAL)" + bool "NFS server support for NFS version 4 (EXPERIMENTAL)" depends on NFSD && NFSD_V3 && EXPERIMENTAL select RPCSEC_GSS_KRB5 help - If you would like to include the NFSv4 server as well as the NFSv2 - and NFSv3 servers, say Y here. This feature is experimental, and - should only be used if you are interested in helping to test NFSv4. + This option enables support in your system's NFS server for + version 4 of the NFS protocol (RFC 3530). + + To export files using NFSv4, you need to install additional user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. + If unsure, say N. config ROOT_NFS -- cgit v0.10.2 From 892069552eedfa0d2fca41680d076a5bc0a3c26e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2008 17:12:24 -0500 Subject: NFSD: Move "select FS_POSIX_ACL if NFSD_V4" Clean up: since FS_POSIX_ACL is a non-visible boolean entry, it can be selected safely under the NFSD_V4 entry (also a boolean). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index a1dccb6..94250f0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -411,7 +411,7 @@ config JFS_STATISTICS to be made available to the user in the /proc/fs/jfs/ directory. config FS_POSIX_ACL -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs) +# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) # # NOTE: you can implement Posix ACLs without these helpers (XFS does). # Never use this symbol for ifdefs. @@ -1698,7 +1698,6 @@ config NFSD select NFS_ACL_SUPPORT if NFSD_V2_ACL select CRYPTO_MD5 if NFSD_V4 select CRYPTO if NFSD_V4 - select FS_POSIX_ACL if NFSD_V4 select PROC_FS if NFSD_V4 select PROC_FS if SUNRPC_GSS help @@ -1761,6 +1760,7 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" depends on NFSD && NFSD_V3 && EXPERIMENTAL + select FS_POSIX_ACL select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for -- cgit v0.10.2 From 78dd0992a3462e05138cf4d08df759bf1c7a08c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2008 17:12:31 -0500 Subject: NFSD: Move "select NFSD_V2_ACL if NFSD_V3_ACL" Clean up: since NFSD_V2_ACL is a boolean, it can be selected safely under the NFSD_V3_ACL entry (also a boolean). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 94250f0..b609843 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1694,7 +1694,6 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS - select NFSD_V2_ACL if NFSD_V3_ACL select NFS_ACL_SUPPORT if NFSD_V2_ACL select CRYPTO_MD5 if NFSD_V4 select CRYPTO if NFSD_V4 @@ -1738,6 +1737,7 @@ config NFSD_V3 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD_V3 + select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. -- cgit v0.10.2 From 6aaa67b5f3b9fe24f0c76d0415cc72e5a1137bea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Feb 2008 17:12:38 -0500 Subject: NFSD: Remove redundant "select" clauses in fs/Kconfig As far as I can tell, selecting the CRYPTO and CRYPTO_MD5 entries under CONFIG_NFSD is redundant, since CONFIG_NFSD_V4 already selects RPCSEC_GSS_KRB5, which selects these entries. Testing with "make menuconfig" shows that the entries under CRYPTO still properly reflect "Y" or "M" based on the setting of CONFIG_NFSD after this change is applied. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index b609843..1d81be3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1695,8 +1695,6 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL - select CRYPTO_MD5 if NFSD_V4 - select CRYPTO if NFSD_V4 select PROC_FS if NFSD_V4 select PROC_FS if SUNRPC_GSS help -- cgit v0.10.2 From 7b54fe61ffd5bfa4e50d371a2415225aa0cbb38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 12 Feb 2008 11:47:24 -0500 Subject: SUNRPC: allow svc_recv to break out of 500ms sleep when alloc_page fails svc_recv() calls alloc_page(), and if it fails it does a 500ms uninterruptible sleep and then reattempts. There doesn't seem to be any real reason for this to be uninterruptible, so change it to an interruptible sleep. Also check for kthread_stop() and signalled() after setting the task state to avoid races that might lead to sleeping after kthread_stop() wakes up the task. I've done some very basic smoke testing with this, but obviously it's hard to test the actual changes since this all depends on an alloc_page() call failing. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2e5d43c..d8e8d79 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -587,10 +587,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - int j = msecs_to_jiffies(500); - if (kthread_should_stop()) + set_current_state(TASK_INTERRUPTIBLE); + if (signalled() || kthread_should_stop()) { + set_current_state(TASK_RUNNING); return -EINTR; - schedule_timeout_uninterruptible(j); + } + schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_pages[i] = p; } -- cgit v0.10.2 From f2b0dee2ec8d562ad9ced2b7481be72d356c6cfc Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2008 23:30:26 +0200 Subject: make nfsd_create_setattr() static This patch makes the needlessly global nfsd_create_setattr() static. Signed-off-by: Adrian Bunk Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 304bf5f..d5a238e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1152,7 +1152,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif /* CONFIG_NFSD_V3 */ -__be32 +static __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, struct iattr *iap) { -- cgit v0.10.2 From 93245d11fcaccdebccabe86a2b92db524f82d8b4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 18 Feb 2008 02:01:49 -0800 Subject: lockd: fix sparse warning in svcshare.c fs/lockd/svcshare.c:74:50: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 068886d..b0ae070 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -71,7 +71,8 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, struct nlm_share *share, **shpp; struct xdr_netobj *oh = &argp->lock.oh; - for (shpp = &file->f_shares; (share = *shpp) != 0; shpp = &share->s_next) { + for (shpp = &file->f_shares; (share = *shpp) != NULL; + shpp = &share->s_next) { if (share->s_host == host && nlm_cmp_owner(share, oh)) { *shpp = share->s_next; kfree(share); -- cgit v0.10.2 From dd35210e1e2cb46d6dba5c97f1bc3784c4f97998 Mon Sep 17 00:00:00 2001 From: Harshula Jayasuriya Date: Wed, 20 Feb 2008 10:56:56 +1100 Subject: sunrpc: GSS integrity and decryption failures should return GARBAGE_ARGS In function svcauth_gss_accept() (net/sunrpc/auth_gss/svcauth_gss.c) the code that handles GSS integrity and decryption failures should be returning GARBAGE_ARGS as specified in RFC 2203, sections 5.3.3.4.2 and 5.3.3.4.3. Reviewed-by: Greg Banks Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 481f984..5905d56 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1146,7 +1146,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_INTEGRITY: if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1154,7 +1154,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1169,6 +1169,11 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) ret = SVC_OK; goto out; } +garbage_args: + /* Restore write pointer to its original value: */ + xdr_ressize_check(rqstp, reject_stat); + ret = SVC_GARBAGE; + goto out; auth_err: /* Restore write pointer to its original value: */ xdr_ressize_check(rqstp, reject_stat); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 090af78..7efb513 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -921,8 +921,7 @@ svc_process(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - rpc_stat = rpc_garbage_args; - goto err_bad; + goto err_garbage; case SVC_SYSERR: rpc_stat = rpc_system_err; goto err_bad; -- cgit v0.10.2 From 164f98adbbd50c67177b096a59f55c1a56a45c82 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Feb 2008 14:02:47 -0500 Subject: lockd: fix race in nlm_release() The sm_count is decremented to zero but left on the nsm_handles list. So in the space between decrementing sm_count and acquiring nsm_mutex, it is possible for another task to find this nsm_handle, increment the use count and then enter nsm_release itself. Thus there's nothing to prevent the nsm being freed before we acquire nsm_mutex here. Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/host.c b/fs/lockd/host.c index c3f1194..960911c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -529,12 +529,10 @@ nsm_release(struct nsm_handle *nsm) { if (!nsm) return; + mutex_lock(&nsm_mutex); if (atomic_dec_and_test(&nsm->sm_count)) { - mutex_lock(&nsm_mutex); - if (atomic_read(&nsm->sm_count) == 0) { - list_del(&nsm->sm_link); - kfree(nsm); - } - mutex_unlock(&nsm_mutex); + list_del(&nsm->sm_link); + kfree(nsm); } + mutex_unlock(&nsm_mutex); } -- cgit v0.10.2 From a95e56e72c196970a8067cd515c658d064813170 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Feb 2008 15:27:31 -0500 Subject: lockd: clean up __nsm_find() Use list_for_each_entry(). Also, in keeping with kernel style, make the normal case (kzalloc succeeds) unindented and handle the abnormal case with a goto. Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 960911c..de0ffb6 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -465,7 +465,7 @@ __nsm_find(const struct sockaddr_in *sin, int create) { struct nsm_handle *nsm = NULL; - struct list_head *pos; + struct nsm_handle *pos; if (!sin) return NULL; @@ -480,16 +480,16 @@ __nsm_find(const struct sockaddr_in *sin, } mutex_lock(&nsm_mutex); - list_for_each(pos, &nsm_handles) { - nsm = list_entry(pos, struct nsm_handle, sm_link); + list_for_each_entry(pos, &nsm_handles, sm_link) { if (hostname && nsm_use_hostnames) { - if (strlen(nsm->sm_name) != hostname_len - || memcmp(nsm->sm_name, hostname, hostname_len)) + if (strlen(pos->sm_name) != hostname_len + || memcmp(pos->sm_name, hostname, hostname_len)) continue; - } else if (!nlm_cmp_addr(&nsm->sm_addr, sin)) + } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) continue; - atomic_inc(&nsm->sm_count); + atomic_inc(&pos->sm_count); + nsm = pos; goto out; } @@ -499,15 +499,15 @@ __nsm_find(const struct sockaddr_in *sin, } nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); - if (nsm != NULL) { - nsm->sm_addr = *sin; - nsm->sm_name = (char *) (nsm + 1); - memcpy(nsm->sm_name, hostname, hostname_len); - nsm->sm_name[hostname_len] = '\0'; - atomic_set(&nsm->sm_count, 1); - - list_add(&nsm->sm_link, &nsm_handles); - } + if (nsm == NULL) + goto out; + nsm->sm_addr = *sin; + nsm->sm_name = (char *) (nsm + 1); + memcpy(nsm->sm_name, hostname, hostname_len); + nsm->sm_name[hostname_len] = '\0'; + atomic_set(&nsm->sm_count, 1); + + list_add(&nsm->sm_link, &nsm_handles); out: mutex_unlock(&nsm_mutex); -- cgit v0.10.2 From d8421202121ce74daf4625ca9d1d825bbd7ce66a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Feb 2008 15:40:15 -0500 Subject: lockd: convert nsm_mutex to a spinlock There's no reason for a mutex here, except to allow an allocation under the lock, which we can avoid with the usual trick of preallocating memory for the new object and freeing it if it turns out to be unnecessary. Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/host.c b/fs/lockd/host.c index de0ffb6..c785479 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -457,7 +457,7 @@ nlm_gc_hosts(void) * Manage NSM handles */ static LIST_HEAD(nsm_handles); -static DEFINE_MUTEX(nsm_mutex); +static DEFINE_SPINLOCK(nsm_lock); static struct nsm_handle * __nsm_find(const struct sockaddr_in *sin, @@ -479,7 +479,8 @@ __nsm_find(const struct sockaddr_in *sin, return NULL; } - mutex_lock(&nsm_mutex); +retry: + spin_lock(&nsm_lock); list_for_each_entry(pos, &nsm_handles, sm_link) { if (hostname && nsm_use_hostnames) { @@ -489,28 +490,32 @@ __nsm_find(const struct sockaddr_in *sin, } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) continue; atomic_inc(&pos->sm_count); + kfree(nsm); nsm = pos; - goto out; + goto found; } - - if (!create) { - nsm = NULL; - goto out; + if (nsm) { + list_add(&nsm->sm_link, &nsm_handles); + goto found; } + spin_unlock(&nsm_lock); + + if (!create) + return NULL; nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); if (nsm == NULL) - goto out; + return NULL; + nsm->sm_addr = *sin; nsm->sm_name = (char *) (nsm + 1); memcpy(nsm->sm_name, hostname, hostname_len); nsm->sm_name[hostname_len] = '\0'; atomic_set(&nsm->sm_count, 1); + goto retry; - list_add(&nsm->sm_link, &nsm_handles); - -out: - mutex_unlock(&nsm_mutex); +found: + spin_unlock(&nsm_lock); return nsm; } @@ -529,10 +534,9 @@ nsm_release(struct nsm_handle *nsm) { if (!nsm) return; - mutex_lock(&nsm_mutex); - if (atomic_dec_and_test(&nsm->sm_count)) { + if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) { list_del(&nsm->sm_link); + spin_unlock(&nsm_lock); kfree(nsm); } - mutex_unlock(&nsm_mutex); } -- cgit v0.10.2 From a254b246ee238ab90e7b3fae1f76875b608b2213 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 20 Feb 2008 12:49:00 -0800 Subject: nfsd: fix sparse warnings Add extern to nfsd/nfsd.h fs/nfsd/nfssvc.c:146:5: warning: symbol 'nfsd_nrthreads' was not declared. Should it be static? fs/nfsd/nfssvc.c:261:5: warning: symbol 'nfsd_nrpools' was not declared. Should it be static? fs/nfsd/nfssvc.c:269:5: warning: symbol 'nfsd_get_nrthreads' was not declared. Should it be static? fs/nfsd/nfssvc.c:281:5: warning: symbol 'nfsd_set_nrthreads' was not declared. Should it be static? fs/nfsd/export.c:1534:23: warning: symbol 'nfs_exports_op' was not declared. Should it be static? Add include of auth.h fs/nfsd/auth.c:27:5: warning: symbol 'nfsd_setuser' was not declared. Should it be static? Make static, move forward declaration closer to where it's needed. fs/nfsd/nfs4state.c:1877:1: warning: symbol 'laundromat_main' was not declared. Should it be static? Make static, forward declaration was already marked static. fs/nfsd/nfs4idmap.c:206:1: warning: symbol 'idtoname_parse' was not declared. Should it be static? fs/nfsd/vfs.c:1156:1: warning: symbol 'nfsd_create_setattr' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index d13403e..294992e 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -10,6 +10,7 @@ #include #include #include +#include "auth.h" int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 996bd88..5b39842 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -202,7 +202,7 @@ static struct cache_detail idtoname_cache = { .alloc = ent_alloc, }; -int +static int idtoname_parse(struct cache_detail *cd, char *buf, int buflen) { struct ent ent, *res; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a40d1ec..8235d31 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1763,10 +1763,6 @@ out: return status; } -static struct workqueue_struct *laundry_wq; -static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); - __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) @@ -1874,7 +1870,11 @@ nfs4_laundromat(void) return clientid_val; } -void +static struct workqueue_struct *laundry_wq; +static void laundromat_main(struct work_struct *); +static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); + +static void laundromat_main(struct work_struct *not_used) { time_t t; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9f038a4a..613bcb8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -150,7 +150,6 @@ static const struct file_operations transaction_ops = { .release = simple_transaction_release, }; -extern struct seq_operations nfs_exports_op; static int exports_open(struct inode *inode, struct file *file) { return seq_open(file, &nfs_exports_op); @@ -358,8 +357,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return mesg - buf; } -extern int nfsd_nrthreads(void); - static ssize_t write_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for a number of threads and call nfsd_svc @@ -382,10 +379,6 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return strlen(buf); } -extern int nfsd_nrpools(void); -extern int nfsd_get_nrthreads(int n, int *); -extern int nfsd_set_nrthreads(int n, int *); - static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for an array of number of threads per node diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 8caf4c4..f4de14d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,12 +56,20 @@ extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct svc_serv *nfsd_serv; + +extern struct seq_operations nfs_exports_op; + /* * Function prototypes. */ int nfsd_svc(unsigned short port, int nrservs); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); +int nfsd_nrthreads(void); +int nfsd_nrpools(void); +int nfsd_get_nrthreads(int n, int *); +int nfsd_set_nrthreads(int n, int *); + /* nfsd/vfs.c */ int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); -- cgit v0.10.2 From 3ba1514815817f93a4f09615726dd4bcd0ddbbc9 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 20 Feb 2008 12:49:02 -0800 Subject: nfsd: fix sparse warning in vfs.c fs/nfsd/vfs.c:991:27: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d5a238e..832e2b8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -988,7 +988,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * flushing the data to disk is handled separately below. */ - if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ + if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; *stablep = 2; /* FILE_SYNC */ } -- cgit v0.10.2 From a277e33cbe3fdfb9a77b448ea3043be22f000dfd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 20 Feb 2008 08:55:30 -0500 Subject: NFS: convert nfs4 callback thread to kthread API There's a general push to convert kernel threads to use the (much cleaner) kthread API. This patch converts the NFSv4 callback kernel thread to the kthread API. In addition to being generally cleaner this also removes the dependency on signals when shutting down the thread. Note that this patch depends on the recent patches to svc_recv() to make it check kthread_should_stop() periodically. Those patches are in Bruce's tree at the moment and are slated for 2.6.26 along with the lockd conversion, so this conversion is probably also appropriate for 2.6.26. Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 66648dd..2e5de77 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -27,9 +28,7 @@ struct nfs_callback_data { unsigned int users; struct svc_serv *serv; - pid_t pid; - struct completion started; - struct completion stopped; + struct task_struct *task; }; static struct nfs_callback_data nfs_callback_info; @@ -57,27 +56,20 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, /* * This is the callback kernel thread. */ -static void nfs_callback_svc(struct svc_rqst *rqstp) +static int +nfs_callback_svc(void *vrqstp) { int err; + struct svc_rqst *rqstp = vrqstp; - __module_get(THIS_MODULE); - lock_kernel(); - - nfs_callback_info.pid = current->pid; - daemonize("nfsv4-svc"); - /* Process request with signals blocked, but allow SIGKILL. */ - allow_signal(SIGKILL); set_freezable(); - complete(&nfs_callback_info.started); - - for(;;) { - if (signalled()) { - if (nfs_callback_info.users == 0) - break; - flush_signals(current); - } + /* + * FIXME: do we really need to run this under the BKL? If so, please + * add a comment about what it's intended to protect. + */ + lock_kernel(); + while (!kthread_should_stop()) { /* * Listen for a request on the socket */ @@ -92,13 +84,10 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) } svc_process(rqstp); } - - flush_signals(current); - svc_exit_thread(rqstp); - nfs_callback_info.pid = 0; - complete(&nfs_callback_info.stopped); unlock_kernel(); - module_put_and_exit(0); + nfs_callback_info.task = NULL; + svc_exit_thread(rqstp); + return 0; } /* @@ -107,14 +96,13 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) int nfs_callback_up(void) { struct svc_serv *serv = NULL; + struct svc_rqst *rqstp; int ret = 0; lock_kernel(); mutex_lock(&nfs_callback_mutex); - if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) + if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; - init_completion(&nfs_callback_info.started); - init_completion(&nfs_callback_info.stopped); serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); ret = -ENOMEM; if (!serv) @@ -127,15 +115,28 @@ int nfs_callback_up(void) nfs_callback_tcpport = ret; dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); - ret = svc_create_thread(nfs_callback_svc, serv); - if (ret < 0) + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) { + ret = PTR_ERR(rqstp); goto out_err; + } + + svc_sock_update_bufs(serv); nfs_callback_info.serv = serv; - wait_for_completion(&nfs_callback_info.started); + + nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, + "nfsv4-svc"); + if (IS_ERR(nfs_callback_info.task)) { + ret = PTR_ERR(nfs_callback_info.task); + nfs_callback_info.serv = NULL; + nfs_callback_info.task = NULL; + svc_exit_thread(rqstp); + goto out_err; + } out: /* * svc_create creates the svc_serv with sv_nrthreads == 1, and then - * svc_create_thread increments that. So we need to call svc_destroy + * svc_prepare_thread increments that. So we need to call svc_destroy * on both success and failure so that the refcount is 1 when the * thread exits. */ @@ -159,12 +160,8 @@ void nfs_callback_down(void) lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; - do { - if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) - break; - if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) - break; - } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); + if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) + kthread_stop(nfs_callback_info.task); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); } -- cgit v0.10.2 From a3fa73bd0eea74c58315114c9fc3e913f6c26d61 Mon Sep 17 00:00:00 2001 From: James Lentini Date: Mon, 25 Feb 2008 12:20:13 -0500 Subject: Documentation: NFS/RDMA instructions for 2.6.25-rc1 Add some instructions for using the new NFS/RDMA features. Signed-off-by: James Lentini Cc: Roland Dreier Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt new file mode 100644 index 0000000..1ae3487 --- /dev/null +++ b/Documentation/filesystems/nfs-rdma.txt @@ -0,0 +1,252 @@ +################################################################################ +# # +# NFS/RDMA README # +# # +################################################################################ + + Author: NetApp and Open Grid Computing + Date: February 25, 2008 + +Table of Contents +~~~~~~~~~~~~~~~~~ + - Overview + - Getting Help + - Installation + - Check RDMA and NFS Setup + - NFS/RDMA Setup + +Overview +~~~~~~~~ + + This document describes how to install and setup the Linux NFS/RDMA client + and server software. + + The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server + was first included in the following release, Linux 2.6.25. + + In our testing, we have obtained excellent performance results (full 10Gbit + wire bandwidth at minimal client CPU) under many workloads. The code passes + the full Connectathon test suite and operates over both Infiniband and iWARP + RDMA adapters. + +Getting Help +~~~~~~~~~~~~ + + If you get stuck, you can ask questions on the + + nfs-rdma-devel@lists.sourceforge.net + + mailing list. + +Installation +~~~~~~~~~~~~ + + These instructions are a step by step guide to building a machine for + use with NFS/RDMA. + + - Install an RDMA device + + Any device supported by the drivers in drivers/infiniband/hw is acceptable. + + Testing has been performed using several Mellanox-based IB cards, the + Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter. + + - Install a Linux distribution and tools + + The first kernel release to contain both the NFS/RDMA client and server was + Linux 2.6.25 Therefore, a distribution compatible with this and subsequent + Linux kernel release should be installed. + + The procedures described in this document have been tested with + distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). + + - Install nfs-utils-1.1.1 or greater on the client + + An NFS/RDMA mount point can only be obtained by using the mount.nfs + command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs + you are using, type: + + > /sbin/mount.nfs -V + + If the version is less than 1.1.1 or the command does not exist, + then you will need to install the latest version of nfs-utils. + + Download the latest package from: + + http://www.kernel.org/pub/linux/utils/nfs + + Uncompress the package and follow the installation instructions. + + If you will not be using GSS and NFSv4, the installation process + can be simplified by disabling these features when running configure: + + > ./configure --disable-gss --disable-nfsv4 + + For more information on this see the package's README and INSTALL files. + + After building the nfs-utils package, there will be a mount.nfs binary in + the utils/mount directory. This binary can be used to initiate NFS v2, v3, + or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. + The standard technique is to create a symlink called mount.nfs4 to mount.nfs. + + NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed + on the NFS client machine. You do not need this specific version of + nfs-utils on the server. Furthermore, only the mount.nfs command from + nfs-utils-1.1.1 is needed on the client. + + - Install a Linux kernel with NFS/RDMA + + The NFS/RDMA client and server are both included in the mainline Linux + kernel version 2.6.25 and later. This and other versions of the 2.6 Linux + kernel can be found at: + + ftp://ftp.kernel.org/pub/linux/kernel/v2.6/ + + Download the sources and place them in an appropriate location. + + - Configure the RDMA stack + + Make sure your kernel configuration has RDMA support enabled. Under + Device Drivers -> InfiniBand support, update the kernel configuration + to enable InfiniBand support [NOTE: the option name is misleading. Enabling + InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)]. + + Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or + iWARP adapter support (amso, cxgb3, etc.). + + If you are using InfiniBand, be sure to enable IP-over-InfiniBand support. + + - Configure the NFS client and server + + Your kernel configuration must also have NFS file system support and/or + NFS server support enabled. These and other NFS related configuration + options can be found under File Systems -> Network File Systems. + + - Build, install, reboot + + The NFS/RDMA code will be enabled automatically if NFS and RDMA + are turned on. The NFS/RDMA client and server are configured via the hidden + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The + value of SUNRPC_XPRT_RDMA will be: + + - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client + and server will not be built + - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M, + in this case the NFS/RDMA client and server will be built as modules + - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client + and server will be built into the kernel + + Therefore, if you have followed the steps above and turned no NFS and RDMA, + the NFS/RDMA client and server will be built. + + Build a new kernel, install it, boot it. + +Check RDMA and NFS Setup +~~~~~~~~~~~~~~~~~~~~~~~~ + + Before configuring the NFS/RDMA software, it is a good idea to test + your new kernel to ensure that the kernel is working correctly. + In particular, it is a good idea to verify that the RDMA stack + is functioning as expected and standard NFS over TCP/IP and/or UDP/IP + is working properly. + + - Check RDMA Setup + + If you built the RDMA components as modules, load them at + this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel + card: + + > modprobe ib_mthca + > modprobe ib_ipoib + + If you are using InfiniBand, make sure there is a Subnet Manager (SM) + running on the network. If your IB switch has an embedded SM, you can + use it. Otherwise, you will need to run an SM, such as OpenSM, on one + of your end nodes. + + If an SM is running on your network, you should see the following: + + > cat /sys/class/infiniband/driverX/ports/1/state + 4: ACTIVE + + where driverX is mthca0, ipath5, ehca3, etc. + + To further test the InfiniBand software stack, use IPoIB (this + assumes you have two IB hosts named host1 and host2): + + host1> ifconfig ib0 a.b.c.x + host2> ifconfig ib0 a.b.c.y + host1> ping a.b.c.y + host2> ping a.b.c.x + + For other device types, follow the appropriate procedures. + + - Check NFS Setup + + For the NFS components enabled above (client and/or server), + test their functionality over standard Ethernet using TCP/IP or UDP/IP. + +NFS/RDMA Setup +~~~~~~~~~~~~~~ + + We recommend that you use two machines, one to act as the client and + one to act as the server. + + One time configuration: + + - On the server system, configure the /etc/exports file and + start the NFS/RDMA server. + + Exports entries with the following format have been tested: + + /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash) + + Here the first IP address is the client's Ethernet address and the second + IP address is the clients IPoIB address. + + Each time a machine boots: + + - Load and configure the RDMA drivers + + For InfiniBand using a Mellanox adapter: + + > modprobe ib_mthca + > modprobe ib_ipoib + > ifconfig ib0 a.b.c.d + + NOTE: use unique addresses for the client and server + + - Start the NFS server + + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA transport module: + + > modprobe svcrdma + + Regardless of how the server was built (module or built-in), start the server: + + > /etc/init.d/nfs start + + or + + > service nfs start + + Instruct the server to listen on the RDMA transport: + + > echo rdma 2050 > /proc/fs/nfsd/portlist + + - On the client system + + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA client module: + + > modprobe xprtrdma.ko + + Regardless of how the client was built (module or built-in), issue the mount.nfs command: + + > /path/to/your/mount.nfs :/ /mnt -i -o rdma,port=2050 + + To verify that the mount is using RDMA, run "cat /proc/mounts" and check the + "proto" field for the given mount. + + Congratulations! You're using NFS/RDMA! -- cgit v0.10.2 From 9167f501c6b53492eb2566dd618ce7f55f2856d5 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Tue, 26 Feb 2008 10:54:36 -0800 Subject: nfsd: initialize lease type in nfs4_open_delegation() While lease is correctly checked by supplying the type argument to vfs_setlease(), it's stored with fl_type uninitialized. This breaks the logic when checking the type of the lease. The fix is to initialize fl_type. The old code still happened to function correctly since F_RDLCK is zero, and we only implement read delegations currently (nor write delegations). But that's no excuse for not fixing this. Signed-off-by: Felix Blyakher Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8235d31..55dfdd7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1639,6 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta locks_init_lock(&fl); fl.fl_lmops = &nfsd_lease_mng_ops; fl.fl_flags = FL_LEASE; + fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl.fl_end = OFFSET_MAX; fl.fl_owner = (fl_owner_t)dp; fl.fl_file = stp->st_vfs_file; @@ -1647,8 +1648,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = vfs_setlease(stp->st_vfs_file, - flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { + if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); flag = NFS4_OPEN_DELEGATE_NONE; -- cgit v0.10.2 From 830bb59b6ece51c36dd456b685d145c69d3b7e1c Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 11 Mar 2008 12:44:27 -0500 Subject: SVCRDMA: Add check for XPT_CLOSE in svc_rdma_send SVCRDMA: Add check for XPT_CLOSE in svc_rdma_send The svcrdma transport can crash if a send is waiting for an empty SQ slot and the connection is closed due to an asynchronous error. The crash is caused when svc_rdma_send attempts to send on a deleted QP. Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 16fd3f6..af408fc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1036,6 +1036,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) wait_event(xprt->sc_send_wait, atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth); + if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) + return 0; continue; } /* Bumped used SQ WR count and post */ -- cgit v0.10.2 From 3d4a6886786f839976c36e62303507692bf87d8d Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Thu, 21 Feb 2008 13:44:12 -0500 Subject: Correct grammer/typos in dprintks cleanup: Fix grammer/typos to use "too" instead of "to" Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0dd7923..1d52308 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -66,8 +66,8 @@ krb5_encrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_blkcipher_ivsize(tfm)); + dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", + crypto_blkcipher_ivsize(tfm)); goto out; } @@ -102,7 +102,7 @@ krb5_decrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", crypto_blkcipher_ivsize(tfm)); goto out; } -- cgit v0.10.2 From 30aef3166ab27f7bcb14c5e809205af8126fa10b Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Thu, 21 Feb 2008 13:44:27 -0500 Subject: Remove define for KRB5_CKSUM_LENGTH, which will become enctype-dependent cleanup: When adding new encryption types, the checksum length can be different for each enctype. Face the fact that the current code only supports DES which has a checksum length of 8. Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5a4b1e0..2167383 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -70,8 +70,6 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; -#define KRB5_CKSUM_LENGTH 8 - #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index dedcbd6..39c08b7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -109,8 +109,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 3bdc527..3cd99a7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -176,9 +176,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; -- cgit v0.10.2 From c3bb257d2d3a1a4e49372b9d74eaebe0fcf110dd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 12 Mar 2008 14:04:25 -0700 Subject: net/sunrpc/svc.c: suppress unintialized var warning net/sunrpc/svc.c: In function '__svc_create_thread': net/sunrpc/svc.c:587: warning: 'oldmask.bits[0u]' may be used uninitialized in this function Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Cc: Tom Tucker Cc: Chuck Lever Signed-off-by: Andrew Morton Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7efb513..a6e6819 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -590,7 +590,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, struct svc_rqst *rqstp; int error = -ENOMEM; int have_oldmask = 0; - cpumask_t oldmask; + cpumask_t uninitialized_var(oldmask); rqstp = svc_prepare_thread(serv, pool); if (IS_ERR(rqstp)) { -- cgit v0.10.2 From 03550fac06c4f0c39a1885d46015c28794413c82 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Mar 2008 17:51:12 -0400 Subject: nfsd: move most of fh_verify to separate function Move the code that actually parses the filehandle and looks up the dentry and export to a separate function. This simplifies the reference counting a little and moves fh_verify() a little closer to the kernel ideal of small, minimally-indentended functions. Clean up a few other minor style sins along the way. Signed-off-by: J. Bruce Fields Cc: Neil Brown diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3e6b3f4..100ae56 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -113,6 +113,124 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, } /* + * Use the given filehandle to look up the corresponding export and + * dentry. On success, the results are used to set fh_export and + * fh_dentry. + */ +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + struct fid *fid = NULL, sfid; + struct svc_export *exp; + struct dentry *dentry; + int fileid_type; + int data_left = fh->fh_size/4; + __be32 error; + + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + if (rqstp->rq_vers == 4 && fh->fh_size == 0) + return nfserr_nofilehandle; + + if (fh->fh_version == 1) { + int len; + + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; + } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); + fid = (struct fid *)(fh->fh_auth + len); + } else { + __u32 tfh[2]; + dev_t xdev; + ino_t xino; + + if (fh->fh_size != NFS_FHSIZE) + return error; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = u32_to_ino_t(fh->ofh_xino); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + } + + error = nfserr_stale; + if (PTR_ERR(exp) == -ENOENT) + return error; + + if (IS_ERR(exp)) + return nfserrno(PTR_ERR(exp)); + + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; + + /* + * Look up the dentry using the NFS file handle. + */ + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + + if (fh->fh_version != 1) { + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + } else + fileid_type = fh->fh_fileid_type; + + if (fileid_type == FILEID_ROOT) + dentry = dget(exp->ex_path.dentry); + else { + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + } + if (dentry == NULL) + goto out; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) != -EINVAL) + error = nfserrno(PTR_ERR(dentry)); + goto out; + } + + if (S_ISDIR(dentry->d_inode->i_mode) && + (dentry->d_flags & DCACHE_DISCONNECTED)) { + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + } + + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + nfsd_nr_verified++; + return 0; +out: + exp_put(exp); + return error; +} + +/* * Perform sanity checks on the dentry in a client's file handle. * * Note that the file handle dentry may need to be freed even after @@ -124,115 +242,18 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, __be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) { - struct knfsd_fh *fh = &fhp->fh_handle; - struct svc_export *exp = NULL; + struct svc_export *exp; struct dentry *dentry; - __be32 error = 0; + __be32 error; dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); if (!fhp->fh_dentry) { - struct fid *fid = NULL, sfid; - int fileid_type; - int data_left = fh->fh_size/4; - - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - if (rqstp->rq_vers == 4 && fh->fh_size == 0) - return nfserr_nofilehandle; - - if (fh->fh_version == 1) { - int len; - if (--data_left<0) goto out; - switch (fh->fh_auth_type) { - case 0: break; - default: goto out; - } - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) goto out; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - if ((data_left -= len)<0) goto out; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, - fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - if (fh->fh_size != NFS_FHSIZE) - goto out; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); - } - - error = nfserr_stale; - if (PTR_ERR(exp) == -ENOENT) - goto out; - - if (IS_ERR(exp)) { - error = nfserrno(PTR_ERR(exp)); - goto out; - } - - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_set_fh_dentry(rqstp, fhp); if (error) goto out; - - /* - * Look up the dentry using the NFS file handle. - */ - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; - - if (fileid_type == FILEID_ROOT) - dentry = dget(exp->ex_path.dentry); - else { - dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); - } - if (dentry == NULL) - goto out; - if (IS_ERR(dentry)) { - if (PTR_ERR(dentry) != -EINVAL) - error = nfserrno(PTR_ERR(dentry)); - goto out; - } - - if (S_ISDIR(dentry->d_inode->i_mode) && - (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - } - - fhp->fh_dentry = dentry; - fhp->fh_export = exp; - nfsd_nr_verified++; - cache_get(&exp->h); + dentry = fhp->fh_dentry; + exp = fhp->fh_export; } else { /* * just rechecking permissions @@ -242,7 +263,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify - just checking\n"); dentry = fhp->fh_dentry; exp = fhp->fh_export; - cache_get(&exp->h); /* * Set user creds for this exportpoint; necessary even * in the "just checking" case because this may be a @@ -281,8 +301,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) access, ntohl(error)); } out: - if (exp && !IS_ERR(exp)) - exp_put(exp); if (error == nfserr_stale) nfsdstats.fh_stale++; return error; -- cgit v0.10.2 From 6ffd4be6336f9c3f1a10822099587544cd0a11d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 27 Mar 2008 16:34:40 -0400 Subject: NFSD: Use "depends on" for PROC_FS dependency Recently, commit 440bcc59 added a reverse dependency to fs/Kconfig to ensure that PROC_FS was enabled if NFSD_V4 was enabled. There is a guideline in Documentation/kbuild/kconfig-language.txt that states "In general use select only for non-visible symbols (no prompts anywhere) and for symbols with no dependencies." A quick grep around other Kconfig files reveals that no entry currently uses "select PROC_FS" -- every one uses "depends on". Thus CONFIG_NFSD_V4 should use "depends on PROC_FS" as well. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 1d81be3..9539848 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1695,7 +1695,6 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL - select PROC_FS if NFSD_V4 select PROC_FS if SUNRPC_GSS help Choose Y here if you want to allow other computers to access @@ -1757,7 +1756,7 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" - depends on NFSD && NFSD_V3 && EXPERIMENTAL + depends on NFSD && NFSD_V3 && PROC_FS && EXPERIMENTAL select FS_POSIX_ACL select RPCSEC_GSS_KRB5 help -- cgit v0.10.2 From 3329ba05231808c96cf9fd0598b8f46afec9777c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 27 Mar 2008 16:34:47 -0400 Subject: SUNRPC: Remove PROC_FS dependency Recently, commit 440bcc59 added a reverse dependency to fs/Kconfig to ensure that PROC_FS was enabled if SUNRPC_GSS was enabled. Apparently this isn't necessary because the auth_gss components under net/sunrpc will build correctly even if PROC_FS is disabled, though RPCSEC_GSS will not work without /proc. It also violates the guideline in Documentation/kbuild/kconfig-language.txt that states "In general use select only for non-visible symbols (no prompts anywhere) and for symbols with no dependencies." To address these issues, remove the dependency. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 9539848..157a885 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1695,7 +1695,6 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL - select PROC_FS if SUNRPC_GSS help Choose Y here if you want to allow other computers to access files residing on this system using Sun's Network File System -- cgit v0.10.2 From 1a448fdb3c5495405bc44d77ea676150f9195444 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 27 Mar 2008 16:34:54 -0400 Subject: NFSD: Remove NFSv4 dependency on NFSv3 Clean up: Because NFSD_V4 "depends on" NFSD_V3, it appears as a child of the NFSD_V3 menu entry, and is not visible if NFSD_V3 is unselected. Replace the dependency on NFSD_V3 with a "select NFSD_V3". This makes NFSD_V4 look and work just like NFS_V3, while ensuring that NFSD_V3 is enabled if NFSD_V4 is. Sam Ravnborg adds: "This use of select is questionable. In general it is bad to select a symbol with dependencies. In this case the dependencies of NFSD_V3 are duplicated for NFSD_V4 so we will not se erratic configurations but do you remember to update NFSD_V4 when you add a depends on NFSD_V3? But I see no other clean way to do it right now." Later he said: "My comment was more to say we have things to address in kconfig. This is abuse in the acceptable range." Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/Kconfig b/fs/Kconfig index 157a885..56c83f4 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1755,7 +1755,8 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" - depends on NFSD && NFSD_V3 && PROC_FS && EXPERIMENTAL + depends on NFSD && PROC_FS && EXPERIMENTAL + select NFSD_V3 select FS_POSIX_ACL select RPCSEC_GSS_KRB5 help -- cgit v0.10.2 From 5743d65c2f77d5145fb4c4262c4dd70c3f078776 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Mon, 31 Mar 2008 10:31:33 -0400 Subject: gss_krb5: consistently use unsigned for seqnum Consistently use unsigned (u32 vs. s32) for seqnum. In get_mic function, send the local copy of seq_send, rather than the context version. Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 2167383..a10f1fb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -148,9 +148,9 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, s32 krb5_make_seq_num(struct crypto_blkcipher *key, int direction, - s32 seqnum, unsigned char *cksum, unsigned char *buf); + u32 seqnum, unsigned char *cksum, unsigned char *buf); s32 krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, - unsigned char *buf, int *direction, s32 * seqnum); + unsigned char *buf, int *direction, u32 *seqnum); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 39c08b7..8e3c87d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -116,7 +116,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, spin_unlock(&krb5_seq_lock); if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)) + seq_send, krb5_hdr + 16, krb5_hdr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 43f3421..f160be6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -43,7 +43,7 @@ s32 krb5_make_seq_num(struct crypto_blkcipher *key, int direction, - s32 seqnum, + u32 seqnum, unsigned char *cksum, unsigned char *buf) { unsigned char plain[8]; @@ -65,7 +65,7 @@ s32 krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, - int *direction, s32 * seqnum) + int *direction, u32 *seqnum) { s32 code; unsigned char plain[8]; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index e30a993..d91a5d0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -82,7 +82,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; int direction; - s32 seqnum; + u32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; -- cgit v0.10.2 From 4ab4b0bedda7d41c63cef98cd5d6cabada460936 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Mon, 31 Mar 2008 10:31:44 -0400 Subject: sunrpc: make token header values less confusing g_make_token_header() and g_token_size() add two too many, and therefore their callers pass in "(logical_value - 2)" rather than "logical_value" as hard-coded values which causes confusion. This dates back to the original g_make_token_header which took an optional token type (token_id) value and added it to the token. This was removed, but the routine always adds room for the token_id rather than not. Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index ea8c92e..d83b881 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -148,7 +148,7 @@ int g_token_size(struct xdr_netobj *mech, unsigned int body_size) { /* set body_size to sequence contents size */ - body_size += 4 + (int) mech->len; /* NEED overflow check */ + body_size += 2 + (int) mech->len; /* NEED overflow check */ return(1 + der_length_size(body_size) + body_size); } @@ -161,7 +161,7 @@ void g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf) { *(*buf)++ = 0x60; - der_write_length(buf, 4 + mech->len + body_size); + der_write_length(buf, 2 + mech->len + body_size); *(*buf)++ = 0x06; *(*buf)++ = (unsigned char) mech->len; TWRITE_STR(*buf, mech->data, ((int) mech->len)); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 8e3c87d..5f1d36d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -87,10 +87,10 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, 22); + token->len = g_token_size(&ctx->mech_used, 24); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22, &ptr); + g_make_token_header(&ctx->mech_used, 24, &ptr); *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 3cd99a7..b00b1b4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -137,7 +137,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; - headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + headlen = g_token_size(&kctx->mech_used, 24 + plainlen) - (buf->len - offset); ptr = buf->head[0].iov_base + offset; @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); - g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index abf17ce..c832712 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -107,10 +107,10 @@ spkm3_make_token(struct spkm3_ctx *ctx, tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen); + token->len = g_token_size(&ctx->mech_used, tokenlen + 2); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ -- cgit v0.10.2 From a5ae03989254ec25cd3a934ca02c008d67e259f7 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 1 Apr 2008 21:48:37 -0400 Subject: NFSD: Strip __KERNEL__ testing from unexported header files. Also, sort the Kbuild file. Signed-off-by: Robert P. J. Day Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index e726fc3..fc97204 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild @@ -1,6 +1,6 @@ unifdef-y += const.h +unifdef-y += debug.h unifdef-y += export.h +unifdef-y += nfsfh.h unifdef-y += stats.h unifdef-y += syscall.h -unifdef-y += nfsfh.h -unifdef-y += debug.h diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 7b5d784..04b355c 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -10,7 +10,6 @@ #ifndef NFSCACHE_H #define NFSCACHE_H -#ifdef __KERNEL__ #include #include @@ -77,5 +76,4 @@ void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -#endif /* __KERNEL__ */ #endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index f4de14d..21ee440 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -27,7 +27,6 @@ #define NFSD_VERSION "0.5" #define NFSD_SUPPORTED_MINOR_VERSION 0 -#ifdef __KERNEL__ /* * Special flags for nfsd_permission. These must be different from MAY_READ, * MAY_WRITE, and MAY_EXEC. @@ -334,6 +333,4 @@ extern struct timeval nfssvc_boot; #endif /* CONFIG_NFSD_V4 */ -#endif /* __KERNEL__ */ - #endif /* LINUX_NFSD_NFSD_H */ -- cgit v0.10.2 From 3c61eecb607dbc2777074b1a95b8a97e31a96a73 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Apr 2008 13:05:27 -0400 Subject: lockd: Fix stale nlmsvc_unlink_block comment As of 5996a298da43a03081e9ba2116983d173001c862 ("NLM: don't unlock on cancel requests") we no longer unlock in this case, so the comment is no longer accurate. Thanks to Stuart Friedberg for pointing out the inconsistency. Cc: Stuart Friedberg Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 4da7c4c..1f122c1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -227,8 +227,7 @@ failed: } /* - * Delete a block. If the lock was cancelled or the grant callback - * failed, unlock is set to 1. + * Delete a block. * It is the caller's responsibility to check whether the file * can be closed hereafter. */ -- cgit v0.10.2 From e1ba1ab76e68de9f4a93fae8406627924efaed99 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Apr 2008 13:09:47 -0400 Subject: nfsd: fix comment Obvious comment nit. Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2e5de77..a9f1538 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -153,7 +153,7 @@ out_err: } /* - * Kill the server process if it is not already up. + * Kill the server process if it is not already down. */ void nfs_callback_down(void) { -- cgit v0.10.2 From 8774282c4cef82695ccca8bd09976de5d6e49610 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 7 Apr 2008 16:45:37 -0400 Subject: SUNRPC: remove svc_create_thread() Now that the nfs4 callback thread uses the kthread API, there are no more users of svc_create_thread(). Remove it. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 64c9755..4b54c5f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -386,7 +386,6 @@ struct svc_serv * svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv*)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); -int svc_create_thread(svc_thread_fn, struct svc_serv *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv*), diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a6e6819..e7b716c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -619,16 +619,6 @@ out_thread: } /* - * Create a thread in the default pool. Caller must hold BKL. - */ -int -svc_create_thread(svc_thread_fn func, struct svc_serv *serv) -{ - return __svc_create_thread(func, serv, &serv->sv_pools[0]); -} -EXPORT_SYMBOL(svc_create_thread); - -/* * Choose a pool in which to create a new thread, for svc_set_num_threads */ static inline struct svc_pool * -- cgit v0.10.2 From ff7d9756b501744540be65e172d27ee321d86103 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 28 Mar 2008 16:04:56 -0400 Subject: nfsd: use static memory for callback program and stats There's no need to dynamically allocate this memory, and doing so may create the possibility of races on shutdown of the rpc client. (We've witnessed it only after adding rpcsec_gss support to the server, after which the rpc code can send destroys calls that expect to still be able to access the rpc_stats structure after it has been destroyed.) Such races are in theory possible if the module containing this "static" memory is removed very quickly after an rpc client is destroyed, but we haven't seen that happen. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index aae2b29..562abf3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -344,6 +344,21 @@ static struct rpc_version * nfs_cb_version[] = { &nfs_cb_version4, }; +static struct rpc_program cb_program; + +static struct rpc_stat cb_stats = { + .program = &cb_program +}; + +#define NFS4_CALLBACK 0x40000000 +static struct rpc_program cb_program = { + .name = "nfs4_cb", + .number = NFS4_CALLBACK, + .nrvers = ARRAY_SIZE(nfs_cb_version), + .version = nfs_cb_version, + .stats = &cb_stats, +}; + /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cb_set an atomic? */ @@ -358,13 +373,12 @@ static int do_probe_callback(void *data) .to_maxval = (NFSD_LEASE_TIME/2) * HZ, .to_exponential = 1, }; - struct rpc_program * program = &cb->cb_program; struct rpc_create_args args = { .protocol = IPPROTO_TCP, .address = (struct sockaddr *)&addr, .addrsize = sizeof(addr), .timeout = &timeparms, - .program = program, + .program = &cb_program, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .flags = (RPC_CLNT_CREATE_NOPING), @@ -382,16 +396,8 @@ static int do_probe_callback(void *data) addr.sin_port = htons(cb->cb_port); addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Initialize rpc_program */ - program->name = "nfs4_cb"; - program->number = cb->cb_prog; - program->nrvers = ARRAY_SIZE(nfs_cb_version); - program->version = nfs_cb_version; - program->stats = &cb->cb_stat; - /* Initialize rpc_stat */ - memset(program->stats, 0, sizeof(cb->cb_stat)); - program->stats->program = program; + memset(args.program->stats, 0, sizeof(struct rpc_stat)); /* Create RPC client */ client = rpc_create(&args); -- cgit v0.10.2 From 9078dc08143e23b18ea72e70265f0df920cf2998 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 8 Apr 2008 13:12:52 +0100 Subject: Use a zero sized array for raw field in struct fid The raw field's size can vary so we use a zero sized array since gcc will not allow a variable sized array inside a union. This has been tested with ext3 and gfs2 and relates to the bug report: http://lkml.org/lkml/2007/10/24/374 and discussion thread: http://lkml.org/lkml/2008/4/7/65 Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig Cc: Neil Brown Cc: Adrian Bunk Signed-off-by: J. Bruce Fields diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index adcbb05..de8387b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -43,7 +43,7 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - __u32 raw[6]; + __u32 raw[0]; }; }; -- cgit v0.10.2 From 06e02d66fa0055230efc2443c43ee4f3ab5eb0b6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Apr 2008 15:40:07 -0400 Subject: NFS: don't let nfs_callback_svc exit on unexpected svc_recv errors (try #2) When svc_recv returns an unexpected error, nfs_callback_svc will print a warning and exit. This problematic for several reasons. In particular, it will cause the reference counts for the thread to be wrong, and no new thread will be started until all nfs4 mounts are unmounted. Rather than exiting on error from svc_recv, have the thread do a 1s sleep and then retry the loop. This is unlikely to cause any harm, and if the error turns out to be something temporary then it may be able to recover. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a9f1538..5606ae3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -59,7 +59,7 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, static int nfs_callback_svc(void *vrqstp) { - int err; + int err, preverr = 0; struct svc_rqst *rqstp = vrqstp; set_freezable(); @@ -74,14 +74,20 @@ nfs_callback_svc(void *vrqstp) * Listen for a request on the socket */ err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); - if (err == -EAGAIN || err == -EINTR) + if (err == -EAGAIN || err == -EINTR) { + preverr = err; continue; + } if (err < 0) { - printk(KERN_WARNING - "%s: terminating on error %d\n", - __FUNCTION__, -err); - break; + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, err); + preverr = err; + } + schedule_timeout_uninterruptible(HZ); + continue; } + preverr = err; svc_process(rqstp); } unlock_kernel(); -- cgit v0.10.2 From f97c650dda24e48405399aa0676e90da52408515 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Apr 2008 15:40:08 -0400 Subject: NLM: don't let lockd exit on unexpected svc_recv errors (try #2) When svc_recv returns an unexpected error, lockd will print a warning and exit. This problematic for several reasons. In particular, it will cause the reference counts for the thread to be wrong, and can lead to a potential BUG() call. Rather than exiting on error from svc_recv, have the thread do a 1s sleep and then retry the loop. This is unlikely to cause any harm, and if the error turns out to be something temporary then it may be able to recover. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 66b5c98..cf977bb 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -112,7 +112,7 @@ static inline void clear_grace_period(void) static int lockd(void *vrqstp) { - int err = 0; + int err = 0, preverr = 0; struct svc_rqst *rqstp = vrqstp; unsigned long grace_period_expire; @@ -172,14 +172,20 @@ lockd(void *vrqstp) * recvfrom routine. */ err = svc_recv(rqstp, timeout); - if (err == -EAGAIN || err == -EINTR) + if (err == -EAGAIN || err == -EINTR) { + preverr = err; continue; + } if (err < 0) { - printk(KERN_WARNING - "lockd: terminating on error %d\n", - -err); - break; + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, err); + preverr = err; + } + schedule_timeout_interruptible(HZ); + continue; } + preverr = err; dprintk("lockd: request from %s\n", svc_print_addr(rqstp, buf, sizeof(buf))); -- cgit v0.10.2 From b7872fe86db78cc96c85a13338ea6e3fe1aef610 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Apr 2008 12:27:01 -0400 Subject: SUNRPC: RPC server still uses 2.4 method for disabling TCP Nagle Use the 2.6 method for disabling TCP Nagle in the kernel's RPC server. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a45c378..8b1bda3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1045,7 +1046,6 @@ void svc_cleanup_xprt_sock(void) static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - struct tcp_sock *tp = tcp_sk(sk); svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -1063,7 +1063,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; - tp->nonagle = 1; /* disable Nagle's algorithm */ + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; /* initialise setting must have enough space to * receive and respond to one request. -- cgit v0.10.2 From c0401ea008fb7c785a93428752d69dccafb127ec Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Apr 2008 12:27:30 -0400 Subject: SUNRPC: Update RPC server's TCP record marker decoder Clean up: Update the RPC server's TCP record marker decoder to match the constructs used by the RPC client's TCP socket transport. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 206f092..8cff696 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,8 +26,8 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - int sk_reclen; /* length of record */ - int sk_tcplen; /* current read length */ + u32 sk_reclen; /* length of record */ + u32 sk_tcplen; /* current read length */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8b1bda3..3e65719 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -823,8 +824,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. */ - if (svsk->sk_tcplen < 4) { - unsigned long want = 4 - svsk->sk_tcplen; + if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; @@ -834,32 +835,31 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_tcplen += len; if (len < want) { - dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", - len, want); + dprintk("svc: short recvfrom while reading record " + "length (%d of %d)\n", len, want); svc_xprt_received(&svsk->sk_xprt); return -EAGAIN; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & 0x80000000)) { + if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (non-terminal)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: multiple fragments " + "per record not supported\n"); goto err_delete; } - svsk->sk_reclen &= 0x7fffffff; + svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (large)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: " + "fragment too large: 0x%08lx\n", + (unsigned long)svsk->sk_reclen); goto err_delete; } } -- cgit v0.10.2 From 50c8bb13eaaf345caf2e7966667ba1d3e4d68af2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Apr 2008 12:27:45 -0400 Subject: SUNRPC: Use unsigned index when looping over arrays Clean up: Suppress a harmless compiler warning in the RPC server related to array indices. ARRAY_SIZE() returns a size_t, so use unsigned type for a loop index when looping over arrays. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e7b716c..bf46186 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -534,8 +534,9 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) static void svc_release_buffer(struct svc_rqst *rqstp) { - int i; - for (i=0; irq_pages); i++) + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) if (rqstp->rq_pages[i]) put_page(rqstp->rq_pages[i]); } -- cgit v0.10.2 From 0dc220f0815497858db539d27947f3ec83202ace Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Apr 2008 12:27:52 -0400 Subject: SUNRPC: Use unsigned loop and array index in svc_init_buffer() Clean up: Suppress a harmless compiler warning. Index rq_pages[] with an unsigned type. Make "pages" unsigned as well, as it never represents a value less than zero. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bf46186..d74c2d2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -510,8 +510,7 @@ EXPORT_SYMBOL(svc_destroy); static int svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { - int pages; - int arghi; + unsigned int pages, arghi; pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page @@ -525,7 +524,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) rqstp->rq_pages[arghi++] = p; pages--; } - return ! pages; + return pages == 0; } /* -- cgit v0.10.2 From dee3209d993f17081d2c58d6470dfc8d6662078b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Apr 2008 16:28:46 -0400 Subject: knfsd: get rid of imode variable in nfsd_setattr ...it's not really needed. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 832e2b8..1d0406c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -264,7 +264,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, struct inode *inode; int accmode = MAY_SATTR; int ftype = 0; - int imode; __be32 err; int host_err; int size_change = 0; @@ -360,10 +359,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, DQUOT_INIT(inode); } - imode = inode->i_mode; if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; - imode = iap->ia_mode |= (imode & ~S_IALLUGO); + iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); /* if changing uid/gid revoke setuid/setgid in mode */ if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { iap->ia_valid |= ATTR_KILL_PRIV; -- cgit v0.10.2 From ca456252db0521e5e88024fa2b67535e9739e030 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Apr 2008 16:28:47 -0400 Subject: knfsd: clear both setuid and setgid whenever a chown is done Currently, knfsd only clears the setuid bit if the owner of a file is changed on a SETATTR call, and only clears the setgid bit if the group is changed. POSIX says this in the spec for chown(): "If the specified file is a regular file, one or more of the S_IXUSR, S_IXGRP, or S_IXOTH bits of the file mode are set, and the process does not have appropriate privileges, the set-user-ID (S_ISUID) and set-group-ID (S_ISGID) bits of the file mode shall be cleared upon successful return from chown()." If I'm reading this correctly, then knfsd is doing this wrong. It should be clearing both the setuid and setgid bit on any SETATTR that changes the uid or gid. This wasn't really as noticable before, but now that the ATTR_KILL_S*ID bits are a no-op for the NFS client, it's more evident. This patch corrects the nfsd_setattr logic so that this occurs. It also does a bit of cleanup to the function. There is also one small behavioral change. If a SETATTR call comes in that changes the uid/gid and the mode, then we now only clear the setgid bit if the group execute bit isn't set. The setgid bit without a group execute bit signifies mandatory locking and we likely don't want to clear the bit in that case. Since there is no call in POSIX that should generate a SETATTR call like this, then this should rarely happen, but it's worth noting. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1d0406c..a3a291f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -359,24 +359,25 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, DQUOT_INIT(inode); } + /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); - /* if changing uid/gid revoke setuid/setgid in mode */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { - iap->ia_valid |= ATTR_KILL_PRIV; + } + + /* Revoke setuid/setgid on chown */ + if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || + ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { + iap->ia_valid |= ATTR_KILL_PRIV; + if (iap->ia_valid & ATTR_MODE) { + /* we're setting mode too, just clear the s*id bits */ iap->ia_mode &= ~S_ISUID; + if (iap->ia_mode & S_IXGRP) + iap->ia_mode &= ~S_ISGID; + } else { + /* set ATTR_KILL_* bits and let VFS handle it */ + iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_mode &= ~S_ISGID; - } else { - /* - * Revoke setuid/setgid bit on chown/chgrp - */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) - iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_valid |= ATTR_KILL_SGID; } /* Change the attributes. */ -- cgit v0.10.2