From cf07d2ea43e5c22149435ee9002cb737eac20eca Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 28 Feb 2010 23:20:19 -0500 Subject: nfsd4: simplify references to nfsd4 lease time Instead of accessing the lease time directly, some users call nfs4_lease_time(), and some a macro, NFSD_LEASE_TIME, defined as nfs4_lease_time(). Neither layer of indirection serves any purpose. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4bc22c7..ed12ad4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -455,7 +455,7 @@ static struct rpc_program cb_program = { static int max_cb_time(void) { - return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ; + return max(nfsd4_lease/10, (time_t)1) * HZ; } /* Reference counting, callback cleanup, etc., all look racy as heck. diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3a20c09..cc9164a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,7 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC /* Globals */ -static time_t lease_time = 90; /* default lease time */ +time_t nfsd4_lease = 90; /* default lease time */ static time_t user_lease_time = 90; static time_t boot_time; static u32 current_ownerid = 1; @@ -2560,9 +2560,9 @@ nfs4_laundromat(void) struct nfs4_stateowner *sop; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - NFSD_LEASE_TIME; - time_t t, clientid_val = NFSD_LEASE_TIME; - time_t u, test_val = NFSD_LEASE_TIME; + time_t cutoff = get_seconds() - nfsd4_lease; + time_t t, clientid_val = nfsd4_lease; + time_t u, test_val = nfsd4_lease; nfs4_lock_state(); @@ -2602,7 +2602,7 @@ nfs4_laundromat(void) list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } - test_val = NFSD_LEASE_TIME; + test_val = nfsd4_lease; list_for_each_safe(pos, next, &close_lru) { sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { @@ -2672,7 
+2672,7 @@ EXPIRED_STATEID(stateid_t *stateid) { if (time_before((unsigned long)boot_time, ((unsigned long)stateid->si_boot)) && - time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { + time_before((unsigned long)(stateid->si_boot + nfsd4_lease), get_seconds())) { dprintk("NFSD: expired stateid " STATEID_FMT "!\n", STATEID_VAL(stateid)); return 1; @@ -3976,7 +3976,7 @@ nfsd4_load_reboot_recovery_data(void) unsigned long get_nfs4_grace_period(void) { - return max(user_lease_time, lease_time) * HZ; + return max(user_lease_time, nfsd4_lease) * HZ; } /* @@ -4009,7 +4009,7 @@ __nfs4_state_start(void) boot_time = get_seconds(); grace_time = get_nfs4_grace_period(); - lease_time = user_lease_time; + nfsd4_lease = user_lease_time; locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); @@ -4036,12 +4036,6 @@ nfs4_state_start(void) return 0; } -time_t -nfs4_lease_time(void) -{ - return lease_time; -} - static void __nfs4_state_shutdown(void) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c458fb1..f61bd73 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1899,7 +1899,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_LEASE_TIME) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(NFSD_LEASE_TIME); + WRITE32(nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { if ((buflen -= 4) < 0) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0f0e77f..8bff674 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1203,8 +1203,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) } #ifdef CONFIG_NFSD_V4 -extern time_t nfs4_leasetime(void); - static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) { /* if size > 10 seconds, call @@ -1224,8 +1222,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) nfs4_reset_lease(lease); } - return scnprintf(buf, 
SIMPLE_TRANSACTION_LIMIT, "%ld\n", - nfs4_lease_time()); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease); } /** diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index e942a1a..b463093 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -82,7 +82,6 @@ int nfs4_state_init(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); -time_t nfs4_lease_time(void); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); #else @@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } -static inline time_t nfs4_lease_time(void) { return 0; } static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } #endif @@ -229,6 +227,8 @@ extern struct timeval nfssvc_boot; #ifdef CONFIG_NFSD_V4 +extern time_t nfsd4_lease; + /* before processing a COMPOUND operation, we have to check that there * is enough space in the buffer for XDR encode to succeed. otherwise, * we might process an operation with side effects, and be unable to @@ -247,7 +247,6 @@ extern struct timeval nfssvc_boot; #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ -#define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ /* -- cgit v0.10.2 From e46b498c84163e86e2627c30bca298c968664f65 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Mar 2010 19:21:21 -0500 Subject: nfsd4: simplify lease/grace interaction The original code here assumed we'd allow the user to change the lease any time, but only allow the change to take effect on restart. Since then we modified the code to allow setting the lease only when the server is down. 
Update the rest of the code to reflect that fact, clarify variable names, and add documentation. Also, the code insisted that the grace period always be the longer of the old and new lease periods, but that's overly conservative--as long as it lasts at least the old lease period, old clients should still know to recover in time. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc9164a..eb8d124 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -45,7 +45,7 @@ /* Globals */ time_t nfsd4_lease = 90; /* default lease time */ -static time_t user_lease_time = 90; +static time_t nfsd4_grace = 90; static time_t boot_time; static u32 current_ownerid = 1; static u32 current_fileid = 1; @@ -2551,6 +2551,12 @@ nfsd4_end_grace(void) dprintk("NFSD: end of grace period\n"); nfsd4_recdir_purge_old(); locks_end_grace(&nfsd4_manager); + /* + * Now that every NFSv4 client has had the chance to recover and + * to see the (possibly new, possibly shorter) lease time, we + * can safely set the next grace time to the current lease time: + */ + nfsd4_grace = nfsd4_lease; } static time_t @@ -3973,12 +3979,6 @@ nfsd4_load_reboot_recovery_data(void) printk("NFSD: Failure reading reboot recovery data\n"); } -unsigned long -get_nfs4_grace_period(void) -{ - return max(user_lease_time, nfsd4_lease) * HZ; -} - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -4005,18 +4005,14 @@ set_max_delegations(void) static int __nfs4_state_start(void) { - unsigned long grace_time; - boot_time = get_seconds(); - grace_time = get_nfs4_grace_period(); - nfsd4_lease = user_lease_time; locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", - grace_time/HZ); + nfsd4_grace); laundry_wq = create_singlethread_workqueue("nfsd4"); if (laundry_wq == NULL) return -ENOMEM; - queue_delayed_work(laundry_wq, &laundromat_work, 
grace_time); + queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); set_max_delegations(); return set_callback_cred(); } @@ -4123,17 +4119,11 @@ nfs4_recoverydir(void) /* * Called when leasetime is changed. * - * The only way the protocol gives us to handle on-the-fly lease changes is to - * simulate a reboot. Instead of doing that, we just wait till the next time - * we start to register any changes in lease time. If the administrator - * really wants to change the lease time *now*, they can go ahead and bring - * nfsd down and then back up again after changing the lease time. - * - * user_lease_time is protected by nfsd_mutex since it's only really accessed + * nfsd4_lease is protected by nfsd_mutex since it's only really accessed * when nfsd is starting */ void nfs4_reset_lease(time_t leasetime) { - user_lease_time = leasetime; + nfsd4_lease = leasetime; } -- cgit v0.10.2 From f958a1320ff7a1e0e861d3c90de6da12a88839dc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Mar 2010 19:43:02 -0500 Subject: nfsd4: remove unnecessary lease-setting function This is another layer of indirection that doesn't really buy us anything. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eb8d124..4471046 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4115,15 +4115,3 @@ nfs4_recoverydir(void) { return user_recovery_dirname; } - -/* - * Called when leasetime is changed. 
- * - * nfsd4_lease is protected by nfsd_mutex since it's only really accessed - * when nfsd is starting - */ -void -nfs4_reset_lease(time_t leasetime) -{ - nfsd4_lease = leasetime; -} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8bff674..6738e9d 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1219,7 +1219,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) return rv; if (lease < 10 || lease > 3600) return -EINVAL; - nfs4_reset_lease(lease); + nfsd4_lease = lease; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease); -- cgit v0.10.2 From f013574014816c7a557b3c52233f3620463f0b9b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Mar 2010 19:32:36 -0500 Subject: nfsd4: reshuffle lease-setting code to allow reuse We'll soon allow setting the grace period, so we'll want to share this code. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6738e9d..9c73cac 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1203,26 +1203,36 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) } #ifdef CONFIG_NFSD_V4 -static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) { /* if size > 10 seconds, call * nfs4_reset_lease() then write out the new lease (seconds) as reply */ char *mesg = buf; - int rv, lease; + int rv, i; if (size > 0) { if (nfsd_serv) return -EBUSY; - rv = get_int(&mesg, &lease); + rv = get_int(&mesg, &i); if (rv) return rv; - if (lease < 10 || lease > 3600) + if (i < 10 || i > 3600) return -EINVAL; - nfsd4_lease = lease; + *time = i; } - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); +} + +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = 
__nfsd4_write_time(file, buf, size, time); + mutex_unlock(&nfsd_mutex); + return rv; } /** @@ -1248,12 +1258,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - ssize_t rv; - - mutex_lock(&nfsd_mutex); - rv = __write_leasetime(file, buf, size); - mutex_unlock(&nfsd_mutex); - return rv; + return nfsd4_write_time(file, buf, size, &nfsd4_lease); } extern char *nfs4_recoverydir(void); -- cgit v0.10.2 From efc4bb4fdd09c11f5558446e584a494c6feb43c7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Mar 2010 11:04:06 -0500 Subject: nfsd4: allow setting grace period time Allow explicit configuration of the grace period time as well as the lease period time. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4471046..6edfe23 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -45,7 +45,7 @@ /* Globals */ time_t nfsd4_lease = 90; /* default lease time */ -static time_t nfsd4_grace = 90; +time_t nfsd4_grace = 90; static time_t boot_time; static u32 current_ownerid = 1; static u32 current_fileid = 1; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9c73cac..7ab70ff 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -45,6 +45,7 @@ enum { */ #ifdef CONFIG_NFSD_V4 NFSD_Leasetime, + NFSD_Gracetime, NFSD_RecoveryDir, #endif }; @@ -69,6 +70,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); +static ssize_t write_gracetime(struct file *file, char *buf, size_t size); static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif @@ -90,6 +92,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_MaxBlkSize] = write_maxblksize, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = 
write_leasetime, + [NFSD_Gracetime] = write_gracetime, [NFSD_RecoveryDir] = write_recoverydir, #endif }; @@ -1261,6 +1264,21 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return nfsd4_write_time(file, buf, size, &nfsd4_lease); } +/** + * write_gracetime - Set or report current NFSv4 grace period time + * + * As above, but sets the time of the NFSv4 grace period. + * + * Note this should never be set to less than the *previous* + * lease-period time, but we don't try to enforce this. (In the common + * case (a new boot), we don't know what the previous lease time was + * anyway.) + */ +static ssize_t write_gracetime(struct file *file, char *buf, size_t size) +{ + return nfsd4_write_time(file, buf, size, &nfsd4_grace); +} + extern char *nfs4_recoverydir(void); static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) @@ -1352,6 +1370,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, #endif /* last one */ {""} diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index b463093..7237776 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -228,6 +228,7 @@ extern struct timeval nfssvc_boot; #ifdef CONFIG_NFSD_V4 extern time_t nfsd4_lease; +extern time_t nfsd4_grace; /* before processing a COMPOUND operation, we have to check that there * is enough space in the buffer for XDR encode to succeed. otherwise, -- cgit v0.10.2 From e7b184f199fd3c80b618ec8244cbda70857d2779 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Mar 2010 11:18:40 -0500 Subject: nfsd4: document lease/grace-period limits The current documentation here is out of date, and not quite right. 
(Future work: some user documentation would be useful.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7ab70ff..413cb8e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1208,9 +1208,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) { - /* if size > 10 seconds, call - * nfs4_reset_lease() then write out the new lease (seconds) as reply - */ char *mesg = buf; int rv, i; @@ -1220,6 +1217,18 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim rv = get_int(&mesg, &i); if (rv) return rv; + /* + * Some sanity checking. We don't have a reason for + * these particular numbers, but problems with the + * extremes are: + * - Too short: the briefest network outage may + * cause clients to lose all their locks. Also, + * the frequent polling may be wasteful. + * - Too long: do you really want reboot recovery + * to take more than an hour? Or to make other + * clients wait an hour before being able to + * revoke a dead client's locks? + */ if (i < 10 || i > 3600) return -EINVAL; *time = i; -- cgit v0.10.2 From 3af4974eb2c7867d6e160977195dfde586d0e564 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc: don't keep expired entries in the auth caches. Currently, expired entries remain in the auth caches as long as there is a reference. This was needed long ago when the auth_domain cache used the same cache infrastructure. But since that (being a very different sort of cache) was separated, this test is no longer needed. So remove the test on refcnt and tidy up the surrounding code. This allows the cache_dequeue call (which needed to be there to drop a potentially awkward reference) to be moved outside of the spinlock, which is a better place for it. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39bddba..83592e0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -397,31 +397,28 @@ static int cache_clean(void) /* Ok, now to clean this strand */ cp = & current_detail->hash_table[current_index]; - ch = *cp; - for (; ch; cp= & ch->next, ch= *cp) { + for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (ch->expiry_time >= get_seconds() && ch->last_refresh >= current_detail->flush_time) continue; - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - if (atomic_read(&ch->ref.refcount) == 1) - break; - } - if (ch) { *cp = ch->next; ch->next = NULL; current_detail->entries--; rv = 1; + break; } + write_unlock(&current_detail->hash_lock); d = current_detail; if (!ch) current_index ++; spin_unlock(&cache_list_lock); if (ch) { + if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) + cache_dequeue(current_detail, ch); cache_revisit_request(ch); cache_put(ch, d); } -- cgit v0.10.2 From 2f50d8b63dd6e5320a9d223298df19df3502da29 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc/cache: factor out cache_is_expired This removes a tiny bit of code duplication, but more importantly prepares for the following patch, which will perform the expiry check in cache_lookup and the rest of the validity check in cache_check. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 83592e0..9826c5c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -49,6 +49,12 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } +static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +{ + return (h->expiry_time < get_seconds()) || + (detail->flush_time > h->last_refresh); +} + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -184,9 +190,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags) || - h->expiry_time < get_seconds()) - return -EAGAIN; - else if (detail->flush_time > h->last_refresh) + cache_is_expired(detail, h)) return -EAGAIN; else { /* entry is valid */ @@ -400,8 +404,7 @@ static int cache_clean(void) for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; - if (ch->expiry_time >= get_seconds() && - ch->last_refresh >= current_detail->flush_time) + if (!cache_is_expired(current_detail, ch)) continue; *cp = ch->next; -- cgit v0.10.2 From d202cce8963d9268ff355a386e20243e8332b308 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: sunrpc: never return expired entries in sunrpc_cache_lookup If sunrpc_cache_lookup finds an expired entry, remove it from the cache and return a freshly created non-VALID entry instead. This ensures that we only ever get a usable entry, or an entry that will become usable once an update arrives. i.e. we will never need to repeat the lookup. This allows us to remove the 'is_expired' test from cache_check (i.e. from cache_is_valid). cache_check should never get an expired entry as 'lookup' will never return one. 
If it does happen - due to inconvenient timing - then just accept it as still valid; it won't be very much past its use-by date. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9826c5c..3e1ef8b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -59,7 +59,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head **head, **hp; - struct cache_head *new = NULL; + struct cache_head *new = NULL, *freeme = NULL; head = &detail->hash_table[hash]; @@ -68,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, for (hp=head; *hp != NULL ; hp = &(*hp)->next) { struct cache_head *tmp = *hp; if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) + /* This entry is expired, we will discard it. */ + break; cache_get(tmp); read_unlock(&detail->hash_lock); return tmp; @@ -92,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, for (hp=head; *hp != NULL ; hp = &(*hp)->next) { struct cache_head *tmp = *hp; if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) { + *hp = tmp->next; + tmp->next = NULL; + detail->entries --; + freeme = tmp; + break; + } cache_get(tmp); write_unlock(&detail->hash_lock); cache_put(new, detail); @@ -104,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, cache_get(new); write_unlock(&detail->hash_lock); + if (freeme) + cache_put(freeme, detail); return new; } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); @@ -189,8 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) { - if (!test_bit(CACHE_VALID, &h->flags) || - cache_is_expired(detail, h)) + if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; else { /* entry is valid */ -- cgit v0.10.2 From 61f8603d93fa0b0e2f73be7a4f048696417a24a3 Mon Sep 17 
00:00:00 2001 From: NeilBrown Date: Wed, 3 Feb 2010 17:31:31 +1100 Subject: nfsd: factor out hash functions for export caches. Both the _lookup and the _update functions for these two caches independently calculate the hash of the key. So factor out that code for improved reuse. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a0c4016..65ddc5b 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -258,10 +258,9 @@ static struct cache_detail svc_expkey_cache = { .alloc = expkey_alloc, }; -static struct svc_expkey * -svc_expkey_lookup(struct svc_expkey *item) +static int +svc_expkey_hash(struct svc_expkey *item) { - struct cache_head *ch; int hash = item->ek_fsidtype; char * cp = (char*)item->ek_fsid; int len = key_len(item->ek_fsidtype); @@ -269,6 +268,14 @@ svc_expkey_lookup(struct svc_expkey *item) hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); hash &= EXPKEY_HASHMASK; + return hash; +} + +static struct svc_expkey * +svc_expkey_lookup(struct svc_expkey *item) +{ + struct cache_head *ch; + int hash = svc_expkey_hash(item); ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, hash); @@ -282,13 +289,7 @@ static struct svc_expkey * svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) { struct cache_head *ch; - int hash = new->ek_fsidtype; - char * cp = (char*)new->ek_fsid; - int len = key_len(new->ek_fsidtype); - - hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); - hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS); - hash &= EXPKEY_HASHMASK; + int hash = svc_expkey_hash(new); ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, &old->h, hash); @@ -737,14 +738,22 @@ struct cache_detail svc_export_cache = { .alloc = svc_export_alloc, }; -static struct svc_export * -svc_export_lookup(struct svc_export *exp) +static int +svc_export_hash(struct svc_export *exp) { - struct cache_head *ch; int hash; + hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); hash 
^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); + return hash; +} + +static struct svc_export * +svc_export_lookup(struct svc_export *exp) +{ + struct cache_head *ch; + int hash = svc_export_hash(exp); ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, hash); @@ -758,10 +767,7 @@ static struct svc_export * svc_export_update(struct svc_export *new, struct svc_export *old) { struct cache_head *ch; - int hash; - hash = hash_ptr(old->ex_client, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS); + int hash = svc_export_hash(old); ch = sunrpc_cache_update(&svc_export_cache, &new->h, &old->h, -- cgit v0.10.2 From 91885258e8343bb65c08f668d7e6c16563eb4284 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 19 Mar 2010 08:06:28 -0400 Subject: nfsd: don't break lease while servicing a COMMIT This is the second attempt to fix the problem whereby a COMMIT call causes a lease break and triggers a possible deadlock. The problem is that nfsd attempts to break a lease on a COMMIT call. This triggers a delegation recall if the lease is held for a delegation. If the client is the one holding the delegation and it's the same one on which it's issuing the COMMIT, then it can't return that delegation until the COMMIT is complete. But, nfsd won't complete the COMMIT until the delegation is returned. The client and server are essentially deadlocked until the state is marked bad (due to the client not responding on the callback channel). The first patch attempted to deal with this by eliminating the open of the file altogether and simply had nfsd_commit pass a NULL file pointer to the vfs_fsync_range. That would conflict with some work in progress by Christoph Hellwig to clean up the fsync interface, so this patch takes a different approach. 
This declares a new NFSD_MAY_NOT_BREAK_LEASE access flag that indicates to nfsd_open that it should not break any leases when opening the file, and has nfsd_commit set that flag on the nfsd_open call. For now, this patch leaves nfsd_commit opening the file with write access since I'm not clear on what sort of access would be more appropriate. Signed-off-by: Jeff Layton Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a11b0e867..c2dcb4c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -723,7 +723,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; __be32 err; - int host_err; + int host_err = 0; validate_process_creds(); @@ -760,7 +760,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * Check to see if there are any leases on this file. * This may block while leases are broken. */ - host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); + if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) + host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? 
O_WRONLY : 0)); if (host_err == -EWOULDBLOCK) host_err = -ETIMEDOUT; if (host_err) /* NOMEM or WOULDBLOCK */ @@ -1168,7 +1169,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, + NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 4b1de0a..217a62c 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -20,6 +20,7 @@ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 +#define NFSD_MAY_NOT_BREAK_LEASE 512 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) -- cgit v0.10.2 From a5990ea1254cd186b38744507aeec3136a0c1c95 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 11 Mar 2010 14:08:10 -0800 Subject: sunrpc/cache: fix module refcnt leak in a failure path Don't forget to release the module refcnt if seq_open() returns failure. Signed-off-by: Li Zefan Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 3e1ef8b..a3f340c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1244,8 +1244,10 @@ static int content_open(struct inode *inode, struct file *file, if (!cd || !try_module_get(cd->owner)) return -EACCES; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); - if (han == NULL) + if (han == NULL) { + module_put(cd->owner); return -ENOMEM; + } han->cd = cd; return 0; -- cgit v0.10.2 From 788e69e548cc8d127b90f0de1f7b7e983d1d587a Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Mon, 29 Mar 2010 21:02:31 -0400 Subject: svcrpc: don't hold sv_lock over svc_xprt_put() svc_xprt_put() can call tcp_close(), which can sleep, so we shouldn't be holding this lock. In fact, only the xpt_list removal and the sv_tmpcnt decrement should need the sv_lock here. Reported-by: Mi Jinlong Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8f0f1fb..c334f54 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -892,12 +892,12 @@ void svc_delete_xprt(struct svc_xprt *xprt) */ if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; + spin_unlock_bh(&serv->sv_lock); while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); svc_xprt_put(xprt); - spin_unlock_bh(&serv->sv_lock); } void svc_close_xprt(struct svc_xprt *xprt) -- cgit v0.10.2 From 227f98d98d2ed7929f41426adc21f57b927354a6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Feb 2010 08:27:24 -0800 Subject: nfsd4: preallocate nfs4_rpc_args Instead of allocating this small structure, just include it in the delegation. The nfsd4_callback structure isn't really necessary yet, but we plan to add to it all the information necessary to perform a callback. Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed12ad4..b99c3f0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -78,11 +78,6 @@ enum nfs_cb_opnum4 { cb_sequence_dec_sz + \ op_dec_sz) -struct nfs4_rpc_args { - void *args_op; - struct nfsd4_cb_sequence args_seq; -}; - /* * Generic encode routines from fs/nfs/nfs4xdr.c */ @@ -676,7 +671,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) break; default: /* success, or error we can't handle */ - goto done; + return; } if (dp->dl_retries--) { rpc_delay(task, 2*HZ); @@ -687,8 +682,6 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) atomic_set(&clp->cl_cb_conn.cb_set, 0); warn_no_callback_path(clp, task->tk_status); } -done: - kfree(task->tk_msg.rpc_argp); } static void nfsd4_cb_recall_release(void *calldata) @@ -714,24 +707,19 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; - struct nfs4_rpc_args *args; + struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_cred = callback_cred }; - int status = -ENOMEM; + int status; - args = kzalloc(sizeof(*args), GFP_KERNEL); - if (!args) - goto out; args->args_op = dp; msg.rpc_argp = args; dp->dl_retries = 1; status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); -out: if (status) { - kfree(args); put_nfs4_client(clp); nfs4_put_delegation(dp); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index fefeae2..b854379 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -70,6 +70,15 @@ struct nfsd4_cb_sequence { struct nfs4_client *cbs_clp; }; +struct nfs4_rpc_args { + void *args_op; + struct nfsd4_cb_sequence args_seq; +}; + +struct nfsd4_callback { + struct nfs4_rpc_args cb_args; +}; + struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; @@ -86,6 +95,7 
@@ struct nfs4_delegation { stateid_t dl_stateid; struct knfsd_fh dl_fh; int dl_retries; + struct nfsd4_callback dl_recall; }; /* client delegation callback info */ -- cgit v0.10.2 From 147efd0dd702ce2f1ab44449bd70369405ef68fd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 21 Feb 2010 17:41:19 -0800 Subject: nfsd4: shutdown callbacks on expiry Once we've expired the client, there's no further purpose to the callbacks; go ahead and shut down the callback client rather than waiting for the last reference to go. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index efef7f2..9ce5831 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -697,9 +697,6 @@ shutdown_callback_client(struct nfs4_client *clp) static inline void free_client(struct nfs4_client *clp) { - shutdown_callback_client(clp); - if (clp->cl_cb_xprt) - svc_xprt_put(clp->cl_cb_xprt); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -752,6 +749,9 @@ expire_client(struct nfs4_client *clp) se_perclnt); release_session(ses); } + shutdown_callback_client(clp); + if (clp->cl_cb_xprt) + svc_xprt_put(clp->cl_cb_xprt); put_nfs4_client(clp); } -- cgit v0.10.2 From 3df796dbe97a98a6a25e6b7b88e9d326e261f371 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 21 Feb 2010 17:51:53 -0800 Subject: nfsd4: remove dprintk I haven't found this useful. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ce5831..5d86df1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -718,9 +718,6 @@ expire_client(struct nfs4_client *clp) struct nfs4_delegation *dp; struct list_head reaplist; - dprintk("NFSD: expire_client cl_count %d\n", - atomic_read(&clp->cl_count)); - INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); while (!list_empty(&clp->cl_delegations)) { -- cgit v0.10.2 From 9045b4b9f7f340f43de0cf687b5b52f6feaaa984 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Sun, 21 Feb 2010 17:53:04 -0800 Subject: nfsd4: remove probe task's reference on client Any null probe rpc will be synchronously destroyed by the rpc_shutdown_client() in expire_client(), so the rpc task cannot outlast the nfs4 client. Therefore there's no need for that task to hold a reference on the client. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index b99c3f0..91eb2ea 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -509,7 +509,6 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) warn_no_callback_path(clp, task->tk_status); else atomic_set(&clp->cl_cb_conn.cb_set, 1); - put_nfs4_client(clp); } static const struct rpc_call_ops nfsd4_cb_probe_ops = { @@ -542,10 +541,8 @@ void do_probe_callback(struct nfs4_client *clp) status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, &nfsd4_cb_probe_ops, (void *)clp); - if (status) { + if (status) warn_no_callback_path(clp, status); - put_nfs4_client(clp); - } } /* @@ -563,10 +560,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) warn_no_callback_path(clp, status); return; } - - /* the task holds a reference to the nfs4_client struct */ - atomic_inc(&clp->cl_count); - do_probe_callback(clp); } -- cgit v0.10.2 From 408b79bcc32d7221a4975771ab6bff3d3173d530 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 15 Apr 2010 15:11:09 -0400 Subject: nfsd4: consistent session flag setting We should clear these flags on any new create_session, not just on the first one. Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d86df1..5051ade 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1323,12 +1323,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); - /* - * We do not support RDMA or persistent sessions - */ - cr_ses->flags &= ~SESSION4_PERSIST; - cr_ses->flags &= ~SESSION4_RDMA; - if (cr_ses->flags & SESSION4_BACK_CHAN) { unconf->cl_cb_xprt = rqstp->rq_xprt; svc_xprt_get(unconf->cl_cb_xprt); @@ -1348,6 +1342,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } + /* + * We do not support RDMA or persistent sessions + */ + cr_ses->flags &= ~SESSION4_PERSIST; + cr_ses->flags &= ~SESSION4_RDMA; + status = alloc_init_session(rqstp, conf, cr_ses); if (status) goto out; -- cgit v0.10.2 From 3c4ab2aaa90826060b1e8d4036f9bb8325f8759e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 Apr 2010 15:12:51 -0400 Subject: nfsd4: indentation cleanup Looks like a put-and-paste mistake. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index efa3377..c28958e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -513,9 +513,8 @@ extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, - struct nfsd4_compound_state *, -struct nfsd4_exchange_id *); - extern __be32 nfsd4_create_session(struct svc_rqst *, + struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_create_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_create_session *); extern __be32 nfsd4_sequence(struct svc_rqst *, -- cgit v0.10.2 From b5a1a81e5c25fb6bb3fdc1812ba69ff6ab638fcf Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Wed, 3 Mar 2010 14:52:55 -0500 Subject: nfsd4: don't sleep in lease-break callback The NFSv4 server's fl_break callback can sleep (dropping the BKL), in order to allocate a new rpc task to send a recall to the client. As far as I can tell this doesn't cause any races in the current code, but the analysis is difficult. Also, the sleep here may complicate the move away from the BKL. So, just schedule some work to do the job for us instead. The work will later also prove useful for restarting a call after the callback information is changed. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 91eb2ea..e078c74 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,6 +32,7 @@ */ #include +#include #include "nfsd.h" #include "state.h" @@ -692,11 +693,41 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = { .rpc_release = nfsd4_cb_recall_release, }; +static struct workqueue_struct *callback_wq; + +int nfsd4_create_callback_queue(void) +{ + callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); + if (!callback_wq) + return -ENOMEM; + return 0; +} + +void nfsd4_destroy_callback_queue(void) +{ + destroy_workqueue(callback_wq); +} + +void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt +*new) +{ + struct rpc_clnt *old = clp->cl_cb_conn.cb_client; + + clp->cl_cb_conn.cb_client = new; + /* + * After this, any work that saw the old value of cb_client will + * be gone: + */ + flush_workqueue(callback_wq); + /* So we can safely shut it down: */ + if (old) + rpc_shutdown_client(old); +} + /* * called with dp->dl_count inc'ed. 
*/ -void -nfsd4_cb_recall(struct nfs4_delegation *dp) +static void _nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; @@ -707,6 +738,9 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) }; int status; + if (clnt == NULL) + return; /* Client is shutting down; give up. */ + args->args_op = dp; msg.rpc_argp = args; dp->dl_retries = 1; @@ -717,3 +751,19 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) nfs4_put_delegation(dp); } } + +void nfsd4_do_callback_rpc(struct work_struct *w) +{ + /* XXX: for now, just send off delegation recall. */ + /* In future, generalize to handle any sort of callback. */ + struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); + struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall); + + _nfsd4_cb_recall(dp); +} + + +void nfsd4_cb_recall(struct nfs4_delegation *dp) +{ + queue_work(callback_wq, &dp->dl_recall.cb_work); +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5051ade..adc51d1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -198,6 +198,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f atomic_set(&dp->dl_count, 1); list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &clp->cl_delegations); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); return dp; } @@ -679,21 +680,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return clp; } -static void -shutdown_callback_client(struct nfs4_client *clp) -{ - struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; - - if (clnt) { - /* - * Callback threads take a reference on the client, so there - * should be no outstanding callbacks at this point. 
- */ - clp->cl_cb_conn.cb_client = NULL; - rpc_shutdown_client(clnt); - } -} - static inline void free_client(struct nfs4_client *clp) { @@ -746,7 +732,7 @@ expire_client(struct nfs4_client *clp) se_perclnt); release_session(ses); } - shutdown_callback_client(clp); + nfsd4_set_callback_client(clp, NULL); if (clp->cl_cb_xprt) svc_xprt_put(clp->cl_cb_xprt); put_nfs4_client(clp); @@ -1392,7 +1378,7 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_unlock(&sessionid_lock); /* wait for callbacks */ - shutdown_callback_client(ses->se_client); + nfsd4_set_callback_client(ses->se_client, NULL); nfsd4_put_session(ses); status = nfs_ok; out: @@ -4004,16 +3990,27 @@ set_max_delegations(void) static int __nfs4_state_start(void) { + int ret; + boot_time = get_seconds(); locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", nfsd4_grace); + ret = set_callback_cred(); + if (ret) + return -ENOMEM; laundry_wq = create_singlethread_workqueue("nfsd4"); if (laundry_wq == NULL) return -ENOMEM; + ret = nfsd4_create_callback_queue(); + if (ret) + goto out_free_laundry; queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); set_max_delegations(); - return set_callback_cred(); + return 0; +out_free_laundry: + destroy_workqueue(laundry_wq); + return ret; } int @@ -4075,6 +4072,7 @@ nfs4_state_shutdown(void) nfs4_lock_state(); nfs4_release_reclaim(); __nfs4_state_shutdown(); + nfsd4_destroy_callback_queue(); nfs4_unlock_state(); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index b854379..c4c92ae 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -77,6 +77,7 @@ struct nfs4_rpc_args { struct nfsd4_callback { struct nfs4_rpc_args cb_args; + struct work_struct cb_work; }; struct nfs4_delegation { @@ -391,7 +392,11 @@ extern void put_nfs4_client(struct nfs4_client *clp); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void 
nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); +extern int nfsd4_create_callback_queue(void); +extern void nfsd4_destroy_callback_queue(void); +extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); extern void nfsd4_init_recdir(char *recdir_name); -- cgit v0.10.2 From b12a05cbdfdf7e4d8cbe8fa78e995f971420086b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 4 Mar 2010 11:32:59 -0500 Subject: nfsd4: cl_count is unused Now that the shutdown sequence guarantees callbacks are shut down before the client is destroyed, we no longer have a use for cl_count. We'll probably reinstate a reference count on the client some day, but it will be held by users other than callbacks. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e078c74..5856fc8 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -681,10 +681,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) static void nfsd4_cb_recall_release(void *calldata) { struct nfs4_delegation *dp = calldata; - struct nfs4_client *clp = dp->dl_client; nfs4_put_delegation(dp); - put_nfs4_client(clp); } static const struct rpc_call_ops nfsd4_cb_recall_ops = { @@ -746,10 +744,8 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) dp->dl_retries = 1; status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); - if (status) { - put_nfs4_client(clp); + if (status) nfs4_put_delegation(dp); - } } void nfsd4_do_callback_rpc(struct work_struct *w) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index adc51d1..cf650cb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -690,13 +690,6 @@ free_client(struct nfs4_client *clp) kfree(clp); } -void -put_nfs4_client(struct nfs4_client *clp) -{ - 
if (atomic_dec_and_test(&clp->cl_count)) - free_client(clp); -} - static void expire_client(struct nfs4_client *clp) { @@ -735,7 +728,7 @@ expire_client(struct nfs4_client *clp) nfsd4_set_callback_client(clp, NULL); if (clp->cl_cb_xprt) svc_xprt_put(clp->cl_cb_xprt); - put_nfs4_client(clp); + free_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -821,7 +814,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, } memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_count, 1); atomic_set(&clp->cl_cb_conn.cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); @@ -2010,7 +2002,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl) * lock) we know the server hasn't removed the lease yet, we know * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - atomic_inc(&dp->dl_client->cl_count); spin_lock(&recall_lock); list_add_tail(&dp->dl_recall_lru, &del_recall_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c4c92ae..cef20ab 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -224,7 +224,6 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ struct nfs4_cb_conn cl_cb_conn; /* callback info */ - atomic_t cl_count; /* ref count */ u32 cl_firststate; /* recovery dir creation */ /* for nfs41 */ @@ -388,7 +387,6 @@ extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); -extern void put_nfs4_client(struct nfs4_client *clp); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); -- cgit v0.10.2 From 2bf23875f55af6038a5d1c164a52cec4c24609ba Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Mon, 8 Mar 2010 12:37:27 -0500 Subject: nfsd4: rearrange cb data structures Mainly I just want to separate the arguments used for setting up the tcp client from the rest. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5856fc8..d6c46a9 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -455,7 +455,7 @@ static int max_cb_time(void) } /* Reference counting, callback cleanup, etc., all look racy as heck. - * And why is cb_set an atomic? */ + * And why is cl_cb_set an atomic? */ int setup_callback_client(struct nfs4_client *clp) { @@ -481,7 +481,7 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; if (cb->cb_minorversion) { - args.bc_xprt = clp->cl_cb_xprt; + args.bc_xprt = clp->cl_cb_conn.cb_xprt; args.protocol = XPRT_TRANSPORT_BC_TCP; } /* Create RPC client */ @@ -491,7 +491,7 @@ int setup_callback_client(struct nfs4_client *clp) PTR_ERR(client)); return PTR_ERR(client); } - cb->cb_client = client; + clp->cl_cb_client = client; return 0; } @@ -509,7 +509,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) if (task->tk_status) warn_no_callback_path(clp, task->tk_status); else - atomic_set(&clp->cl_cb_conn.cb_set, 1); + atomic_set(&clp->cl_cb_set, 1); } static const struct rpc_call_ops nfsd4_cb_probe_ops = { @@ -531,7 +531,6 @@ int set_callback_cred(void) void do_probe_callback(struct nfs4_client *clp) { - struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, @@ -539,7 +538,7 @@ void do_probe_callback(struct nfs4_client *clp) }; int status; - status = rpc_call_async(cb->cb_client, &msg, + status = rpc_call_async(clp->cl_cb_client, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, &nfsd4_cb_probe_ops, (void *)clp); if (status) @@ -554,7 +553,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) { int 
status; - BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); + BUG_ON(atomic_read(&clp->cl_cb_set)); status = setup_callback_client(clp); if (status) { @@ -656,7 +655,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -EIO: /* Network partition? */ - atomic_set(&clp->cl_cb_conn.cb_set, 0); + atomic_set(&clp->cl_cb_set, 0); warn_no_callback_path(clp, task->tk_status); case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: @@ -673,7 +672,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) rpc_restart_call(task); return; } else { - atomic_set(&clp->cl_cb_conn.cb_set, 0); + atomic_set(&clp->cl_cb_set, 0); warn_no_callback_path(clp, task->tk_status); } } @@ -709,11 +708,11 @@ void nfsd4_destroy_callback_queue(void) void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) { - struct rpc_clnt *old = clp->cl_cb_conn.cb_client; + struct rpc_clnt *old = clp->cl_cb_client; - clp->cl_cb_conn.cb_client = new; + clp->cl_cb_client = new; /* - * After this, any work that saw the old value of cb_client will + * After this, any work that saw the old value of cl_cb_client will * be gone: */ flush_workqueue(callback_wq); @@ -728,7 +727,7 @@ void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt static void _nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; + struct rpc_clnt *clnt = clp->cl_cb_client; struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cf650cb..59c9bd4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -726,8 +726,8 @@ expire_client(struct nfs4_client *clp) release_session(ses); } nfsd4_set_callback_client(clp, NULL); - if (clp->cl_cb_xprt) - svc_xprt_put(clp->cl_cb_xprt); + if (clp->cl_cb_conn.cb_xprt) + 
svc_xprt_put(clp->cl_cb_conn.cb_xprt); free_client(clp); } @@ -814,7 +814,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, } memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_cb_conn.cb_set, 0); + atomic_set(&clp->cl_cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -1302,8 +1302,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, move_to_confirmed(unconf); if (cr_ses->flags & SESSION4_BACK_CHAN) { - unconf->cl_cb_xprt = rqstp->rq_xprt; - svc_xprt_get(unconf->cl_cb_xprt); + unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt; + svc_xprt_get(rqstp->rq_xprt); rpc_copy_addr( (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, sa); @@ -1607,7 +1607,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, else { /* XXX: We just turn off callbacks until we can handle * change request correctly. */ - atomic_set(&conf->cl_cb_conn.cb_set, 0); + atomic_set(&conf->cl_cb_set, 0); expire_client(unconf); status = nfs_ok; @@ -2320,7 +2320,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; + int cb_up = atomic_read(&sop->so_client->cl_cb_set); struct file_lock fl, *flp = &fl; int status, flag = 0; @@ -2328,7 +2328,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: - if (!atomic_read(&cb->cb_set)) + if (!cb_up) open->op_recall = 1; flag = open->op_delegate_type; if (flag == NFS4_OPEN_DELEGATE_NONE) @@ -2339,7 +2339,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta * had the chance to reclaim theirs.... 
*/ if (locks_in_grace()) goto out; - if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) + if (!cb_up || !sop->so_confirmed) goto out; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) flag = NFS4_OPEN_DELEGATE_WRITE; @@ -2510,7 +2510,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_cb_conn.cb_set)) + && !atomic_read(&clp->cl_cb_set)) goto out; status = nfs_ok; out: diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cef20ab..cf43812 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -107,9 +107,7 @@ struct nfs4_cb_conn { u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ - /* RPC client info */ - atomic_t cb_set; /* successful CB_NULL call */ - struct rpc_clnt * cb_client; + struct svc_xprt *cb_xprt; /* minorversion 1 only */ }; /* Maximum number of slots per session. 160 is useful for long haul TCP */ @@ -223,9 +221,13 @@ struct nfs4_client { struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ - struct nfs4_cb_conn cl_cb_conn; /* callback info */ u32 cl_firststate; /* recovery dir creation */ + /* for v4.0 and v4.1 callbacks: */ + struct nfs4_cb_conn cl_cb_conn; + struct rpc_clnt *cl_cb_client; + atomic_t cl_cb_set; + /* for nfs41 */ struct list_head cl_sessions; struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ @@ -236,7 +238,6 @@ struct nfs4_client { /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; u32 cl_cb_seq_nr; - struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ }; -- cgit v0.10.2 From 4b21d0defcc9680da8a694e92d5fe8eb668c2c0b Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Sun, 7 Mar 2010 23:39:01 -0500 Subject: nfsd4: allow 4.0 clients to change callback path The rfc allows a client to change the callback parameters, but we didn't previously implement it. Teach the callbacks to rerun themselves (by placing themselves on a workqueue) when they recognize that their rpc task has been killed and that the callback connection has changed. Then we can change the callback connection by setting up a new rpc client, modifying the nfs4 client to point at it, waiting for any work in progress to complete, and then shutting down the old client. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d6c46a9..ea77aa6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -457,9 +457,8 @@ static int max_cb_time(void) /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cl_cb_set an atomic? */ -int setup_callback_client(struct nfs4_client *clp) +int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) { - struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), .to_retries = 0, @@ -481,7 +480,7 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; if (cb->cb_minorversion) { - args.bc_xprt = clp->cl_cb_conn.cb_xprt; + args.bc_xprt = cb->cb_xprt; args.protocol = XPRT_TRANSPORT_BC_TCP; } /* Create RPC client */ @@ -491,7 +490,7 @@ int setup_callback_client(struct nfs4_client *clp) PTR_ERR(client)); return PTR_ERR(client); } - clp->cl_cb_client = client; + nfsd4_set_callback_client(clp, client); return 0; } @@ -548,14 +547,13 @@ void do_probe_callback(struct nfs4_client *clp) /* * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 
*/ -void -nfsd4_probe_callback(struct nfs4_client *clp) +void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) { int status; BUG_ON(atomic_read(&clp->cl_cb_set)); - status = setup_callback_client(clp); + status = setup_callback_client(clp, cb); if (status) { warn_no_callback_path(clp, status); return; @@ -645,18 +643,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) } } + static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfs4_delegation *dp = calldata; struct nfs4_client *clp = dp->dl_client; + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); + if (current_rpc_client == NULL) { + /* We're shutting down; give up. */ + /* XXX: err, or is it ok just to fall through + * and rpc_restart_call? */ + return; + } + switch (task->tk_status) { case -EIO: /* Network partition? */ atomic_set(&clp->cl_cb_set, 0); warn_no_callback_path(clp, task->tk_status); + if (current_rpc_client != task->tk_client) { + /* queue a callback on the new connection: */ + nfsd4_cb_recall(dp); + return; + } case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: /* Race: client probably got cb_recall @@ -705,8 +717,7 @@ void nfsd4_destroy_callback_queue(void) destroy_workqueue(callback_wq); } -void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt -*new) +void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) { struct rpc_clnt *old = clp->cl_cb_client; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59c9bd4..4300d9f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1312,7 +1312,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cstate->minorversion; unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; unconf->cl_cb_seq_nr = 1; - nfsd4_probe_callback(unconf); + nfsd4_probe_callback(unconf, &unconf->cl_cb_conn); } conf = unconf; } else { @@ -1605,9 +1605,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if 
(!same_creds(&conf->cl_cred, &unconf->cl_cred)) status = nfserr_clid_inuse; else { - /* XXX: We just turn off callbacks until we can handle - * change request correctly. */ atomic_set(&conf->cl_cb_set, 0); + nfsd4_probe_callback(conf, &unconf->cl_cb_conn); expire_client(unconf); status = nfs_ok; @@ -1641,7 +1640,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } move_to_confirmed(unconf); conf = unconf; - nfsd4_probe_callback(conf); + nfsd4_probe_callback(conf, &conf->cl_cb_conn); status = nfs_ok; } } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cf43812..98836fd 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -390,7 +390,7 @@ extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); -extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); -- cgit v0.10.2 From 5771635592267758e7dc5647f2a0088aa6244159 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 21 Apr 2010 12:27:19 -0400 Subject: nfsd4: complete enforcement of 4.1 op ordering Enforce the rules about compound op ordering. Motivated by implementing RECLAIM_COMPLETE, for which the client is implicit in the current session, so it is important to ensure a successful SEQUENCE precedes the RECLAIM_COMPLETE. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 37514c4..e147dbc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -968,20 +968,36 @@ static struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); /* - * Enforce NFSv4.1 COMPOUND ordering rules. 
+ * Enforce NFSv4.1 COMPOUND ordering rules: * - * TODO: - * - enforce NFS4ERR_NOT_ONLY_OP, - * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. + * Also note, enforced elsewhere: + * - SEQUENCE other than as first op results in + * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) + * - BIND_CONN_TO_SESSION must be the only op in its compound + * (Will be enforced in nfsd4_bind_conn_to_session().) + * - DESTROY_SESSION must be the final operation in a compound, if + * sessionid's in SEQUENCE and DESTROY_SESSION are the same. + * (Enforced in nfsd4_destroy_session().) */ -static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) +static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) { - if (args->minorversion && args->opcnt > 0) { - struct nfsd4_op *op = &args->ops[0]; - return (op->status == nfserr_op_illegal) || - (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); - } - return true; + struct nfsd4_op *op = &args->ops[0]; + + /* These ordering requirements don't apply to NFSv4.0: */ + if (args->minorversion == 0) + return nfs_ok; + /* This is weird, but OK, not our problem: */ + if (args->opcnt == 0) + return nfs_ok; + if (op->status == nfserr_op_illegal) + return nfs_ok; + if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) + return nfserr_op_not_in_session; + if (op->opnum == OP_SEQUENCE) + return nfs_ok; + if (args->opcnt != 1) + return nfserr_not_only_op; + return nfs_ok; } /* @@ -1023,13 +1039,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (args->minorversion > nfsd_supported_minorversion) goto out; - if (!nfs41_op_ordering_ok(args)) { + status = nfs41_check_op_ordering(args); + if (status) { op = &args->ops[0]; - op->status = nfserr_sequence_pos; + op->status = status; goto encode_op; } - status = nfs_ok; while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4300d9f..bba9fff 100644 --- 
a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1343,6 +1343,14 @@ out: return status; } +static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + __be32 nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, @@ -1358,6 +1366,11 @@ nfsd4_destroy_session(struct svc_rqst *r, * - Do we need to clear any callback info from previous session? */ + if (!memcmp(&sessionid->sessionid, &cstate->session->se_sessionid, + sizeof(struct nfs4_sessionid))) { + if (!nfsd4_last_compound_op(r)) + return nfserr_not_only_op; + } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&sessionid_lock); ses = find_in_sessionid_hashtbl(&sessionid->sessionid); -- cgit v0.10.2 From d03859a4aca3969efd91dc77be7efa2ae45b05d8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Apr 2010 11:30:59 +0200 Subject: nfsd: potential ERR_PTR dereference on exp_export() error paths. We "goto finish" from several places where "exp" is an ERR_PTR. Also I changed the check for "fsid_key" so that it was consistent with the check I added. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 65ddc5b..55da4d3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1076,9 +1076,9 @@ exp_export(struct nfsctl_export *nxp) err = 0; finish: kfree(new.ex_pathname); - if (exp) + if (!IS_ERR_OR_NULL(exp)) exp_put(exp); - if (fsid_key && !IS_ERR(fsid_key)) + if (!IS_ERR_OR_NULL(fsid_key)) cache_put(&fsid_key->h, &svc_expkey_cache); path_put(&path); out_put_clp: -- cgit v0.10.2 From e0c8233622cbd49d171bc57b60e725f2fb748750 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 22 Apr 2010 17:04:25 -0400 Subject: nfsd4: fix filehandle comment Minor typos. Signed-off-by: J. 
Bruce Fields diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 65e333a..45bb5a8 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -40,8 +40,8 @@ struct nfs_fhbase_old { * This is the new flexible, extensible style NFSv2/v3 file handle. * by Neil Brown - March 2000 * - * The file handle is seens as a list of 4byte words. - * The first word contains a version number (1) and four descriptor bytes + * The file handle is seens as a list of four-byte words. + * The first word contains a version number (1) and three descriptor bytes * that tell how the remaining 3 variable length fields should be handled. * These three bytes are auth_type, fsid_type and fileid_type. * -- cgit v0.10.2 From 26c0c75e69265961e891ed80b38fb62a548ab371 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 24 Apr 2010 15:35:43 -0400 Subject: nfsd4: fix unlikely race in session replay case In the replay case, the renew_client(session->se_client); happens after we've dropped the sessionid_lock, and without holding a reference on the session; so there's nothing preventing the session being freed before we get here. Thanks to Benny Halevy for catching a bug in an earlier version of this patch. Signed-off-by: J. 
Bruce Fields Acked-by: Benny Halevy diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e147dbc..61282f8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1027,6 +1027,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->rqstp = rqstp; resp->cstate.minorversion = args->minorversion; resp->cstate.replay_owner = NULL; + resp->cstate.session = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); /* Use the deferral mechanism only for NFSv4.0 compounds */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bba9fff..737315c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1443,11 +1443,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; - /* Hold a session reference until done processing the compound: - * nfsd4_put_session called only if the cstate slot is set. - */ - nfsd4_get_session(session); out: + /* Hold a session reference until done processing the compound. 
*/ + if (cstate->session) + nfsd4_get_session(cstate->session); spin_unlock(&sessionid_lock); /* Renew the clientid on success and on replay */ if (cstate->session) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fb27b1d..05bc5bd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3306,10 +3306,12 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.head[0]; iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); - if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { - nfsd4_store_cache_entry(resp); - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - resp->cstate.slot->sl_inuse = false; + if (nfsd4_has_session(cs)) { + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); + resp->cstate.slot->sl_inuse = false; + } nfsd4_put_session(resp->cstate.session); } return 1; -- cgit v0.10.2 From b48fa6b99100dc7772af3cd276035fcec9719ceb Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 1 Mar 2010 16:51:14 +1100 Subject: sunrpc: centralise most calls to svc_xprt_received svc_xprt_received must be called when ->xpo_recvfrom has finished receiving a message, so that the XPT_BUSY flag will be cleared and if necessary, requeued for further work. This call is currently made in each ->xpo_recvfrom function, often from multiple different points. In each case it is the earliest point on a particular path where it is known that the protection provided by XPT_BUSY is no longer needed. However there are (still) some error paths which do not call svc_xprt_received, and requiring each ->xpo_recvfrom to make the call does not encourage robustness. So: move the svc_xprt_received call to be made just after the call to ->xpo_recvfrom(), and move it out of the various ->xpo_recvfrom methods. 
This means that it may not be called at the earliest possible instant, but this is unlikely to be a measurable performance issue. Note that there are still other calls to svc_xprt_received as it is also needed when an xprt is newly created. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c334f54..75f9aa2 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -743,8 +743,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_deferred) { svc_xprt_received(xprt); len = svc_deferred_recv(rqstp); - } else + } else { len = xprt->xpt_ops->xpo_recvfrom(rqstp); + svc_xprt_received(xprt); + } dprintk("svc: got len=%d\n", len); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259..a338927 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: recvfrom returned error %d\n", -err); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); } - svc_xprt_received(&svsk->sk_xprt); return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); @@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - /* - * Maybe more packets - kick another thread ASAP. 
- */ - svc_xprt_received(&svsk->sk_xprt); - len = skb->len - sizeof(struct udphdr); rqstp->rq_arg.len = len; @@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - svc_xprt_received(&svsk->sk_xprt); goto err_again; /* record header not complete */ } @@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < svsk->sk_reclen) { dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); - svc_xprt_received(&svsk->sk_xprt); goto err_again; /* record not complete */ } len = svsk->sk_reclen; @@ -961,14 +953,11 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return len; error: - if (len == -EAGAIN) { + if (len == -EAGAIN) dprintk("RPC: TCP recv_record got EAGAIN\n"); - svc_xprt_received(&svsk->sk_xprt); - } return len; err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - svc_xprt_received(&svsk->sk_xprt); err_again: return -EAGAIN; } @@ -1110,7 +1099,6 @@ out: svsk->sk_tcplen = 0; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); - svc_xprt_received(&svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; @@ -1119,7 +1107,6 @@ out: err_again: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); - svc_xprt_received(&svsk->sk_xprt); return len; } error: diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f92e37e..0194de8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); - svc_xprt_received(rqstp->rq_xprt); return ret; } @@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_arg.head[0].iov_len); rqstp->rq_prot = IPPROTO_MAX; 
svc_xprt_copy_addrs(rqstp, xprt); - svc_xprt_received(xprt); return ret; close_out: @@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) */ set_bit(XPT_CLOSE, &xprt->xpt_flags); defer: - svc_xprt_received(xprt); return 0; } -- cgit v0.10.2 From fb4b698fc78347419aa9ae7114e1375f92107500 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 28 Apr 2010 17:45:06 -0400 Subject: nfsd: further comment typos Whoops, missed some more. "Reviewed-by, I guess": Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 45bb5a8..80d55bb 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -40,12 +40,12 @@ struct nfs_fhbase_old { * This is the new flexible, extensible style NFSv2/v3 file handle. * by Neil Brown - March 2000 * - * The file handle is seens as a list of four-byte words. + * The file handle starts with a sequence of four-byte words. * The first word contains a version number (1) and three descriptor bytes * that tell how the remaining 3 variable length fields should be handled. * These three bytes are auth_type, fsid_type and fileid_type. * - * All 4byte values are in host-byte-order. + * All four-byte values are in host-byte-order. * * The auth_type field specifies how the filehandle can be authenticated * This might allow a file to be confirmed to be in a writable part of a -- cgit v0.10.2 From dbd65a7e44fff4741a0b2c84bd6bace85d22c242 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 3 May 2010 19:31:33 +0300 Subject: nfsd4: use local variable in nfs4svc_encode_compoundres 'cs' is already computed, re-use it. Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 05bc5bd..b27bcf3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3310,9 +3310,9 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - resp->cstate.slot->sl_inuse = false; + cs->slot->sl_inuse = false; } - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(cs->session); } return 1; } -- cgit v0.10.2 From 5d4cec2f2fdbb3d830fa014226d0d965df548bad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 1 May 2010 12:56:06 -0400 Subject: nfsd4: fix bare destroy_session null dereference It's legal to send a DESTROY_SESSION outside any session (as the only operation in a compound), in which case cstate->session will be NULL; check for that case. While we're at it, move these checks into a separate helper function. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f05a327..835d6ce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1352,6 +1352,13 @@ static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) +{ + if (!session) + return 0; + return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); +} + __be32 nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, @@ -1367,8 +1374,7 @@ nfsd4_destroy_session(struct svc_rqst *r, * - Do we need to clear any callback info from previous session? 
*/ - if (!memcmp(&sessionid->sessionid, &cstate->session->se_sessionid, - sizeof(struct nfs4_sessionid))) { + if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) return nfserr_not_only_op; } -- cgit v0.10.2 From 9089f1b4782ff52835059779fd37b7ad765a25c7 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:12:26 +0300 Subject: nfsd4: rename sessionid_lock to client_lock In preparation to share the lock's scope to both client and session hash tables. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 835d6ce..2313dbf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -250,6 +250,9 @@ unhash_delegation(struct nfs4_delegation *dp) * SETCLIENTID state */ +/* client_lock protects the session hash table */ +static DEFINE_SPINLOCK(client_lock); + /* Hash tables for nfs4_clientid state */ #define CLIENT_HASH_BITS 4 #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) @@ -368,7 +371,6 @@ static void release_openowner(struct nfs4_stateowner *sop) nfs4_put_stateowner(sop); } -static DEFINE_SPINLOCK(sessionid_lock); #define SESSION_HASH_SIZE 512 static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; @@ -566,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, new->se_flags = cses->flags; kref_init(&new->se_ref); - spin_lock(&sessionid_lock); + spin_lock(&client_lock); list_add(&new->se_hash, &sessionid_hashtbl[idx]); list_add(&new->se_perclnt, &clp->cl_sessions); - spin_unlock(&sessionid_lock); + spin_unlock(&client_lock); status = nfs_ok; out: @@ -580,7 +582,7 @@ out_free: goto out; } -/* caller must hold sessionid_lock */ +/* caller must hold client_lock */ static struct nfsd4_session * find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) { @@ -603,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) return NULL; } -/* caller must hold sessionid_lock */ +/* caller must 
hold client_lock */ static void unhash_session(struct nfsd4_session *ses) { @@ -614,9 +616,9 @@ unhash_session(struct nfsd4_session *ses) static void release_session(struct nfsd4_session *ses) { - spin_lock(&sessionid_lock); + spin_lock(&client_lock); unhash_session(ses); - spin_unlock(&sessionid_lock); + spin_unlock(&client_lock); nfsd4_put_session(ses); } @@ -1379,15 +1381,15 @@ nfsd4_destroy_session(struct svc_rqst *r, return nfserr_not_only_op; } dump_sessionid(__func__, &sessionid->sessionid); - spin_lock(&sessionid_lock); + spin_lock(&client_lock); ses = find_in_sessionid_hashtbl(&sessionid->sessionid); if (!ses) { - spin_unlock(&sessionid_lock); + spin_unlock(&client_lock); goto out; } unhash_session(ses); - spin_unlock(&sessionid_lock); + spin_unlock(&client_lock); /* wait for callbacks */ nfsd4_set_callback_client(ses->se_client, NULL); @@ -1411,7 +1413,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (resp->opcnt != 1) return nfserr_sequence_pos; - spin_lock(&sessionid_lock); + spin_lock(&client_lock); status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid); if (!session) @@ -1454,7 +1456,7 @@ out: /* Hold a session reference until done processing the compound. */ if (cstate->session) nfsd4_get_session(cstate->session); - spin_unlock(&sessionid_lock); + spin_unlock(&client_lock); /* Renew the clientid on success and on replay */ if (cstate->session) { nfs4_lock_state(); -- cgit v0.10.2 From be1fdf6c4386f56271d2f690b93ef686b769587c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:12:39 +0300 Subject: nfsd4: fold release_session into expire_client and grab the client lock once for all the client's sessions. Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2313dbf..f8bf619 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -613,15 +613,6 @@ unhash_session(struct nfsd4_session *ses) list_del(&ses->se_perclnt); } -static void -release_session(struct nfsd4_session *ses) -{ - spin_lock(&client_lock); - unhash_session(ses); - spin_unlock(&client_lock); - nfsd4_put_session(ses); -} - void free_session(struct kref *kref) { @@ -722,12 +713,15 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } + spin_lock(&client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); - release_session(ses); + unhash_session(ses); + nfsd4_put_session(ses); } + spin_unlock(&client_lock); nfsd4_set_callback_client(clp, NULL); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); -- cgit v0.10.2 From 328efbab0f8ae1617448917906a12e5f568553b6 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:12:51 +0300 Subject: nfsd4: use list_move in move_to_confirmed rather than list_del_init, list_add Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f8bf619..aecafb2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -859,10 +859,9 @@ move_to_confirmed(struct nfs4_client *clp) unsigned int strhashval; dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); - list_del_init(&clp->cl_strhash); list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); strhashval = clientstr_hashval(clp->cl_recdir); - list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); renew_client(clp); } -- cgit v0.10.2 From 36acb66bda512dd8159c3e1b40358c5219524868 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:13:04 +0300 Subject: nfsd4: extend the client_lock to cover cl_lru To be used later on to hold a reference count on the client while in use by a nfsv4.1 compound. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aecafb2..3f572cb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -250,7 +250,7 @@ unhash_delegation(struct nfs4_delegation *dp) * SETCLIENTID state */ -/* client_lock protects the session hash table */ +/* client_lock protects the client lru list and session hash table */ static DEFINE_SPINLOCK(client_lock); /* Hash tables for nfs4_clientid state */ @@ -628,8 +628,9 @@ free_session(struct kref *kref) kfree(ses); } +/* must be called under the client_lock */ static inline void -renew_client(struct nfs4_client *clp) +renew_client_locked(struct nfs4_client *clp) { /* * Move client to the end to the LRU list. 
@@ -641,6 +642,14 @@ renew_client(struct nfs4_client *clp) clp->cl_time = get_seconds(); } +static inline void +renew_client(struct nfs4_client *clp) +{ + spin_lock(&client_lock); + renew_client_locked(clp); + spin_unlock(&client_lock); +} + /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int STALE_CLIENTID(clientid_t *clid) @@ -706,14 +715,14 @@ expire_client(struct nfs4_client *clp) list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } - list_del(&clp->cl_idhash); - list_del(&clp->cl_strhash); - list_del(&clp->cl_lru); while (!list_empty(&clp->cl_openowners)) { sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } + list_del(&clp->cl_idhash); + list_del(&clp->cl_strhash); spin_lock(&client_lock); + list_del(&clp->cl_lru); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -848,8 +857,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); idhashval = clientid_hashval(clp->cl_clientid.cl_id); list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); - list_add_tail(&clp->cl_lru, &client_lru); - clp->cl_time = get_seconds(); + renew_client(clp); } static void @@ -1447,15 +1455,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, out: /* Hold a session reference until done processing the compound. 
*/ - if (cstate->session) - nfsd4_get_session(cstate->session); - spin_unlock(&client_lock); - /* Renew the clientid on success and on replay */ if (cstate->session) { - nfs4_lock_state(); - renew_client(session->se_client); - nfs4_unlock_state(); + nfsd4_get_session(cstate->session); + /* Renew the clientid on success and on replay */ + renew_client_locked(session->se_client); } + spin_unlock(&client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); return status; } @@ -2564,6 +2569,8 @@ nfs4_laundromat(void) dprintk("NFSD: laundromat service - starting\n"); if (locks_in_grace()) nfsd4_end_grace(); + INIT_LIST_HEAD(&reaplist); + spin_lock(&client_lock); list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -2572,12 +2579,16 @@ nfs4_laundromat(void) clientid_val = t; break; } + list_move(&clp->cl_lru, &reaplist); + } + spin_unlock(&client_lock); + list_for_each_safe(pos, next, &reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); nfsd4_remove_clid_dir(clp); expire_client(clp); } - INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); list_for_each_safe(pos, next, &del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); -- cgit v0.10.2 From 84d38ac9abf0a5bc0044c9363acaad55a9a4be0d Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:13:16 +0300 Subject: nfsd4: refactor expire_client Separate out unhashing of the client and session. To be used later by the laundromat. Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3f572cb..dede43c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -693,6 +693,20 @@ free_client(struct nfs4_client *clp) kfree(clp); } +/* must be called under the client_lock */ +static inline void +unhash_client_locked(struct nfs4_client *clp) +{ + list_del(&clp->cl_lru); + while (!list_empty(&clp->cl_sessions)) { + struct nfsd4_session *ses; + ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, + se_perclnt); + unhash_session(ses); + nfsd4_put_session(ses); + } +} + static void expire_client(struct nfs4_client *clp) { @@ -719,21 +733,14 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } + nfsd4_set_callback_client(clp, NULL); + if (clp->cl_cb_conn.cb_xprt) + svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); list_del(&clp->cl_strhash); spin_lock(&client_lock); - list_del(&clp->cl_lru); - while (!list_empty(&clp->cl_sessions)) { - struct nfsd4_session *ses; - ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, - se_perclnt); - unhash_session(ses); - nfsd4_put_session(ses); - } + unhash_client_locked(clp); spin_unlock(&client_lock); - nfsd4_set_callback_client(clp, NULL); - if (clp->cl_cb_conn.cb_xprt) - svc_xprt_put(clp->cl_cb_conn.cb_xprt); free_client(clp); } -- cgit v0.10.2 From 46583e2597af649f134462d2f2c1be5e6689198d Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:13:29 +0300 Subject: nfsd4: introduce nfs4_client.cl_refcount Currently just initialize the cl_refcount to 1 and decrement in expire_client(), conditionally freeing the client when the refcount reaches 0. To be used later by nfsv4.1 compounds to keep the client from timing out while in use. Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dede43c..e439a88 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -740,8 +740,9 @@ expire_client(struct nfs4_client *clp) list_del(&clp->cl_strhash); spin_lock(&client_lock); unhash_client_locked(clp); + if (atomic_read(&clp->cl_refcount) == 0) + free_client(clp); spin_unlock(&client_lock); - free_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -827,6 +828,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, } memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); + atomic_set(&clp->cl_refcount, 0); atomic_set(&clp->cl_cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 98836fd..ee42a0b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -233,6 +233,8 @@ struct nfs4_client { struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; + /* number of rpc's in progress over an associated session: */ + atomic_t cl_refcount; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ -- cgit v0.10.2 From 07cd4909a6c0c275ef42fd27748226975919e336 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:13:41 +0300 Subject: nfsd4: mark_client_expired Mark the client as expired under the client_lock so it won't be renewed when an nfsv4.1 session is done, after it was explicitly expired during processing of the compound. Do not renew a client marked as expired (in particular, it is not on the lru list anymore) Signed-off-by: Benny Halevy Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e439a88..98aa7e8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -632,6 +632,14 @@ free_session(struct kref *kref) static inline void renew_client_locked(struct nfs4_client *clp) { + if (is_client_expired(clp)) { + dprintk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + /* * Move client to the end to the LRU list. */ @@ -697,6 +705,7 @@ free_client(struct nfs4_client *clp) static inline void unhash_client_locked(struct nfs4_client *clp) { + mark_client_expired(clp); list_del(&clp->cl_lru); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; @@ -836,6 +845,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); + clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ee42a0b..cfd743e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -166,7 +166,7 @@ struct nfsd4_session { struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; u32 se_flags; - struct nfs4_client *se_client; /* for expire_client */ + struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; @@ -244,6 +244,18 @@ struct nfs4_client { /* wait here for slots */ }; +static inline void +mark_client_expired(struct nfs4_client *clp) +{ + clp->cl_time = 0; +} + +static inline bool +is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + /* struct nfs4_client_reset * one per old client. Populates reset_str_hashtbl. 
Filled from conf_id_hashtbl * upon lease reset, or from upcall to state_daemon (to read in state -- cgit v0.10.2 From d76829889ac4250a18cfcc1a606bb256bb9c570c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:13:54 +0300 Subject: nfsd4: keep a reference count on client while in use Get a refcount on the client on SEQUENCE, Release the refcount and renew the client when all respective compounds completed. Do not expire the client by the laundromat while in use. If the client was expired via another path, free it when the compounds complete and the refcount reaches 0. Note that unhash_client_locked must call list_del_init on cl_lru as it may be called twice for the same client (once from nfs4_laundromat and then from expire_client) Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 98aa7e8..cc0e911 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -701,6 +701,22 @@ free_client(struct nfs4_client *clp) kfree(clp); } +void +release_session_client(struct nfsd4_session *session) +{ + struct nfs4_client *clp = session->se_client; + + if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock)) + return; + if (is_client_expired(clp)) { + free_client(clp); + session->se_client = NULL; + } else + renew_client_locked(clp); + spin_unlock(&client_lock); + nfsd4_put_session(session); +} + /* must be called under the client_lock */ static inline void unhash_client_locked(struct nfs4_client *clp) @@ -1476,8 +1492,7 @@ out: /* Hold a session reference until done processing the compound. 
*/ if (cstate->session) { nfsd4_get_session(cstate->session); - /* Renew the clientid on success and on replay */ - renew_client_locked(session->se_client); + atomic_inc(&session->se_client->cl_refcount); } spin_unlock(&client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); @@ -2598,7 +2613,13 @@ nfs4_laundromat(void) clientid_val = t; break; } - list_move(&clp->cl_lru, &reaplist); + if (atomic_read(&clp->cl_refcount)) { + dprintk("NFSD: client in use (clientid %08x)\n", + clp->cl_clientid.cl_id); + continue; + } + unhash_client_locked(clp); + list_add(&clp->cl_lru, &reaplist); } spin_unlock(&client_lock); list_for_each_safe(pos, next, &reaplist) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5c2de47..126d0ca 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,7 +3313,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); cs->slot->sl_inuse = false; } - nfsd4_put_session(cs->session); + /* Renew the clientid on success and on replay */ + release_session_client(cs->session); } return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cfd743e..006c842 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -420,6 +420,7 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); extern int nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); +extern void release_session_client(struct nfsd4_session *); static inline void nfs4_put_stateowner(struct nfs4_stateowner *so) -- cgit v0.10.2 From ab707e156593ff7fffd615757332dbff6616836a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 12 May 2010 00:14:06 +0300 Subject: nfsd4: nfsd4_destroy_session must set callback client under the state lock nfsd4_set_callback_client must be called under the state lock to atomically set or unset the callback client and shutting down the 
previous one. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1d5051d..77bc9d3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -718,6 +718,7 @@ void nfsd4_destroy_callback_queue(void) destroy_workqueue(callback_wq); } +/* must be called under the state lock */ void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) { struct rpc_clnt *old = clp->cl_cb_client; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc0e911..ede9dde 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1427,8 +1427,10 @@ nfsd4_destroy_session(struct svc_rqst *r, unhash_session(ses); spin_unlock(&client_lock); + nfs4_lock_state(); /* wait for callbacks */ nfsd4_set_callback_client(ses->se_client, NULL); + nfs4_unlock_state(); nfsd4_put_session(ses); status = nfs_ok; out: -- cgit v0.10.2 From 4dc6ec00f6347b72312fa41dfc587d5302b05544 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 Apr 2010 15:11:28 -0400 Subject: nfsd4: implement reclaim_complete This is a mandatory operation. Also, here (not in open) is where we should be committing the reboot recovery information. Signed-off-by: J. 
Bruce Fields diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 6a53a84..0488491 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -137,7 +137,7 @@ NS*| OPENATTR | OPT | | Section 18.17 | | READ | REQ | | Section 18.22 | | READDIR | REQ | | Section 18.23 | | READLINK | OPT | | Section 18.24 | -NS | RECLAIM_COMPLETE | REQ | | Section 18.51 | + | RECLAIM_COMPLETE | REQ | | Section 18.51 | | RELEASE_LOCKOWNER | MNI | | N/A | | REMOVE | REQ | | Section 18.25 | | RENAME | REQ | | Section 18.26 | diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e2dc960..59ec449 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1312,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", }, + [OP_RECLAIM_COMPLETE] = { + .op_func = (nfsd4op_func)nfsd4_reclaim_complete, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_RECLAIM_COMPLETE", + }, }; static const char *nfsd4_op_name(unsigned opnum) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ede9dde..84b0fe9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1502,6 +1502,35 @@ out: } __be32 +nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) +{ + if (rc->rca_one_fs) { + if (!cstate->current_fh.fh_dentry) + return nfserr_nofilehandle; + /* + * We don't take advantage of the rca_one_fs case. + * That's OK, it's optional, we can safely ignore it. + */ + return nfs_ok; + } + nfs4_lock_state(); + if (is_client_expired(cstate->session->se_client)) { + nfs4_unlock_state(); + /* + * The following error isn't really legal. + * But we only get here if the client just explicitly + * destroyed the client. Surely it no longer cares what + * error it gets back on an operation for the dead + * client. 
+ */ + return nfserr_stale_clientid; + } + nfsd4_create_clid_dir(cstate->session->se_client); + nfs4_unlock_state(); + return nfs_ok; +} + +__be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { @@ -2510,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(&resp->cstate)) { + if (nfsd4_has_session(&resp->cstate)) open->op_stateowner->so_confirmed = 1; - nfsd4_create_clid_dir(open->op_stateowner->so_client); - } /* * Attempt to hand out a delegation. No error return, because the diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 126d0ca..ac17a70 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, DECODE_TAIL; } +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(rc->rca_one_fs); + + DECODE_TAIL; +} + static __be32 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { @@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; struct nfsd4_minorversion_ops { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c28958e..4d476ff 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -381,6 +381,10 @@ struct nfsd4_destroy_session { struct nfs4_sessionid sessionid; }; +struct nfsd4_reclaim_complete { + u32 rca_one_fs; +}; + struct nfsd4_op { int opnum; __be32 status; @@ -421,6 +425,7 @@ struct nfsd4_op { struct nfsd4_create_session create_session; struct nfsd4_destroy_session destroy_session; 
struct nfsd4_sequence sequence; + struct nfsd4_reclaim_complete reclaim_complete; } u; struct nfs4_replay * replay; }; @@ -523,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_rqst *, extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); +__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, -- cgit v0.10.2 From 15ddb4aec54422ead137b03ea4e9b3f5db3f7cc2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 14 May 2010 15:33:36 +0400 Subject: NFSD: don't report compiled-out versions as present The /proc/fs/nfsd/versions file calls nfsd_vers() to check whether the particular nfsd version is present/available. The problem is that once I turn off e.g. NFSD-V4 this call returns -1 which is true from the callers POV which is wrong. The proposal is to report false in that case. The bug has existed since 6658d3a7bbfd1768 "[PATCH] knfsd: remove nfsd_versbits as intermediate storage for desired versions". Signed-off-by: Pavel Emelyanov Cc: stable@kernel.org Acked-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 171699e..06b2a26 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -120,7 +120,7 @@ u32 nfsd_supported_minorversion; int nfsd_vers(int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) - return -1; + return 0; switch(change) { case NFSD_SET: nfsd_versions[vers] = nfsd_version[vers]; -- cgit v0.10.2 From b7299f44394336f51b526247a870d47d28f4f97c Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Fri, 14 May 2010 17:57:35 -0400 Subject: nfs4: minor callback code simplification, comment Note the position in the version array doesn't have to match the actual rpc version number--to me it seems clearer to maintain the distinction. Also document choice of rpc callback version number, as discussed in e.g. http://www.ietf.org/mail-archive/web/nfsv4/current/msg07985.html and followups. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 77bc9d3..eb78e7e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -424,13 +424,19 @@ static struct rpc_procinfo nfs4_cb_procedures[] = { }; static struct rpc_version nfs_cb_version4 = { +/* + * Note on the callback rpc program version number: despite language in rfc + * 5661 section 18.36.3 requiring servers to use 4 in this field, the + * official xdr descriptions for both 4.0 and 4.1 specify version 1, and + * in practice that appears to be what implementations use. The section + * 18.36.3 language is expected to be fixed in an erratum. + */ .number = 1, .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), .procs = nfs4_cb_procedures }; static struct rpc_version * nfs_cb_version[] = { - NULL, &nfs_cb_version4, }; @@ -471,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, - .version = nfs_cb_version[1]->number, + .version = 0, .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), .client_name = clp->cl_principal, -- cgit v0.10.2 From 47cee541a46a73b20dc279bf4c4690f776f6c81b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 17 May 2010 20:00:37 +0400 Subject: nfsd: safer initialization order in find_file() The alloc_init_file() first adds a file to the hash and then initializes its fi_inode, fi_id and fi_had_conflict. 
The uninitialized fi_inode could thus be erroneously checked by the find_file(), so move the hash insertion lower. The client_mutex should prevent this race in practice; however, we eventually hope to make less use of the client_mutex, so the ordering here is an accident waiting to happen. I didn't find whether the same can be true for two other fields, but the common sense tells me it's better to initialize an object before putting it into a global hash table :) Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 84b0fe9..296eded 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1757,12 +1757,12 @@ alloc_init_file(struct inode *ino) INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); - spin_lock(&recall_lock); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; fp->fi_had_conflict = false; + spin_lock(&recall_lock); + list_add(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&recall_lock); return fp; } return NULL; -- cgit v0.10.2 From e4e83ea47babd9d4bf95a13aed87f8ef51e46472 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 22 Apr 2010 16:21:39 -0400 Subject: Revert "nfsd4: distinguish expired from stale stateids" This reverts commit 78155ed75f470710f2aecb3e75e3d97107ba8374. We're depending here on the boot time that we use to generate the stateid being monotonic, but get_seconds() is not necessarily. We still depend at least on boot_time being different every time, but that is a safer bet. We have a few reports of errors that might be explained by this problem, though we haven't been able to confirm any of them. But the minor gain of distinguishing expired from stale errors seems not worth the risk. Conflicts: fs/nfsd/nfs4state.c Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 296eded..12f7109 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; dp->dl_ident = cb->cb_ident; - dp->dl_stateid.si_boot = get_seconds(); + dp->dl_stateid.si_boot = boot_time; dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; dp->dl_stateid.si_generation = 0; @@ -1884,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = get_seconds(); + stp->st_stateid.si_boot = boot_time; stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; @@ -2733,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) static int STALE_STATEID(stateid_t *stateid) { - if (time_after((unsigned long)boot_time, - (unsigned long)stateid->si_boot)) { - dprintk("NFSD: stale stateid " STATEID_FMT "!\n", - STATEID_VAL(stateid)); - return 1; - } - return 0; -} - -static int -EXPIRED_STATEID(stateid_t *stateid) -{ - if (time_before((unsigned long)boot_time, - ((unsigned long)stateid->si_boot)) && - time_before((unsigned long)(stateid->si_boot + nfsd4_lease), get_seconds())) { - dprintk("NFSD: expired stateid " STATEID_FMT "!\n", - STATEID_VAL(stateid)); - return 1; - } - return 0; -} - -static __be32 -stateid_error_map(stateid_t *stateid) -{ - if (STALE_STATEID(stateid)) - return nfserr_stale_stateid; - if (EXPIRED_STATEID(stateid)) - return nfserr_expired; - - dprintk("NFSD: bad stateid " STATEID_FMT "!\n", + if (stateid->si_boot == boot_time) + return 0; + dprintk("NFSD: stale stateid " STATEID_FMT "!\n", STATEID_VAL(stateid)); - return nfserr_bad_stateid; + return 1; } static inline int @@ -2889,10 +2861,8 @@ 
nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; if (is_delegation_stateid(stateid)) { dp = find_delegation_stateid(ino, stateid); - if (!dp) { - status = stateid_error_map(stateid); + if (!dp) goto out; - } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) @@ -2905,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); - if (!stp) { - status = stateid_error_map(stateid); + if (!stp) goto out; - } if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) @@ -2980,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, */ sop = search_close_lru(stateid->si_stateownerid, flags); if (sop == NULL) - return stateid_error_map(stateid); + return nfserr_bad_stateid; *sopp = sop; goto check_replay; } @@ -3247,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!is_delegation_stateid(stateid)) goto out; dp = find_delegation_stateid(inode, stateid); - if (!dp) { - status = stateid_error_map(stateid); + if (!dp) goto out; - } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) goto out; @@ -3476,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = get_seconds(); + stp->st_stateid.si_boot = boot_time; stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; -- cgit v0.10.2