From 2283963f27fdd56b185e49a964c290130c7c95ab Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 11 Jan 2009 14:27:17 -0500 Subject: nfsd4: split lockstateid/openstateid release logic The flags here attempt to make the code more general, but I find it actually just adds confusion. I think it's clearer to separate the logic for the open and lock cases entirely. And eventually we may want to separate the stateowner and stateid types as well, as many of the fields aren't shared between the lock and open cases. Also move to eliminate forward references. Start with the stateid's. Signed-off-by: J. Bruce Fields Reviewed-by: Benny Halevy diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b6f60f4..e83b3c8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -119,7 +119,6 @@ opaque_hashval(const void *ptr, int nbytes) /* forward declarations */ static void release_stateowner(struct nfs4_stateowner *sop); -static void release_stateid(struct nfs4_stateid *stp, int flags); /* * Delegation state @@ -311,6 +310,34 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head client_lru; static struct list_head close_lru; +static void unhash_generic_stateid(struct nfs4_stateid *stp) +{ + list_del(&stp->st_hash); + list_del(&stp->st_perfile); + list_del(&stp->st_perstateowner); +} + +static void free_generic_stateid(struct nfs4_stateid *stp) +{ + put_nfs4_file(stp->st_file); + kmem_cache_free(stateid_slab, stp); +} + +static void release_lock_stateid(struct nfs4_stateid *stp) +{ + unhash_generic_stateid(stp); + locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); + free_generic_stateid(stp); +} + +static void release_open_stateid(struct nfs4_stateid *stp) +{ + unhash_generic_stateid(stp); + release_stateid_lockowners(stp); + nfsd_close(stp->st_vfs_file); + free_generic_stateid(stp); +} + static inline void renew_client(struct nfs4_client *clp) { @@ -1065,9 +1092,9 @@ unhash_stateowner(struct nfs4_stateowner *sop) stp = list_entry(sop->so_stateids.next, struct nfs4_stateid, st_perstateowner); if (sop->so_is_open_owner) - release_stateid(stp, OPEN_STATE); + release_open_stateid(stp); else - release_stateid(stp, LOCK_STATE); + release_lock_stateid(stp); } } @@ -1106,24 +1133,6 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * } static void -release_stateid(struct nfs4_stateid *stp, int flags) -{ - struct file *filp = stp->st_vfs_file; - - list_del(&stp->st_hash); - list_del(&stp->st_perfile); - list_del(&stp->st_perstateowner); - if (flags & OPEN_STATE) { - release_stateid_lockowners(stp); - stp->st_vfs_file = NULL; - nfsd_close(filp); - } else if (flags & LOCK_STATE) - locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); - put_nfs4_file(stp->st_file); - kmem_cache_free(stateid_slab, stp); -} - -static void move_to_close_lru(struct nfs4_stateowner *sop) { dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); @@ -1764,7 +1773,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf init_stateid(stp, fp, open); status = nfsd4_truncate(rqstp, current_fh, open); if (status) { - release_stateid(stp, OPEN_STATE); + release_open_stateid(stp); goto out; } } @@ -2373,7 +2382,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); /* release_stateid() calls nfsd_close() if needed */ - release_stateid(stp, OPEN_STATE); + release_open_stateid(stp); /* place unused nfs4_stateowners on so_close_lru list to be * released by the laundromat service after the lease period -- cgit v0.10.2 From f1d110caf7d759eae2c02c84343f63d83db9b9be Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 11 Jan 2009 14:37:31 -0500 Subject: nfsd4: remove a forward declaration Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e83b3c8..6ab3077 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -75,7 +75,6 @@ static stateid_t onestateid; /* bits all 1 */ /* forward declarations */ static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); -static void release_stateid_lockowners(struct nfs4_stateid *open_stp); static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; static void nfs4_set_recdir(char *recdir); @@ -330,6 +329,20 @@ static void release_lock_stateid(struct nfs4_stateid *stp) free_generic_stateid(stp); } +static void +release_stateid_lockowners(struct nfs4_stateid *open_stp) +{ + struct nfs4_stateowner *lock_sop; + + while (!list_empty(&open_stp->st_lockowners)) { + lock_sop = list_entry(open_stp->st_lockowners.next, + struct nfs4_stateowner, so_perstateid); + /* list_del(&open_stp->st_lockowners); */ + BUG_ON(lock_sop->so_is_open_owner); + release_stateowner(lock_sop); + } +} + static void release_open_stateid(struct nfs4_stateid *stp) { unhash_generic_stateid(stp); @@ -338,6 +351,34 @@ static void release_open_stateid(struct nfs4_stateid *stp) free_generic_stateid(stp); } +static void +unhash_stateowner(struct nfs4_stateowner *sop) +{ + struct nfs4_stateid *stp; + + list_del(&sop->so_idhash); + list_del(&sop->so_strhash); + if (sop->so_is_open_owner) + list_del(&sop->so_perclient); + list_del(&sop->so_perstateid); + while (!list_empty(&sop->so_stateids)) { + stp = list_entry(sop->so_stateids.next, + struct nfs4_stateid, st_perstateowner); + if (sop->so_is_open_owner) + release_open_stateid(stp); + else + release_lock_stateid(stp); + } +} + +static void +release_stateowner(struct nfs4_stateowner *sop) +{ + unhash_stateowner(sop); + list_del(&sop->so_close_lru); + nfs4_put_stateowner(sop); +} + static inline void renew_client(struct nfs4_client *clp) { @@ -1064,48 +1105,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str return sop; } -static void -release_stateid_lockowners(struct nfs4_stateid *open_stp) -{ - struct nfs4_stateowner *lock_sop; - - while (!list_empty(&open_stp->st_lockowners)) { - lock_sop = list_entry(open_stp->st_lockowners.next, - struct nfs4_stateowner, so_perstateid); - /* list_del(&open_stp->st_lockowners); */ - BUG_ON(lock_sop->so_is_open_owner); - release_stateowner(lock_sop); - } -} - -static void -unhash_stateowner(struct nfs4_stateowner *sop) -{ - struct nfs4_stateid *stp; - - list_del(&sop->so_idhash); - list_del(&sop->so_strhash); - if (sop->so_is_open_owner) - list_del(&sop->so_perclient); - list_del(&sop->so_perstateid); - while (!list_empty(&sop->so_stateids)) { - stp = list_entry(sop->so_stateids.next, - struct nfs4_stateid, st_perstateowner); - if (sop->so_is_open_owner) - release_open_stateid(stp); - else - release_lock_stateid(stp); - } -} - -static void -release_stateowner(struct nfs4_stateowner *sop) -{ - unhash_stateowner(sop); - list_del(&sop->so_close_lru); - nfs4_put_stateowner(sop); -} - static inline void init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_stateowner *sop = open->op_stateowner; -- cgit v0.10.2 From f044ff830f1afe91e4388320f0c7b6e08d2e05f8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 11 Jan 2009 15:24:04 -0500 Subject: nfsd4: split open/lockowner release code The caller always knows specifically whether it's releasing a lockowner or an openowner, and the code is simpler if we use separate functions (and the apparent recursion is gone). Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ab3077..41a3590 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -116,9 +116,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -/* forward declarations */ -static void release_stateowner(struct nfs4_stateowner *sop); - /* * Delegation state */ @@ -329,6 +326,26 @@ static void release_lock_stateid(struct nfs4_stateid *stp) free_generic_stateid(stp); } +static void unhash_lockowner(struct nfs4_stateowner *sop) +{ + struct nfs4_stateid *stp; + + list_del(&sop->so_idhash); + list_del(&sop->so_strhash); + list_del(&sop->so_perstateid); + while (!list_empty(&sop->so_stateids)) { + stp = list_first_entry(&sop->so_stateids, + struct nfs4_stateid, st_perstateowner); + release_lock_stateid(stp); + } +} + +static void release_lockowner(struct nfs4_stateowner *sop) +{ + unhash_lockowner(sop); + nfs4_put_stateowner(sop); +} + static void release_stateid_lockowners(struct nfs4_stateid *open_stp) { @@ -339,7 +356,7 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp) struct nfs4_stateowner, so_perstateid); /* list_del(&open_stp->st_lockowners); */ BUG_ON(lock_sop->so_is_open_owner); - release_stateowner(lock_sop); + release_lockowner(lock_sop); } } @@ -351,30 +368,24 @@ static void release_open_stateid(struct nfs4_stateid *stp) free_generic_stateid(stp); } -static void -unhash_stateowner(struct nfs4_stateowner *sop) +static void unhash_openowner(struct nfs4_stateowner *sop) { struct nfs4_stateid *stp; list_del(&sop->so_idhash); list_del(&sop->so_strhash); - if (sop->so_is_open_owner) - list_del(&sop->so_perclient); - list_del(&sop->so_perstateid); + list_del(&sop->so_perclient); + list_del(&sop->so_perstateid); /* XXX: necessary? */ while (!list_empty(&sop->so_stateids)) { - stp = list_entry(sop->so_stateids.next, - struct nfs4_stateid, st_perstateowner); - if (sop->so_is_open_owner) - release_open_stateid(stp); - else - release_lock_stateid(stp); + stp = list_first_entry(&sop->so_stateids, + struct nfs4_stateid, st_perstateowner); + release_open_stateid(stp); } } -static void -release_stateowner(struct nfs4_stateowner *sop) +static void release_openowner(struct nfs4_stateowner *sop) { - unhash_stateowner(sop); + unhash_openowner(sop); list_del(&sop->so_close_lru); nfs4_put_stateowner(sop); } @@ -488,7 +499,7 @@ expire_client(struct nfs4_client *clp) list_del(&clp->cl_lru); while (!list_empty(&clp->cl_openowners)) { sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); - release_stateowner(sop); + release_openowner(sop); } put_nfs4_client(clp); } @@ -1443,7 +1454,7 @@ nfsd4_process_open1(struct nfsd4_open *open) if (!sop->so_confirmed) { /* Replace unconfirmed owners without checking for replay. */ clp = sop->so_client; - release_stateowner(sop); + release_openowner(sop); open->op_stateowner = NULL; goto renew; } @@ -1906,7 +1917,7 @@ nfs4_laundromat(void) } dprintk("NFSD: purging unused open stateowner (so_id %d)\n", sop->so_id); - release_stateowner(sop); + release_openowner(sop); } if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; @@ -2796,7 +2807,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } out: if (status && lock->lk_is_new && lock_sop) - release_stateowner(lock_sop); + release_lockowner(lock_sop); if (lock->lk_replay_owner) { nfs4_get_stateowner(lock->lk_replay_owner); cstate->replay_owner = lock->lk_replay_owner; @@ -3045,7 +3056,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, /* unhash_stateowner deletes so_perclient only * for openowners. */ list_del(&sop->so_perclient); - release_stateowner(sop); + release_lockowner(sop); } out: nfs4_unlock_state(); -- cgit v0.10.2 From 12214cb78147d42da28bc0d1b2917584e0a8efcd Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Mon, 12 Jan 2009 03:13:53 +0800 Subject: NFSD: cleanup for nfs3proc.c MSDOS_SUPER_MAGIC is defined in , so use MSDOS_SUPER_MAGIC directly. Signed-off-by: Qinghuang Feng Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9dbd2eb..579ce8c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -569,7 +570,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; /* Note that we don't care for remote fs's here */ - if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { + if (sb->s_magic == MSDOS_SUPER_MAGIC) { resp->f_properties = NFS3_FSF_BILLYBOY; } resp->f_maxfilesize = sb->s_maxbytes; @@ -610,7 +611,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->p_link_max = EXT2_LINK_MAX; resp->p_name_max = EXT2_NAME_LEN; break; - case 0x4d44: /* MSDOS_SUPER_MAGIC */ + case MSDOS_SUPER_MAGIC: resp->p_case_insensitive = 1; resp->p_case_preserving = 0; break; -- cgit v0.10.2 From 686665619e4424b4f80c3a49e3e1229f27ea565c Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Thu, 29 Jan 2009 20:53:57 +0530 Subject: nfsd : Define NFSD only when FILE_LOCKING is enabled Enable NFSD only when FILE_LOCKING is enabled, since we don't want to support NFSD without FILE_LOCKING. Signed-off-by: Manish Katiyar Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 44d7d04..503b9da 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -1,6 +1,7 @@ config NFSD tristate "NFS server support" depends on INET + depends on FILE_LOCKING select LOCKD select SUNRPC select EXPORTFS -- cgit v0.10.2 From 99f88726381f676bba6e7dcf74b7412857d7946a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Feb 2009 15:12:27 -0500 Subject: nfsd: clarify exclusive create bitmask result. The use of |= is confusing--the bitmask is always initialized to zero in this case, so we're effectively just doing an assignment here. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9fa60a3..85b9a08 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -103,11 +103,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o (u32 *)open->op_verf.data, &open->op_truncate, &created); - /* If we ever decide to use different attrs to store the - * verifier in nfsd_create_v3, then we'll need to change this + /* + * Following rfc 3530 14.2.16, use the returned bitmask + * to indicate which attributes we used to store the + * verifier: */ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) - open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | + open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); } else { status = nfsd_lookup(rqstp, current_fh, -- cgit v0.10.2 From 13024b7b4097d33fe6bba34b1c83602e88753270 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Feb 2009 17:04:03 -0500 Subject: nfsd4: fix misplaced comment Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 85b9a08..3265062 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -120,9 +120,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o goto out; set_change_info(&open->op_cinfo, current_fh); + fh_dup2(current_fh, &resfh); /* set reply cache */ - fh_dup2(current_fh, &resfh); open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; memcpy(open->op_stateowner->so_replay.rp_openfh, &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); -- cgit v0.10.2 From a4773c08f2872626cb923433284488fbe8acb0ae Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Feb 2009 17:23:10 -0500 Subject: nfsd4: use helper for copying filehandles for replay Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3265062..af66073 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -123,10 +123,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o fh_dup2(current_fh, &resfh); /* set reply cache */ - open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; - memcpy(open->op_stateowner->so_replay.rp_openfh, - &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); - + fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, + &resfh.fh_handle); if (!created) status = do_open_permission(rqstp, current_fh, open, NFSD_MAY_NOP); @@ -152,10 +150,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); /* set replay cache */ - open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; - memcpy(open->op_stateowner->so_replay.rp_openfh, - ¤t_fh->fh_handle.fh_base, - current_fh->fh_handle.fh_size); + fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, + ¤t_fh->fh_handle); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); @@ -187,9 +183,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_stateowner->so_replay; fh_put(&cstate->current_fh); - cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; - memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, - rp->rp_openfh_len); + fh_copy_shallow(&cstate->current_fh.fh_handle, + &rp->rp_openfh); status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) dprintk("nfsd4_open: replay failed" diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index fa317f6..afa1901 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -269,6 +269,13 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) return dst; } +static inline void +fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +{ + dst->fh_size = src->fh_size; + memcpy(&dst->fh_base, &src->fh_base, src->fh_size); +} + static __inline__ struct svc_fh * fh_init(struct svc_fh *fhp, int maxsize) { diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 128298c..b65b2a6 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -168,8 +168,7 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; unsigned intrp_allocated; - int rp_openfh_len; - char rp_openfh[NFS4_FHSIZE]; + struct knfsd_fh rp_openfh; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; -- cgit v0.10.2 From 6c02eaa1d1e53b9b2cc27d0c6fff3e57da4b611f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Feb 2009 17:30:51 -0500 Subject: nfsd4: use helper for copying delegation filehandle Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c464181..c6804db3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -218,7 +218,7 @@ static int encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) { __be32 *p; - int len = cb_rec->cbr_fhlen; + int len = cb_rec->cbr_fh.fh_size; RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); WRITE32(OP_CB_RECALL); @@ -226,7 +226,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); WRITE32(cb_rec->cbr_trunc); WRITE32(len); - WRITEMEM(cb_rec->cbr_fhval, len); + WRITEMEM(&cb_rec->cbr_fh.fh_base, len); return 0; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 41a3590..7f616e9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -215,9 +215,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; dp->dl_stateid.si_generation = 0; - dp->dl_fhlen = current_fh->fh_handle.fh_size; - memcpy(dp->dl_fhval, ¤t_fh->fh_handle.fh_base, - current_fh->fh_handle.fh_size); + fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); list_add(&dp->dl_perfile, &fp->fi_delegations); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index b65b2a6..1130d53 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -66,8 +66,7 @@ struct nfs4_cb_recall { u32 cbr_ident; int cbr_trunc; stateid_t cbr_stateid; - u32 cbr_fhlen; - char cbr_fhval[NFS4_FHSIZE]; + struct knfsd_fh cbr_fh; struct nfs4_delegation *cbr_dp; }; @@ -86,8 +85,7 @@ struct nfs4_delegation { }; #define dl_stateid dl_recall.cbr_stateid -#define dl_fhlen dl_recall.cbr_fhlen -#define dl_fhval dl_recall.cbr_fhval +#define dl_fh dl_recall.cbr_fh /* client delegation callback info */ struct nfs4_callback { -- cgit v0.10.2 From e37da04ed145d45c2a698d7cb373a7e1191fbe86 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Thu, 18 Dec 2008 19:55:16 -0800 Subject: nfsd: lock state around put client and delegation in nfsd4_cb_recall not having the state locked before putting the client/delegation causes a bug. Also removed the comment from the function header about the state being already locked Signed-off-by: Alexandros Batsakis Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c6804db3..3ddc9fb 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -451,7 +451,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) /* * called with dp->dl_count inc'ed. - * nfs4_lock_state() may or may not have been called. */ void nfsd4_cb_recall(struct nfs4_delegation *dp) @@ -491,7 +490,9 @@ out_put_cred: * Success or failure, now we're either waiting for lease expiration * or deleg_return. */ + nfs4_lock_state(); put_nfs4_client(clp); nfs4_put_delegation(dp); + nfs4_unlock_state(); return; } -- cgit v0.10.2 From e33d1ea60c3a17b8b5c2910b1eef4c1faf0ac450 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 9 Feb 2009 12:30:43 -0500 Subject: lockd: clean up blocking lock cases of nlsmvc_lock() No change in behavior, just rearranging the switch so that we break out of the switch if and only if we're in the wait case. Signed-off-by: Miklos Szeredi Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 763b78a..83ee342 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -426,8 +426,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, ret = nlm_granted; goto out; case -EAGAIN: + /* + * If this is a blocking request for an + * already pending lock request then we need + * to put it back on lockd's block list + */ + if (wait) + break; ret = nlm_lck_denied; - break; + goto out; case FILE_LOCK_DEFERRED: if (wait) break; @@ -443,10 +450,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - ret = nlm_lck_denied; - if (!wait) - goto out; - ret = nlm_lck_blocked; /* Append to list of blocked */ -- cgit v0.10.2 From 4ac35c2f794503d3acda20d98e89cf63f6e94332 Mon Sep 17 00:00:00 2001 From: wengang wang Date: Tue, 10 Feb 2009 11:27:51 +0800 Subject: nfsd(v2/v3): fix the failure of creation from HPUX client sometimes HPUX nfs client sends a create request to linux nfs server(v2/v3). the dump of the request is like: obj_attributes mode: value follows set_it: value follows (1) mode: 00 uid: no value set_it: no value (0) gid: value follows set_it: value follows (1) gid: 8030 size: value follows set_it: value follows (1) size: 0 atime: don't change set_it: don't change (0) mtime: don't change set_it: don't change (0) note that mode is 00(havs no rwx privilege even for the owner) and it requires to set size to 0. as current nfsd(v2/v3) implementation, the server does mainly 2 steps: 1) creates the file in mode specified by calling vfs_create(). 2) sets attributes for the file by calling nfsd_setattr(). at step 2), it finally calls file system specific setattr() function which may fail when checking permission because changing size needs WRITE privilege but it has none since mode is 000. for this case, a new file created, we may simply ignore the request of setting size to 0, so that WRITE privilege is not needed and the open succeeds. Signed-off-by: Wengang Wang -- vfs.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6e50aaa..0c07629 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1176,6 +1176,21 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, return 0; } +/* HPUX client sometimes creates a file in mode 000, and sets size to 0. + * setting size to 0 may fail for some specific file systems by the permission + * checking which requires WRITE permission but the mode is 000. + * we ignore the resizing(to 0) on the just new created file, since the size is + * 0 after file created. + * + * call this only after vfs_create() is called. + * */ +static void +nfsd_check_ignore_resizing(struct iattr *iap) +{ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; +} + /* * Create a file (regular, directory, device, fifo); UNIX sockets * not yet implemented. @@ -1271,6 +1286,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, switch (type) { case S_IFREG: host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + if (!host_err) + nfsd_check_ignore_resizing(iap); break; case S_IFDIR: host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); @@ -1424,6 +1441,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, /* setattr will sync the child (or not) */ } + nfsd_check_ignore_resizing(iap); + if (createmode == NFS3_CREATE_EXCLUSIVE) { /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME -- cgit v0.10.2 From 77f18f5e4ebdea35ec3d92343b0ed7546dc87637 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 11 Feb 2009 17:16:58 -0800 Subject: nfs: replace uses of __constant_{endian} The base versions handle constant folding now, none of these headers are exported to userspace, so the __ prefixed versions are not necessary. Signed-off-by: Harvey Harrison Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 7dc5b6c..d39ed1c 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -25,13 +25,13 @@ struct svc_rqst; #define NLM_MAXCOOKIELEN 32 #define NLM_MAXSTRLEN 1024 -#define nlm_granted __constant_htonl(NLM_LCK_GRANTED) -#define nlm_lck_denied __constant_htonl(NLM_LCK_DENIED) -#define nlm_lck_denied_nolocks __constant_htonl(NLM_LCK_DENIED_NOLOCKS) -#define nlm_lck_blocked __constant_htonl(NLM_LCK_BLOCKED) -#define nlm_lck_denied_grace_period __constant_htonl(NLM_LCK_DENIED_GRACE_PERIOD) +#define nlm_granted cpu_to_be32(NLM_LCK_GRANTED) +#define nlm_lck_denied cpu_to_be32(NLM_LCK_DENIED) +#define nlm_lck_denied_nolocks cpu_to_be32(NLM_LCK_DENIED_NOLOCKS) +#define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED) +#define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD) -#define nlm_drop_reply __constant_htonl(30000) +#define nlm_drop_reply cpu_to_be32(30000) /* Lock info passed via NLM */ struct nlm_lock { diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 12bfe09..7353821 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -15,11 +15,11 @@ #include /* error codes new to NLMv4 */ -#define nlm4_deadlock __constant_htonl(NLM_DEADLCK) -#define nlm4_rofs __constant_htonl(NLM_ROFS) -#define nlm4_stale_fh __constant_htonl(NLM_STALE_FH) -#define nlm4_fbig __constant_htonl(NLM_FBIG) -#define nlm4_failed __constant_htonl(NLM_FAILED) +#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK) +#define nlm4_rofs cpu_to_be32(NLM_ROFS) +#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH) +#define nlm4_fbig cpu_to_be32(NLM_FBIG) +#define nlm4_failed cpu_to_be32(NLM_FAILED) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index e19f459..16f7b40 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -186,78 +186,78 @@ void nfsd_lockd_shutdown(void); /* * These macros provide pre-xdr'ed values for faster operation. */ -#define nfs_ok __constant_htonl(NFS_OK) -#define nfserr_perm __constant_htonl(NFSERR_PERM) -#define nfserr_noent __constant_htonl(NFSERR_NOENT) -#define nfserr_io __constant_htonl(NFSERR_IO) -#define nfserr_nxio __constant_htonl(NFSERR_NXIO) -#define nfserr_eagain __constant_htonl(NFSERR_EAGAIN) -#define nfserr_acces __constant_htonl(NFSERR_ACCES) -#define nfserr_exist __constant_htonl(NFSERR_EXIST) -#define nfserr_xdev __constant_htonl(NFSERR_XDEV) -#define nfserr_nodev __constant_htonl(NFSERR_NODEV) -#define nfserr_notdir __constant_htonl(NFSERR_NOTDIR) -#define nfserr_isdir __constant_htonl(NFSERR_ISDIR) -#define nfserr_inval __constant_htonl(NFSERR_INVAL) -#define nfserr_fbig __constant_htonl(NFSERR_FBIG) -#define nfserr_nospc __constant_htonl(NFSERR_NOSPC) -#define nfserr_rofs __constant_htonl(NFSERR_ROFS) -#define nfserr_mlink __constant_htonl(NFSERR_MLINK) -#define nfserr_opnotsupp __constant_htonl(NFSERR_OPNOTSUPP) -#define nfserr_nametoolong __constant_htonl(NFSERR_NAMETOOLONG) -#define nfserr_notempty __constant_htonl(NFSERR_NOTEMPTY) -#define nfserr_dquot __constant_htonl(NFSERR_DQUOT) -#define nfserr_stale __constant_htonl(NFSERR_STALE) -#define nfserr_remote __constant_htonl(NFSERR_REMOTE) -#define nfserr_wflush __constant_htonl(NFSERR_WFLUSH) -#define nfserr_badhandle __constant_htonl(NFSERR_BADHANDLE) -#define nfserr_notsync __constant_htonl(NFSERR_NOT_SYNC) -#define nfserr_badcookie __constant_htonl(NFSERR_BAD_COOKIE) -#define nfserr_notsupp __constant_htonl(NFSERR_NOTSUPP) -#define nfserr_toosmall __constant_htonl(NFSERR_TOOSMALL) -#define nfserr_serverfault __constant_htonl(NFSERR_SERVERFAULT) -#define nfserr_badtype __constant_htonl(NFSERR_BADTYPE) -#define nfserr_jukebox __constant_htonl(NFSERR_JUKEBOX) -#define nfserr_denied __constant_htonl(NFSERR_DENIED) -#define nfserr_deadlock __constant_htonl(NFSERR_DEADLOCK) -#define nfserr_expired __constant_htonl(NFSERR_EXPIRED) -#define nfserr_bad_cookie __constant_htonl(NFSERR_BAD_COOKIE) -#define nfserr_same __constant_htonl(NFSERR_SAME) -#define nfserr_clid_inuse __constant_htonl(NFSERR_CLID_INUSE) -#define nfserr_stale_clientid __constant_htonl(NFSERR_STALE_CLIENTID) -#define nfserr_resource __constant_htonl(NFSERR_RESOURCE) -#define nfserr_moved __constant_htonl(NFSERR_MOVED) -#define nfserr_nofilehandle __constant_htonl(NFSERR_NOFILEHANDLE) -#define nfserr_minor_vers_mismatch __constant_htonl(NFSERR_MINOR_VERS_MISMATCH) -#define nfserr_share_denied __constant_htonl(NFSERR_SHARE_DENIED) -#define nfserr_stale_stateid __constant_htonl(NFSERR_STALE_STATEID) -#define nfserr_old_stateid __constant_htonl(NFSERR_OLD_STATEID) -#define nfserr_bad_stateid __constant_htonl(NFSERR_BAD_STATEID) -#define nfserr_bad_seqid __constant_htonl(NFSERR_BAD_SEQID) -#define nfserr_symlink __constant_htonl(NFSERR_SYMLINK) -#define nfserr_not_same __constant_htonl(NFSERR_NOT_SAME) -#define nfserr_restorefh __constant_htonl(NFSERR_RESTOREFH) -#define nfserr_attrnotsupp __constant_htonl(NFSERR_ATTRNOTSUPP) -#define nfserr_bad_xdr __constant_htonl(NFSERR_BAD_XDR) -#define nfserr_openmode __constant_htonl(NFSERR_OPENMODE) -#define nfserr_locks_held __constant_htonl(NFSERR_LOCKS_HELD) -#define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL) -#define nfserr_grace __constant_htonl(NFSERR_GRACE) -#define nfserr_no_grace __constant_htonl(NFSERR_NO_GRACE) -#define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) -#define nfserr_badname __constant_htonl(NFSERR_BADNAME) -#define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) -#define nfserr_locked __constant_htonl(NFSERR_LOCKED) -#define nfserr_wrongsec __constant_htonl(NFSERR_WRONGSEC) -#define nfserr_replay_me __constant_htonl(NFSERR_REPLAY_ME) +#define nfs_ok cpu_to_be32(NFS_OK) +#define nfserr_perm cpu_to_be32(NFSERR_PERM) +#define nfserr_noent cpu_to_be32(NFSERR_NOENT) +#define nfserr_io cpu_to_be32(NFSERR_IO) +#define nfserr_nxio cpu_to_be32(NFSERR_NXIO) +#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) +#define nfserr_acces cpu_to_be32(NFSERR_ACCES) +#define nfserr_exist cpu_to_be32(NFSERR_EXIST) +#define nfserr_xdev cpu_to_be32(NFSERR_XDEV) +#define nfserr_nodev cpu_to_be32(NFSERR_NODEV) +#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR) +#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR) +#define nfserr_inval cpu_to_be32(NFSERR_INVAL) +#define nfserr_fbig cpu_to_be32(NFSERR_FBIG) +#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) +#define nfserr_rofs cpu_to_be32(NFSERR_ROFS) +#define nfserr_mlink cpu_to_be32(NFSERR_MLINK) +#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) +#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) +#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) +#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) +#define nfserr_stale cpu_to_be32(NFSERR_STALE) +#define nfserr_remote cpu_to_be32(NFSERR_REMOTE) +#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH) +#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE) +#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC) +#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP) +#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL) +#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT) +#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE) +#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX) +#define nfserr_denied cpu_to_be32(NFSERR_DENIED) +#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK) +#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED) +#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_same cpu_to_be32(NFSERR_SAME) +#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE) +#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID) +#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE) +#define nfserr_moved cpu_to_be32(NFSERR_MOVED) +#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE) +#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH) +#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED) +#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID) +#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID) +#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID) +#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) +#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) +#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) +#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) +#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) +#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) +#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) +#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) +#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) +#define nfserr_grace cpu_to_be32(NFSERR_GRACE) +#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) +#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) +#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) +#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) +#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) +#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) +#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. * Client should resend eventually */ -#define nfserr_dropit __constant_htonl(30000) +#define nfserr_dropit cpu_to_be32(30000) /* end-of-file indicator in readdir */ -#define nfserr_eof __constant_htonl(30001) +#define nfserr_eof cpu_to_be32(30001) /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 49e1eb4..d8910b6 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -69,27 +69,27 @@ struct xdr_buf { * pre-xdr'ed macros. */ -#define xdr_zero __constant_htonl(0) -#define xdr_one __constant_htonl(1) -#define xdr_two __constant_htonl(2) - -#define rpc_success __constant_htonl(RPC_SUCCESS) -#define rpc_prog_unavail __constant_htonl(RPC_PROG_UNAVAIL) -#define rpc_prog_mismatch __constant_htonl(RPC_PROG_MISMATCH) -#define rpc_proc_unavail __constant_htonl(RPC_PROC_UNAVAIL) -#define rpc_garbage_args __constant_htonl(RPC_GARBAGE_ARGS) -#define rpc_system_err __constant_htonl(RPC_SYSTEM_ERR) -#define rpc_drop_reply __constant_htonl(RPC_DROP_REPLY) - -#define rpc_auth_ok __constant_htonl(RPC_AUTH_OK) -#define rpc_autherr_badcred __constant_htonl(RPC_AUTH_BADCRED) -#define rpc_autherr_rejectedcred __constant_htonl(RPC_AUTH_REJECTEDCRED) -#define rpc_autherr_badverf __constant_htonl(RPC_AUTH_BADVERF) -#define rpc_autherr_rejectedverf __constant_htonl(RPC_AUTH_REJECTEDVERF) -#define rpc_autherr_tooweak __constant_htonl(RPC_AUTH_TOOWEAK) -#define rpcsec_gsserr_credproblem __constant_htonl(RPCSEC_GSS_CREDPROBLEM) -#define rpcsec_gsserr_ctxproblem __constant_htonl(RPCSEC_GSS_CTXPROBLEM) -#define rpc_autherr_oldseqnum __constant_htonl(101) +#define xdr_zero cpu_to_be32(0) +#define xdr_one cpu_to_be32(1) +#define xdr_two cpu_to_be32(2) + +#define rpc_success cpu_to_be32(RPC_SUCCESS) +#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) +#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) +#define rpc_proc_unavail cpu_to_be32(RPC_PROC_UNAVAIL) +#define rpc_garbage_args cpu_to_be32(RPC_GARBAGE_ARGS) +#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) +#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) + +#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) +#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) +#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) +#define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) +#define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) +#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) +#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) +#define rpc_autherr_oldseqnum cpu_to_be32(101) /* * Miscellaneous XDR helper functions -- cgit v0.10.2 From a4455be0850009f5da9a3b82523079922cd4b26e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 10:40:22 -0800 Subject: nfsd4: trivial preprocess_stateid_op cleanup Remove a couple redundant comments, adjust style; no change in behavior. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7f616e9..b7e2f25 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2072,21 +2072,21 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(current_fh, stateid, flags); - /* STALE STATEID */ status = nfserr_stale_stateid; if (STALE_STATEID(stateid)) goto out; - /* BAD STATEID */ status = nfserr_bad_stateid; if (!stateid->si_fileid) { /* delegation stateid */ - if(!(dp = find_delegation_stateid(ino, stateid))) { + dp = find_delegation_stateid(ino, stateid); + if (!dp) { dprintk("NFSD: delegation stateid not found\n"); goto out; } stidp = &dp->dl_stateid; } else { /* open or lock stateid */ - if (!(stp = find_stateid(stateid, flags))) { + stp = find_stateid(stateid, flags); + if (!stp) { dprintk("NFSD: open or lock stateid not found\n"); goto out; } @@ -2100,13 +2100,15 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (status) goto out; if (stp) { - if ((status = nfs4_check_openmode(stp,flags))) + status = nfs4_check_openmode(stp, flags); + if (status) goto out; renew_client(stp->st_stateowner->so_client); if (filpp) *filpp = stp->st_vfs_file; } else { - if ((status = nfs4_check_delegmode(dp, flags))) + status = nfs4_check_delegmode(dp, flags); + if (status) goto out; renew_client(dp->dl_client); if (flags & DELEG_RET) -- cgit v0.10.2 From 0c2a498fa6d33d8ca9c8a0c29039c41e1734cb9e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 11:11:50 -0800 Subject: nfsd4: move check_stateid_generation check No change in behavior. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b7e2f25..d6ca2be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2084,6 +2084,9 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl goto out; } stidp = &dp->dl_stateid; + status = check_stateid_generation(stateid, stidp); + if (status) + goto out; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); if (!stp) { @@ -2095,10 +2098,10 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (!stp->st_stateowner->so_confirmed) goto out; stidp = &stp->st_stateid; + status = check_stateid_generation(stateid, stidp); + if (status) + goto out; } - status = check_stateid_generation(stateid, stidp); - if (status) - goto out; if (stp) { status = nfs4_check_openmode(stp, flags); if (status) -- cgit v0.10.2 From dc9bf700ed2fc3eab50f31000b13fda781e7c9f1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 11:14:43 -0800 Subject: nfsd4: remove redundant "if" in nfs4_preprocess_stateid_op Note that we exit this first big "if" with stp == NULL if and only if we took the first branch; therefore, the second "if" is redundant, and we can just combine the two, simplifying the logic. Reviewed-by: Yang Hongyang Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d6ca2be..16fcb65 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2087,6 +2087,14 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl status = check_stateid_generation(stateid, stidp); if (status) goto out; + status = nfs4_check_delegmode(dp, flags); + if (status) + goto out; + renew_client(dp->dl_client); + if (flags & DELEG_RET) + unhash_delegation(dp); + if (filpp) + *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); if (!stp) { @@ -2101,23 +2109,12 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl status = check_stateid_generation(stateid, stidp); if (status) goto out; - } - if (stp) { status = nfs4_check_openmode(stp, flags); if (status) goto out; renew_client(stp->st_stateowner->so_client); if (filpp) *filpp = stp->st_vfs_file; - } else { - status = nfs4_check_delegmode(dp, flags); - if (status) - goto out; - renew_client(dp->dl_client); - if (flags & DELEG_RET) - unhash_delegation(dp); - if (filpp) - *filpp = dp->dl_vfs_file; } status = nfs_ok; out: -- cgit v0.10.2 From fd03b09906c32aea7b47f1275c9cd6034141159d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 11:27:30 -0800 Subject: nfsd4: remove unneeded local variable We no longer need stidp. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 16fcb65..099938e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2056,7 +2056,6 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl { struct nfs4_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; - stateid_t *stidp; struct inode *ino = current_fh->fh_dentry->d_inode; __be32 status; @@ -2083,8 +2082,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl dprintk("NFSD: delegation stateid not found\n"); goto out; } - stidp = &dp->dl_stateid; - status = check_stateid_generation(stateid, stidp); + status = check_stateid_generation(stateid, &dp->dl_stateid); if (status) goto out; status = nfs4_check_delegmode(dp, flags); @@ -2105,8 +2103,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl goto out; if (!stp->st_stateowner->so_confirmed) goto out; - stidp = &stp->st_stateid; - status = check_stateid_generation(stateid, stidp); + status = check_stateid_generation(stateid, &stp->st_stateid); if (status) goto out; status = nfs4_check_openmode(stp, flags); -- cgit v0.10.2 From 819a8f539acf7838d62fec20e88401ff53303cd1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 12:13:24 -0800 Subject: nfsd4: remove some dprintk's I can't recall ever seeing these printk's used to debug a problem. I'll happily put them back if we see a case where they'd be useful. (Though if we do that the find_XXX() errors would probably be better reported in find_XXX() functions themselves.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 099938e..909a7a4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2059,9 +2059,6 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl struct inode *ino = current_fh->fh_dentry->d_inode; __be32 status; - dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", - stateid->si_boot, stateid->si_stateownerid, - stateid->si_fileid, stateid->si_generation); if (filpp) *filpp = NULL; @@ -2078,10 +2075,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl status = nfserr_bad_stateid; if (!stateid->si_fileid) { /* delegation stateid */ dp = find_delegation_stateid(ino, stateid); - if (!dp) { - dprintk("NFSD: delegation stateid not found\n"); + if (!dp) goto out; - } status = check_stateid_generation(stateid, &dp->dl_stateid); if (status) goto out; @@ -2095,10 +2090,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); - if (!stp) { - dprintk("NFSD: open or lock stateid not found\n"); + if (!stp) goto out; - } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) -- cgit v0.10.2 From 3e633079e377d2b527a8390f63ceb887b5cabfbf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 13:17:19 -0800 Subject: nfsd4: add a helper function to decide if stateid is delegation Make this check self-documenting. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 909a7a4..d555585 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2048,6 +2048,11 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref) return nfs_ok; } +static int is_delegation_stateid(stateid_t *stateid) +{ + return stateid->si_fileid == 0; +} + /* * Checks for stateid operations */ @@ -2073,7 +2078,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl goto out; status = nfserr_bad_stateid; - if (!stateid->si_fileid) { /* delegation stateid */ + if (is_delegation_stateid(stateid)) { dp = find_delegation_stateid(ino, stateid); if (!dp) goto out; -- cgit v0.10.2 From 203a8c8e66278a5936a230edaac29017e50c88fb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 13:29:14 -0800 Subject: nfsd4: separate delegreturn case from preprocess_stateid_op Delegreturn is enough a special case for preprocess_stateid_op to warrant just open-coding it in delegreturn. There should be no change in behavior here; we're just reshuffling code. Thanks to Yang Hongyang for catching a critical typo. Reviewed-by: Yang Hongyang Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d555585..3570a0d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2000,10 +2000,7 @@ out: static inline __be32 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) { - /* Trying to call delegreturn with a special stateid? Yuch: */ - if (!(flags & (RD_STATE | WR_STATE))) - return nfserr_bad_stateid; - else if (ONE_STATEID(stateid) && (flags & RD_STATE)) + if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; else if (locks_in_grace()) { /* Answer in remaining cases depends on existance of @@ -2024,8 +2021,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) static inline int io_during_grace_disallowed(struct inode *inode, int flags) { - return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) - && mandatory_lock(inode); + return locks_in_grace() && mandatory_lock(inode); } static int check_stateid_generation(stateid_t *in, stateid_t *ref) @@ -2089,8 +2085,6 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (status) goto out; renew_client(dp->dl_client); - if (flags & DELEG_RET) - unhash_delegation(dp); if (filpp) *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ @@ -2408,16 +2402,38 @@ __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *dr) { + struct nfs4_delegation *dp; + stateid_t *stateid = &dr->dr_stateid; + struct inode *inode; __be32 status; if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) - goto out; + return status; + inode = cstate->current_fh.fh_dentry->d_inode; nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &dr->dr_stateid, DELEG_RET, NULL); - nfs4_unlock_state(); + status = nfserr_bad_stateid; + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + goto out; + status = nfserr_stale_stateid; + if (STALE_STATEID(stateid)) + goto out; + status = nfs_ok; + if (!is_delegation_stateid(stateid)) + goto out; + status = nfserr_bad_stateid; + dp = find_delegation_stateid(inode, stateid); + if (!dp) + goto out; + status = check_stateid_generation(stateid, &dp->dl_stateid); + if (status) + goto out; + renew_client(dp->dl_client); + + unhash_delegation(dp); out: + nfs4_unlock_state(); + return status; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 1130d53..c9311a1 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -263,7 +263,6 @@ struct nfs4_stateid { #define RD_STATE 0x00000010 #define WR_STATE 0x00000020 #define CLOSE_STATE 0x00000040 -#define DELEG_RET 0x00000080 #define seqid_mutating_err(err) \ (((err) != nfserr_stale_clientid) && \ -- cgit v0.10.2 From 7e0f7cf582abd6c85232331dfe726a4e4b0fd98e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 13:32:28 -0800 Subject: nfsd4: fail when delegreturn gets a non-delegation stateid Previous cleanup reveals an obvious (though harmless) bug: when delegreturn gets a stateid that isn't for a delegation, it should return an error rather than doing nothing. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3570a0d..6ae28e6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2418,10 +2418,9 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_stale_stateid; if (STALE_STATEID(stateid)) goto out; - status = nfs_ok; + status = nfserr_bad_stateid; if (!is_delegation_stateid(stateid)) goto out; - status = nfserr_bad_stateid; dp = find_delegation_stateid(inode, stateid); if (!dp) goto out; -- cgit v0.10.2 From 6150ef0dc7f734366d297e2eb5697ae458a1ea19 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 13:36:16 -0800 Subject: nfsd4: remove unused CHECK_FH flag All users now pass this, so it's meaningless. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index af66073..77f584f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -519,7 +519,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, &read->rd_stateid, - CHECK_FH | RD_STATE, &read->rd_filp))) { + RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } @@ -651,7 +651,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { nfs4_lock_state(); status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); + &setattr->sa_stateid, WR_STATE, NULL); nfs4_unlock_state(); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); @@ -690,7 +690,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, - CHECK_FH | WR_STATE, &filp); + WR_STATE, &filp); if (filp) get_file(filp); nfs4_unlock_state(); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ae28e6..5957f77 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2091,7 +2091,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl stp = find_stateid(stateid, flags); if (!stp) goto out; - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) + if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) goto out; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index c9311a1..503b6bb 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -256,7 +256,6 @@ struct nfs4_stateid { }; /* flags for preprocess_seqid_op() */ -#define CHECK_FH 0x00000001 #define CONFIRM 0x00000002 #define OPEN_STATE 0x00000004 #define LOCK_STATE 0x00000008 -- cgit v0.10.2 From 18f82731b7784ba81ee9b1ed6a8179b577fa898b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 15:23:01 -0800 Subject: nfsd4: rename io_during_grace_disallowed Use a slightly clearer, more concise name. Also removed unused argument. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5957f77..89e575e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2019,7 +2019,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) * that are not able to provide mandatory locking. */ static inline int -io_during_grace_disallowed(struct inode *inode, int flags) +grace_disallows_io(struct inode *inode) { return locks_in_grace() && mandatory_lock(inode); } @@ -2063,7 +2063,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (filpp) *filpp = NULL; - if (io_during_grace_disallowed(ino, flags)) + if (grace_disallows_io(ino)) return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) -- cgit v0.10.2 From d7fdcfe0aaaf6dffca6fa857bab374182fe7ca8b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 21 Feb 2009 15:39:54 -0800 Subject: nfsd4: put_nfs4_client does not require state lock Since free_client() is guaranteed to only be called once, and to only touch the client structure itself (not any common data structures), it has no need for the state lock. Signed-off-by: J. Bruce Fields Cc: Alexandros Batsakis diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3ddc9fb..3fd7136 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -490,8 +490,8 @@ out_put_cred: * Success or failure, now we're either waiting for lease expiration * or deleg_return. */ - nfs4_lock_state(); put_nfs4_client(clp); + nfs4_lock_state(); nfs4_put_delegation(dp); nfs4_unlock_state(); return; -- cgit v0.10.2 From 8b671b80707e4fc76adfe4387df07b3be1007c1e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 22 Feb 2009 14:51:34 -0800 Subject: nfsd4: remove use of mutex for file_hashtable As part of reducing the scope of the client_mutex, and in order to remove the need for mutexes from the callback code (so that callbacks can be done as asynchronous rpc calls), move manipulations of the file_hashtable under the recall_lock. Update the relevant comments while we're here. Signed-off-by: J. Bruce Fields Cc: Alexandros Batsakis Reviewed-by: Benny Halevy diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd7136..5dcd38e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -491,8 +491,6 @@ out_put_cred: * or deleg_return. */ put_nfs4_client(clp); - nfs4_lock_state(); nfs4_put_delegation(dp); - nfs4_unlock_state(); return; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 89e575e..54651aa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -78,14 +78,18 @@ static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, state static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; static void nfs4_set_recdir(char *recdir); -/* Locking: - * - * client_mutex: - * protects clientid_hashtbl[], clientstr_hashtbl[], - * unconfstr_hashtbl[], uncofid_hashtbl[]. - */ +/* Locking: */ + +/* Currently used for almost all code touching nfsv4 state: */ static DEFINE_MUTEX(client_mutex); +/* + * Currently used for the del_recall_lru and file hash table. In an + * effort to decrease the scope of the client_mutex, this spinlock may + * eventually cover more: + */ +static DEFINE_SPINLOCK(recall_lock); + static struct kmem_cache *stateowner_slab = NULL; static struct kmem_cache *file_slab = NULL; static struct kmem_cache *stateid_slab = NULL; @@ -116,33 +120,23 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -/* - * Delegation state - */ - -/* recall_lock protects the del_recall_lru */ -static DEFINE_SPINLOCK(recall_lock); static struct list_head del_recall_lru; -static void -free_nfs4_file(struct kref *kref) -{ - struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref); - list_del(&fp->fi_hash); - iput(fp->fi_inode); - kmem_cache_free(file_slab, fp); -} - static inline void put_nfs4_file(struct nfs4_file *fi) { - kref_put(&fi->fi_ref, free_nfs4_file); + if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { + list_del(&fi->fi_hash); + spin_unlock(&recall_lock); + iput(fi->fi_inode); + kmem_cache_free(file_slab, fi); + } } static inline void get_nfs4_file(struct nfs4_file *fi) { - kref_get(&fi->fi_ref); + atomic_inc(&fi->fi_ref); } static int num_delegations; @@ -1000,11 +994,13 @@ alloc_init_file(struct inode *ino) fp = kmem_cache_alloc(file_slab, GFP_KERNEL); if (fp) { - kref_init(&fp->fi_ref); + atomic_set(&fp->fi_ref, 1); INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); + spin_lock(&recall_lock); list_add(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&recall_lock); fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; fp->fi_had_conflict = false; @@ -1177,12 +1173,15 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; + spin_lock(&recall_lock); list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); + spin_unlock(&recall_lock); return fp; } } + spin_unlock(&recall_lock); return NULL; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 503b6bb..a6e4a00 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -214,7 +214,7 @@ struct nfs4_stateowner { * share_acces, share_deny on the file. */ struct nfs4_file { - struct kref fi_ref; + atomic_t fi_ref; struct list_head fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; -- cgit v0.10.2 From 418cd20aa19b54554cab383e2fd0d1cb8c4732ee Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 22 Feb 2009 15:52:13 -0800 Subject: nfsd4: fix do_probe_callback errors The errors returned aren't used. Just return 0 and make them available to a dprintk(). Also, consistently use -ERRNO errors instead of nfs errors. Signed-off-by: J. Bruce Fields Reviewed-by: Benny Halevy diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5dcd38e..8d55f50 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -389,12 +389,10 @@ static int do_probe_callback(void *data) .rpc_argp = clp, }; struct rpc_clnt *client; - int status; + int status = -EINVAL; - if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) { - status = nfserr_cb_path_down; + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) goto out_err; - } /* Initialize address */ memset(&addr, 0, sizeof(addr)); @@ -405,8 +403,9 @@ static int do_probe_callback(void *data) /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client\n"); status = PTR_ERR(client); + dprintk("NFSD: couldn't create callback client: %d\n", + status); goto out_err; } @@ -422,10 +421,10 @@ static int do_probe_callback(void *data) out_release_client: rpc_shutdown_client(client); out_err: - dprintk("NFSD: warning: no callback path to client %.*s\n", - (int)clp->cl_name.len, clp->cl_name.data); + dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", + (int)clp->cl_name.len, clp->cl_name.data, status); put_nfs4_client(clp); - return status; + return 0; } /* -- cgit v0.10.2 From a601caeda21c0e94c153dbd146ec0899cc5f324f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 22 Feb 2009 16:43:45 -0800 Subject: nfsd4: move rpc_client setup to a separate function Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8d55f50..290289b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -361,9 +361,8 @@ static struct rpc_program cb_program = { /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cb_set an atomic? */ -static int do_probe_callback(void *data) +static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) { - struct nfs4_client *clp = data; struct sockaddr_in addr; struct nfs4_callback *cb = &clp->cl_callback; struct rpc_timeout timeparms = { @@ -384,15 +383,10 @@ static int do_probe_callback(void *data) .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), .client_name = clp->cl_principal, }; - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], - .rpc_argp = clp, - }; struct rpc_clnt *client; - int status = -EINVAL; if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) - goto out_err; + return ERR_PTR(-EINVAL); /* Initialize address */ memset(&addr, 0, sizeof(addr)); @@ -402,6 +396,25 @@ static int do_probe_callback(void *data) /* Create RPC client */ client = rpc_create(&args); + if (IS_ERR(client)) + dprintk("NFSD: couldn't create callback client: %ld\n", + PTR_ERR(client)); + return client; + +} + +static int do_probe_callback(void *data) +{ + struct nfs4_client *clp = data; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_message msg = { + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], + .rpc_argp = clp, + }; + struct rpc_clnt *client; + int status; + + client = setup_callback_client(clp); if (IS_ERR(client)) { status = PTR_ERR(client); dprintk("NFSD: couldn't create callback client: %d\n", -- cgit v0.10.2 From 1e685ec270cb97680be4eb8cf6b615f5f7f1403a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 4 Mar 2009 23:06:06 +0200 Subject: NFSD: return nfsv4 error code nfserr_notsupp rather than nfsv[23]'s nfserr_opnotsupp Thanks for Bill Baker at sun.com for catching this at Connectathon 2009. This bug was introduced in 2.6.27 Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9250067..aaf5f23 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1005,7 +1005,7 @@ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) static __be32 nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) { - return nfserr_opnotsupp; + return nfserr_notsupp; } typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); -- cgit v0.10.2 From 31dec2538e45e9fff2007ea1f4c6bae9f78db724 Mon Sep 17 00:00:00 2001 From: David Shaw Date: Thu, 5 Mar 2009 20:16:14 -0500 Subject: Short write in nfsd becomes a full write to the client If a filesystem being written to via NFS returns a short write count (as opposed to an error) to nfsd, nfsd treats that as a success for the entire write, rather than the short count that actually succeeded. For example, given a 8192 byte write, if the underlying filesystem only writes 4096 bytes, nfsd will ack back to the nfs client that all 8192 bytes were written. The nfs client does have retry logic for short writes, but this is never called as the client is told the complete write succeeded. There are probably other ways it could happen, but in my case it happened with a fuse (filesystem in userspace) filesystem which can rather easily have a partial write. Here is a patch to properly return the short write count to the client. Signed-off-by: David Shaw Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 579ce8c..7c9fe83 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -203,6 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, struct nfsd3_writeres *resp) { __be32 nfserr; + unsigned long cnt = argp->len; dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", SVCFH_fmt(&argp->fh), @@ -215,9 +216,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, nfserr = nfsd_write(rqstp, &resp->fh, NULL, argp->offset, rqstp->rq_vec, argp->vlen, - argp->len, + &cnt, &resp->committed); - resp->count = argp->count; + resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 77f584f..283d77a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -682,6 +682,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file *filp = NULL; u32 *p; __be32 status = nfs_ok; + unsigned long cnt; /* no need to check permission - this will be done in nfsd_write() */ @@ -700,7 +701,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - write->wr_bytes_written = write->wr_buflen; + cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; p = (u32 *)write->wr_verifier.data; *p++ = nfssvc_boot.tv_sec; @@ -708,10 +709,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_write(rqstp, &cstate->current_fh, filp, write->wr_offset, rqstp->rq_vec, write->wr_vlen, - write->wr_buflen, &write->wr_how_written); + &cnt, &write->wr_how_written); if (filp) fput(filp); + write->wr_bytes_written = cnt; + if (status == nfserr_symlink) status = nfserr_inval; return status; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6f7f263..e298e26 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -180,6 +180,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, { __be32 nfserr; int stable = 1; + unsigned long cnt = argp->len; dprintk("nfsd: WRITE %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -188,7 +189,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, argp->offset, rqstp->rq_vec, argp->vlen, - argp->len, + &cnt, &stable); return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0c07629..54404d7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -960,7 +960,7 @@ static void kill_suid(struct dentry *dentry) static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long cnt, int *stablep) + unsigned long *cnt, int *stablep) { struct svc_export *exp; struct dentry *dentry; @@ -974,7 +974,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, err = nfserr_perm; if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) + (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) goto out; #endif @@ -1006,7 +1006,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); if (host_err >= 0) { - nfsdstats.io_write += cnt; + nfsdstats.io_write += host_err; fsnotify_modify(file->f_path.dentry); } @@ -1051,9 +1051,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) + if (host_err >= 0) { err = 0; - else + *cnt = host_err; + } else err = nfserrno(host_err); out: return err; @@ -1095,7 +1096,7 @@ out: */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, + loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int *stablep) { __be32 err = 0; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 16f7b40..54beda1 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -105,7 +105,7 @@ void nfsd_close(struct file *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, - loff_t, struct kvec *,int, unsigned long, int *); + loff_t, struct kvec *,int, unsigned long *, int *); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, -- cgit v0.10.2 From a1c8c4d1ff54c6c86930ee3c4c73c69eeb9ede61 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 9 Mar 2009 12:17:29 -0400 Subject: nfsd4: support putpubfh operation Currently putpubfh returns NFSERR_OPNOTSUPP, which isn't actually allowed for v4. The right error is probably NFSERR_NOTSUPP. But let's just implement it; though rarely seen, it can be used by Solaris (with a special mount option), is mandated by the rfc, and is trivial for us to support. Thanks to Yang Hongyang for pointing out the original problem, and to Mike Eisler, Tom Talpey, Trond Myklebust, and Dave Noveck for further argument.... Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 283d77a..36d74cd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1045,7 +1045,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { .op_name = "OP_PUTFH", }, [OP_PUTPUBFH] = { - /* unsupported, just for future reference: */ + .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, .op_name = "OP_PUTPUBFH", }, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index aaf5f23..76a0b2a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1031,7 +1031,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_READ] = (nfsd4_dec)nfsd4_decode_read, [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, -- cgit v0.10.2 From 05f4f678b0511a24795a017b5332455077be3b1c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 13 Mar 2009 16:02:59 -0400 Subject: nfsd4: don't do lookup within readdir in recovery code The main nfsd code was recently modified to no longer do lookups from withing the readdir callback, to avoid locking problems on certain filesystems. This (rather hacky, and overdue for replacement) NFSv4 recovery code has the same problem. Fix it to build up a list of names (instead of dentries) and do the lookups afterwards. Reported symptoms were a deadlock in the xfs code (called from nfsd4_recdir_load), with /var/lib/nfs on xfs. Signed-off-by: J. Bruce Fields Reported-by: David Warren diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 74f7b67..b11cf8d 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -182,36 +182,26 @@ out_unlock: typedef int (recdir_func)(struct dentry *, struct dentry *); -struct dentry_list { - struct dentry *dentry; +struct name_list { + char name[HEXDIR_LEN]; struct list_head list; }; -struct dentry_list_arg { - struct list_head dentries; - struct dentry *parent; -}; - static int -nfsd4_build_dentrylist(void *arg, const char *name, int namlen, +nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct dentry_list_arg *dla = arg; - struct list_head *dentries = &dla->dentries; - struct dentry *parent = dla->parent; - struct dentry *dentry; - struct dentry_list *child; + struct list_head *names = arg; + struct name_list *entry; - if (name && isdotent(name, namlen)) + if (namlen != HEXDIR_LEN - 1) return 0; - dentry = lookup_one_len(name, parent, namlen); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - child = kmalloc(sizeof(*child), GFP_KERNEL); - if (child == NULL) + entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); + if (entry == NULL) return -ENOMEM; - child->dentry = dentry; - list_add(&child->list, dentries); + memcpy(entry->name, name, HEXDIR_LEN - 1); + entry->name[HEXDIR_LEN - 1] = '\0'; + list_add(&entry->list, names); return 0; } @@ -220,11 +210,9 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) { const struct cred *original_cred; struct file *filp; - struct dentry_list_arg dla = { - .parent = dir, - }; - struct list_head *dentries = &dla.dentries; - struct dentry_list *child; + LIST_HEAD(names); + struct name_list *entry; + struct dentry *dentry; int status; if (!rec_dir_init) @@ -233,31 +221,34 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) status = nfs4_save_creds(&original_cred); if (status < 0) return status; - INIT_LIST_HEAD(dentries); filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, current_cred()); status = PTR_ERR(filp); if (IS_ERR(filp)) goto out; - INIT_LIST_HEAD(dentries); - status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); + status = vfs_readdir(filp, nfsd4_build_namelist, &names); fput(filp); - while (!list_empty(dentries)) { - child = list_entry(dentries->next, struct dentry_list, list); - status = f(dir, child->dentry); + while (!list_empty(&names)) { + entry = list_entry(names.next, struct name_list, list); + + dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + goto out; + } + status = f(dir, dentry); + dput(dentry); if (status) goto out; - list_del(&child->list); - dput(child->dentry); - kfree(child); + list_del(&entry->list); + kfree(entry); } out: - while (!list_empty(dentries)) { - child = list_entry(dentries->next, struct dentry_list, list); - list_del(&child->list); - dput(child->dentry); - kfree(child); + while (!list_empty(&names)) { + entry = list_entry(names.next, struct name_list, list); + list_del(&entry->list); + kfree(entry); } nfs4_reset_creds(original_cred); return status; -- cgit v0.10.2 From 5cb031b0afddad73ea4191c9f0b76d20ca447dc0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 14 Mar 2009 16:38:41 -0400 Subject: nfsd4: remove redundant check from nfsd4_open Note that we already checked for this invalid case at the top of this function. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 36d74cd..f156b85 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -206,10 +206,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: - status = nfserr_inval; - if (open->op_create) - goto out; - /* fall through */ case NFS4_OPEN_CLAIM_NULL: /* * (1) set CURRENT_FH to the file being opened, -- cgit v0.10.2 From 8bbfa9f3889b643fc7de82c0c761ef17097f8faf Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:34 +1100 Subject: knfsd: remove the nfsd thread busy histogram Stop gathering the data that feeds the 'th' line in /proc/net/rpc/nfsd because the questionable data provided is not worth the scalability impact of calculating it. Instead, always report zeroes. The current approach suffers from three major issues: 1. update_thread_usage() increments buckets by call service time or call arrival time...in jiffies. On lightly loaded machines, call service times are usually < 1 jiffy; on heavily loaded machines call arrival times will be << 1 jiffy. So a large portion of the updates to the buckets are rounded down to zero, and the histogram is undercounting. 2. As seen previously on the nfs mailing list, the format in which the histogram is presented is cryptic, difficult to explain, and difficult to use. 3. Updating the histogram requires taking a global spinlock and dirtying the global variables nfsd_last_call, nfsd_busy, and nfsdstats *twice* on every RPC call, which is a significant scaling limitation. Testing on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K streaming reads at full line rate, shows the stats update code (inlined into nfsd()) takes about 1.7% of each CPU. This patch drops the contribution from nfsd() into the profile noise. This patch is a forward-ported version of knfsd-remove-nfsd-threadstats which has been shipping in the SGI "Enhanced NFS" product since 2006. In that time, exactly one customer has noticed that the threadstats were missing. It has been previously posted: http://article.gmane.org/gmane.linux.nfs/10376 and more recently requested to be posted again. Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d..c3eb075 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -40,9 +40,6 @@ extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); struct timeval nfssvc_boot; -static atomic_t nfsd_busy; -static unsigned long nfsd_last_call; -static DEFINE_SPINLOCK(nfsd_call_lock); /* * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members @@ -227,7 +224,6 @@ int nfsd_create_serv(void) nfsd_max_blksize /= 2; } - atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); @@ -376,26 +372,6 @@ nfsd_svc(unsigned short port, int nrservs) return error; } -static inline void -update_thread_usage(int busy_threads) -{ - unsigned long prev_call; - unsigned long diff; - int decile; - - spin_lock(&nfsd_call_lock); - prev_call = nfsd_last_call; - nfsd_last_call = jiffies; - decile = busy_threads*10/nfsdstats.th_cnt; - if (decile>0 && decile <= 10) { - diff = nfsd_last_call - prev_call; - if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) - nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; - if (decile == 10) - nfsdstats.th_fullcnt++; - } - spin_unlock(&nfsd_call_lock); -} /* * This is the NFS server kernel thread @@ -464,8 +440,6 @@ nfsd(void *vrqstp) continue; } - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_inc(&nfsd_busy); /* Lock the export hash tables for reading. */ exp_readlock(); @@ -474,8 +448,6 @@ nfsd(void *vrqstp) /* Unlock export hash tables */ exp_readunlock(); - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_dec(&nfsd_busy); } /* Clear signals before calling svc_exit_thread() */ -- cgit v0.10.2 From 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:35 +1100 Subject: knfsd: avoid overloading the CPU scheduler with enormous load averages Avoid overloading the CPU scheduler with enormous load averages when handling high call-rate NFS loads. When the knfsd bottom half is made aware of an incoming call by the socket layer, it tries to choose an nfsd thread and wake it up. As long as there are idle threads, one will be woken up. If there are lot of nfsd threads (a sensible configuration when the server is disk-bound or is running an HSM), there will be many more nfsd threads than CPUs to run them. Under a high call-rate low service-time workload, the result is that almost every nfsd is runnable, but only a handful are actually able to run. This situation causes two significant problems: 1. The CPU scheduler takes over 10% of each CPU, which is robbing the nfsd threads of valuable CPU time. 2. At a high enough load, the nfsd threads starve userspace threads of CPU time, to the point where daemons like portmap and rpc.mountd do not schedule for tens of seconds at a time. Clients attempting to mount an NFS filesystem timeout at the very first step (opening a TCP connection to portmap) because portmap cannot wake up from select() and call accept() in time. Disclaimer: these effects were observed on a SLES9 kernel, modern kernels' schedulers may behave more gracefully. The solution is simple: keep in each svc_pool a counter of the number of threads which have been woken but have not yet run, and do not wake any more if that count reaches an arbitrary small threshold. Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients, each with 16 synthetic client threads simulating an rsync (i.e. recursive directory listing) workload reading from an i386 RH9 install image (161480 regular files in 10841 directories) on the server. That tree is small enough to fill in the server's RAM so no disk traffic was involved. This setup gives a sustained call rate in excess of 60000 calls/sec before being CPU-bound on the server. The server was running 128 nfsds. Profiling showed schedule() taking 6.7% of every CPU, and __wake_up() taking 5.2%. This patch drops those contributions to 3.0% and 2.2%. Load average was over 120 before the patch, and 20.9 after. This patch is a forward-ported version of knfsd-avoid-nfsd-overload which has been shipping in the SGI "Enhanced NFS" product since 2006. It has been posted before: http://article.gmane.org/gmane.linux.nfs/10374 Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3435d24..39ec186 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -41,6 +41,7 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + int sp_nwaking; /* number of threads woken but not yet active */ } ____cacheline_aligned_in_smp; /* @@ -264,6 +265,7 @@ struct svc_rqst { * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ + int rq_waking; /* 1 if thread is being woken */ }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5..0551b6b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -14,6 +14,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +#define SVC_MAX_WAKING 5 + static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -298,6 +300,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; + int thread_avail; if (!(xprt->xpt_flags & ((1<sp_lock); - if (!list_empty(&pool->sp_threads) && - !list_empty(&pool->sp_sockets)) - printk(KERN_ERR - "svc_xprt_enqueue: " - "threads and transports both waiting??\n"); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); @@ -353,7 +350,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - if (!list_empty(&pool->sp_threads)) { + /* Work out whether threads are available */ + thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */ + if (pool->sp_nwaking >= SVC_MAX_WAKING) { + /* too many threads are runnable and trying to wake up */ + thread_avail = 0; + } + + if (thread_avail) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -368,6 +372,8 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + rqstp->rq_waking = 1; + pool->sp_nwaking++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { @@ -633,6 +639,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); + if (rqstp->rq_waking) { + rqstp->rq_waking = 0; + pool->sp_nwaking--; + BUG_ON(pool->sp_nwaking < 0); + } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; -- cgit v0.10.2 From 03cf6c9f49a8fea953d38648d016e3f46e814991 Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:36 +1100 Subject: knfsd: add file to export stats about nfsd pools Add /proc/fs/nfsd/pool_stats to export to userspace various statistics about the operation of rpc server thread pools. This patch is based on a forward-ported version of knfsd-add-pool-thread-stats which has been shipping in the SGI "Enhanced NFS" product since 2006 and which was previously posted: http://article.gmane.org/gmane.linux.nfs/10375 It has also been updated thus: * moved EXPORT_SYMBOL() to near the function it exports * made the new struct struct seq_operations const * used SEQ_START_TOKEN instead of ((void *)1) * merged fix from SGI PV 990526 "sunrpc: use dprintk instead of printk in svc_pool_stats_*()" by Harshula Jayasuriya. * merged fix from SGI PV 964001 "Crash reading pool_stats before nfsds are started". Signed-off-by: Greg Banks Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3d93b20..4adebb6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -60,6 +60,7 @@ enum { NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, + NFSD_Pool_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -172,6 +173,16 @@ static const struct file_operations exports_operations = { .owner = THIS_MODULE, }; +extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); + +static struct file_operations pool_stats_operations = { + .open = nfsd_pool_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -1246,6 +1257,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c3eb075..ef0a368 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -546,3 +546,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); return 1; } + +int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + if (nfsd_serv == NULL) + return -ENODEV; + return svc_pool_stats_open(nfsd_serv, file); +} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 39ec186..9f9f699 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -24,6 +24,15 @@ */ typedef int (*svc_thread_fn)(void *); +/* statistics for svc_pool structures */ +struct svc_pool_stats { + unsigned long packets; + unsigned long sockets_queued; + unsigned long threads_woken; + unsigned long overloads_avoided; + unsigned long threads_timedout; +}; + /* * * RPC service thread pool. @@ -42,6 +51,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ int sp_nwaking; /* number of threads woken but not yet active */ + struct svc_pool_stats sp_stats; /* statistics on pool operation */ } ____cacheline_aligned_in_smp; /* @@ -396,6 +406,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, sa_family_t, void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); int svc_register(const struct svc_serv *, const unsigned short, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0551b6b..1e66f24 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -318,6 +318,8 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) goto out_unlock; } + pool->sp_stats.packets++; + /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY * atomically because it also guards against trying to enqueue @@ -355,6 +357,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (pool->sp_nwaking >= SVC_MAX_WAKING) { /* too many threads are runnable and trying to wake up */ thread_avail = 0; + pool->sp_stats.overloads_avoided++; } if (thread_avail) { @@ -374,11 +377,13 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); rqstp->rq_waking = 1; pool->sp_nwaking++; + pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; BUG_ON(xprt->xpt_pool != pool); } @@ -591,6 +596,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); + long time_left; dprintk("svc: server %p waiting for data (to = %ld)\n", rqstp, timeout); @@ -676,12 +682,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); - schedule_timeout(timeout); + time_left = schedule_timeout(timeout); try_to_freeze(); spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); + if (!time_left) + pool->sp_stats.threads_timedout++; xprt = rqstp->rq_xprt; if (!xprt) { @@ -1114,3 +1122,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) return totlen; } EXPORT_SYMBOL_GPL(svc_xprt_names); + + +/*----------------------------------------------------------------------------*/ + +static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) +{ + unsigned int pidx = (unsigned int)*pos; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + + lock_kernel(); + /* bump up the pseudo refcount while traversing */ + svc_get(serv); + unlock_kernel(); + + if (!pidx) + return SEQ_START_TOKEN; + return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); +} + +static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct svc_pool *pool = p; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); + + if (p == SEQ_START_TOKEN) { + pool = &serv->sv_pools[0]; + } else { + unsigned int pidx = (pool - &serv->sv_pools[0]); + if (pidx < serv->sv_nrpools-1) + pool = &serv->sv_pools[pidx+1]; + else + pool = NULL; + } + ++*pos; + return pool; +} + +static void svc_pool_stats_stop(struct seq_file *m, void *p) +{ + struct svc_serv *serv = m->private; + + lock_kernel(); + /* this function really, really should have been called svc_put() */ + svc_destroy(serv); + unlock_kernel(); +} + +static int svc_pool_stats_show(struct seq_file *m, void *p) +{ + struct svc_pool *pool = p; + + if (p == SEQ_START_TOKEN) { + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + return 0; + } + + seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + pool->sp_id, + pool->sp_stats.packets, + pool->sp_stats.sockets_queued, + pool->sp_stats.threads_woken, + pool->sp_stats.overloads_avoided, + pool->sp_stats.threads_timedout); + + return 0; +} + +static const struct seq_operations svc_pool_stats_seq_ops = { + .start = svc_pool_stats_start, + .next = svc_pool_stats_next, + .stop = svc_pool_stats_stop, + .show = svc_pool_stats_show, +}; + +int svc_pool_stats_open(struct svc_serv *serv, struct file *file) +{ + int err; + + err = seq_open(file, &svc_pool_stats_seq_ops); + if (!err) + ((struct seq_file *) file->private_data)->private = serv; + return err; +} +EXPORT_SYMBOL(svc_pool_stats_open); + +/*----------------------------------------------------------------------------*/ -- cgit v0.10.2 From 026722c25e6eb018eab8b9a3c198c258f5b7a2e7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Mar 2009 15:06:26 -0400 Subject: nfsd4: don't check ip address in setclientid The spec allows clients to change ip address, so we shouldn't be requiring that setclientid always come from the same address. For example, a client could reboot and get a new dhcpd address, but still present the same clientid to the server. In that case the server should revoke the client's previous state and allow it to continue, instead of (as it currently does) returning a CLID_INUSE error. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 54651aa..070e9e5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -791,10 +791,9 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (conf) { /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; - if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) - || conf->cl_addr != sin->sin_addr.s_addr) { - dprintk("NFSD: setclientid: string in use by clientat %pI4\n", - &conf->cl_addr); + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + dprintk("NFSD: setclientid: string in use by client" + " at %pI4\n", &conf->cl_addr); goto out; } } -- cgit v0.10.2 From 47a14ef1af48c696b214ac168f056ddc79793d0e Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 21 Oct 2008 14:13:47 -0400 Subject: svcrpc: take advantage of tcp autotuning Allow the NFSv4 server to make use of TCP autotuning behaviour, which was previously disabled by setting the sk_userlocks variable. Set the receive buffers to be big enough to receive the whole RPC request, and set this for the listening socket, not the accept socket. Remove the code that readjusts the receive/send buffer sizes for the accepted socket. Previously this code was used to influence the TCP window management behaviour, which is no longer needed when autotuning is enabled. This can improve IO bandwidth on networks with high bandwidth-delay products, where a large tcp window is required. It also simplifies performance tuning, since getting adequate tcp buffers previously required increasing the number of nfsd threads. Signed-off-by: Olga Kornievskaia Cc: Jim Rees Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5763e64..7a2a90f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -345,7 +345,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; release_sock(sock->sk); #endif } @@ -797,23 +796,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* Receive data. If we haven't got the record length yet, get @@ -1061,15 +1043,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1140,8 +1113,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } /* * We start one listener per sv_serv. We want AF_INET -- cgit v0.10.2 From 0953e620de0538cbd081f1b45126f6098112a598 Mon Sep 17 00:00:00 2001 From: "Sachin S. Prabhu" Date: Mon, 23 Feb 2009 16:22:03 +0000 Subject: Inconsistent setattr behaviour There is an inconsistency seen in the behaviour of nfs compared to other local filesystems on linux when changing owner or group of a directory. If the directory has SUID/SGID flags set, on changing owner or group on the directory, the flags are stripped off on nfs. These flags are maintained on other filesystems such as ext3. To reproduce on a nfs share or local filesystem, run the following commands mkdir test; chmod +s+g test; chown user1 test; ls -ld test On the nfs share, the flags are stripped and the output seen is drwxr-xr-x 2 user1 root 4096 Feb 23 2009 test On other local filesystems(ex: ext3), the flags are not stripped and the output seen is drwsr-sr-x 2 user1 root 4096 Feb 23 13:57 test chown_common() called from sys_chown() will only strip the flags if the inode is not a directory. static int chown_common(struct dentry * dentry, uid_t user, gid_t group) { .. if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; .. } See: http://www.opengroup.org/onlinepubs/7990989775/xsh/chown.html "If the path argument refers to a regular file, the set-user-ID (S_ISUID) and set-group-ID (S_ISGID) bits of the file mode are cleared upon successful return from chown(), unless the call is made by a process with appropriate privileges, in which case it is implementation-dependent whether these bits are altered. If chown() is successfully invoked on a file that is not a regular file, these bits may be cleared. These bits are defined in ." The behaviour as it stands does not appear to violate POSIX. However the actions performed are inconsistent when comparing ext3 and nfs. Signed-off-by: Sachin Prabhu Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 54404d7..8790571 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -366,8 +366,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, } /* Revoke setuid/setgid on chown */ - if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || - ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { + if (!S_ISDIR(inode->i_mode) && + (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || + ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { iap->ia_valid |= ATTR_KILL_PRIV; if (iap->ia_valid & ATTR_MODE) { /* we're setting mode too, just clear the s*id bits */ -- cgit v0.10.2 From 2795e53b4ed5d1f49d2283f416c922f55ec7d461 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Mar 2009 12:07:14 -0400 Subject: SUNRPC: Clean up static inline functions in svc_xprt.h Clean up: Enable the use of const arguments in higher level svc_ APIs by adding const to the arguments of the helper functions in svc_xprt.h Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0127dac..959b931 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -88,29 +88,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt) kref_get(&xprt->xpt_ref); } static inline void svc_xprt_set_local(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_local, sa, salen); xprt->xpt_locallen = salen; } static inline void svc_xprt_set_remote(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; } -static inline unsigned short svc_addr_port(struct sockaddr *sa) +static inline unsigned short svc_addr_port(const struct sockaddr *sa) { - unsigned short ret = 0; + const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa; + switch (sa->sa_family) { case AF_INET: - ret = ntohs(((struct sockaddr_in *)sa)->sin_port); - break; + return ntohs(sin->sin_port); case AF_INET6: - ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); - break; + return ntohs(sin6->sin6_port); } - return ret; + + return 0; } static inline size_t svc_addr_len(struct sockaddr *sa) @@ -124,36 +127,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa) return -EAFNOSUPPORT; } -static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_local); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_local); } -static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote); } -static inline char *__svc_print_addr(struct sockaddr *addr, - char *buf, size_t len) +static inline char *__svc_print_addr(const struct sockaddr *addr, + char *buf, const size_t len) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)addr; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr; + switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%pI4, port=%u", - &((struct sockaddr_in *)addr)->sin_addr, - ntohs(((struct sockaddr_in *) addr)->sin_port)); + snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr, + ntohs(sin->sin_port)); break; case AF_INET6: snprintf(buf, len, "%pI6, port=%u", - &((struct sockaddr_in6 *)addr)->sin6_addr, - ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + &sin6->sin6_addr, + ntohs(sin6->sin6_port)); break; default: snprintf(buf, len, "unknown address type: %d", addr->sa_family); break; } + return buf; } #endif /* SUNRPC_SVC_XPRT_H */ -- cgit v0.10.2 From abd91ee979f785b7377216532620d98ab4e3e5af Mon Sep 17 00:00:00 2001 From: ideawu Date: Thu, 26 Mar 2009 12:55:29 +0800 Subject: sunrpc/svc.c: Remove unused line 'rqstp->rq_server = serv;' in svc_process There is no need to set rqstp->rq_server to serv, while serv is initialized as rqstp->rq_server at previous line. And between these two lines, there is no change to rqstp->rq_server. Signed-off-by: ideawu Reviewed-by: Tom Tucker Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c51fed4..fff09a2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1093,7 +1093,6 @@ svc_process(struct svc_rqst *rqstp) procp = versp->vs_proc + proc; if (proc >= versp->vs_nproc || !procp->pc_func) goto err_bad_proc; - rqstp->rq_server = serv; rqstp->rq_procinfo = procp; /* Syntactic check complete */ -- cgit v0.10.2 From b5cbc369db39d9080f4932db8607aea1e1654d4d Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Thu, 26 Mar 2009 17:45:27 +1100 Subject: Document /proc/fs/nfsd/pool_stats Document the format and semantics of the /proc/fs/nfsd/pool_stats file. Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/knfsd-stats.txt new file mode 100644 index 0000000..64ced51 --- /dev/null +++ b/Documentation/filesystems/knfsd-stats.txt @@ -0,0 +1,159 @@ + +Kernel NFS Server Statistics +============================ + +This document describes the format and semantics of the statistics +which the kernel NFS server makes available to userspace. These +statistics are available in several text form pseudo files, each of +which is described separately below. + +In most cases you don't need to know these formats, as the nfsstat(8) +program from the nfs-utils distribution provides a helpful command-line +interface for extracting and printing them. + +All the files described here are formatted as a sequence of text lines, +separated by newline '\n' characters. Lines beginning with a hash +'#' character are comments intended for humans and should be ignored +by parsing routines. All other lines contain a sequence of fields +separated by whitespace. + +/proc/fs/nfsd/pool_stats +------------------------ + +This file is available in kernels from 2.6.30 onwards, if the +/proc/fs/nfsd filesystem is mounted (it almost always should be). + +The first line is a comment which describes the fields present in +all the other lines. The other lines present the following data as +a sequence of unsigned decimal numeric fields. One line is shown +for each NFS thread pool. + +All counters are 64 bits wide and wrap naturally. There is no way +to zero these counters, instead applications should do their own +rate conversion. + +pool + The id number of the NFS thread pool to which this line applies. + This number does not change. + + Thread pool ids are a contiguous set of small integers starting + at zero. The maximum value depends on the thread pool mode, but + currently cannot be larger than the number of CPUs in the system. + Note that in the default case there will be a single thread pool + which contains all the nfsd threads and all the CPUs in the system, + and thus this file will have a single line with a pool id of "0". + +packets-arrived + Counts how many NFS packets have arrived. More precisely, this + is the number of times that the network stack has notified the + sunrpc server layer that new data may be available on a transport + (e.g. an NFS or UDP socket or an NFS/RDMA endpoint). + + Depending on the NFS workload patterns and various network stack + effects (such as Large Receive Offload) which can combine packets + on the wire, this may be either more or less than the number + of NFS calls received (which statistic is available elsewhere). + However this is a more accurate and less workload-dependent measure + of how much CPU load is being placed on the sunrpc server layer + due to NFS network traffic. + +sockets-enqueued + Counts how many times an NFS transport is enqueued to wait for + an nfsd thread to service it, i.e. no nfsd thread was considered + available. + + The circumstance this statistic tracks indicates that there was NFS + network-facing work to be done but it couldn't be done immediately, + thus introducing a small delay in servicing NFS calls. The ideal + rate of change for this counter is zero; significantly non-zero + values may indicate a performance limitation. + + This can happen either because there are too few nfsd threads in the + thread pool for the NFS workload (the workload is thread-limited), + or because the NFS workload needs more CPU time than is available in + the thread pool (the workload is CPU-limited). In the former case, + configuring more nfsd threads will probably improve the performance + of the NFS workload. In the latter case, the sunrpc server layer is + already choosing not to wake idle nfsd threads because there are too + many nfsd threads which want to run but cannot, so configuring more + nfsd threads will make no difference whatsoever. The overloads-avoided + statistic (see below) can be used to distinguish these cases. + +threads-woken + Counts how many times an idle nfsd thread is woken to try to + receive some data from an NFS transport. + + This statistic tracks the circumstance where incoming + network-facing NFS work is being handled quickly, which is a good + thing. The ideal rate of change for this counter will be close + to but less than the rate of change of the packets-arrived counter. + +overloads-avoided + Counts how many times the sunrpc server layer chose not to wake an + nfsd thread, despite the presence of idle nfsd threads, because + too many nfsd threads had been recently woken but could not get + enough CPU time to actually run. + + This statistic counts a circumstance where the sunrpc layer + heuristically avoids overloading the CPU scheduler with too many + runnable nfsd threads. The ideal rate of change for this counter + is zero. Significant non-zero values indicate that the workload + is CPU limited. Usually this is associated with heavy CPU usage + on all the CPUs in the nfsd thread pool. + + If a sustained large overloads-avoided rate is detected on a pool, + the top(1) utility should be used to check for the following + pattern of CPU usage on all the CPUs associated with the given + nfsd thread pool. + + - %us ~= 0 (as you're *NOT* running applications on your NFS server) + + - %wa ~= 0 + + - %id ~= 0 + + - %sy + %hi + %si ~= 100 + + If this pattern is seen, configuring more nfsd threads will *not* + improve the performance of the workload. If this patten is not + seen, then something more subtle is wrong. + +threads-timedout + Counts how many times an nfsd thread triggered an idle timeout, + i.e. was not woken to handle any incoming network packets for + some time. + + This statistic counts a circumstance where there are more nfsd + threads configured than can be used by the NFS workload. This is + a clue that the number of nfsd threads can be reduced without + affecting performance. Unfortunately, it's only a clue and not + a strong indication, for a couple of reasons: + + - Currently the rate at which the counter is incremented is quite + slow; the idle timeout is 60 minutes. Unless the NFS workload + remains constant for hours at a time, this counter is unlikely + to be providing information that is still useful. + + - It is usually a wise policy to provide some slack, + i.e. configure a few more nfsds than are currently needed, + to allow for future spikes in load. + + +Note that incoming packets on NFS transports will be dealt with in +one of three ways. An nfsd thread can be woken (threads-woken counts +this case), or the transport can be enqueued for later attention +(sockets-enqueued counts this case), or the packet can be temporarily +deferred because the transport is currently being used by an nfsd +thread. This last case is not very interesting and is not explicitly +counted, but can be inferred from the other counters thus: + +packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken ) + + +More +---- +Descriptions of the other statistics file should go here. + + +Greg Banks +26 Mar 2009 -- cgit v0.10.2 From 42d671c78f6486c932b68a50f88768c7b4e57ebf Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Fri, 27 Mar 2009 02:32:47 +1100 Subject: Fix a build warning about leaking CONFIG_NFSD to userspace. Fix a build warning about leaking CONFIG_NFSD to userspace. The nfsd_stats data structure does not need to be available to userspace; no kernel interface uses it. So move it inside #ifdef __KERNEL__ and the warning goes away. Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h index 7678cfb..2693ef6 100644 --- a/include/linux/nfsd/stats.h +++ b/include/linux/nfsd/stats.h @@ -11,6 +11,11 @@ #include +/* thread usage wraps very million seconds (approx one fortnight) */ +#define NFSD_USAGE_WRAP (HZ*1000000) + +#ifdef __KERNEL__ + struct nfsd_stats { unsigned int rchits; /* repcache hits */ unsigned int rcmisses; /* repcache hits */ @@ -35,10 +40,6 @@ struct nfsd_stats { }; -/* thread usage wraps very million seconds (approx one fortnight) */ -#define NFSD_USAGE_WRAP (HZ*1000000) - -#ifdef __KERNEL__ extern struct nfsd_stats nfsdstats; extern struct svc_stat nfsd_svcstats; -- cgit v0.10.2 From e354d571bb481f1d71f2c3004b9ff570b32e83bd Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sat, 28 Mar 2009 11:30:52 +0300 Subject: nfsd: embed nfsd4_current_state in nfsd4_compoundres Remove the allocation of struct nfsd4_compound_state. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f156b85..7bb8cb3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -809,29 +809,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum) nfsdstats.nfs4_opcount[opnum]++; } -static void cstate_free(struct nfsd4_compound_state *cstate) -{ - if (cstate == NULL) - return; - fh_put(&cstate->current_fh); - fh_put(&cstate->save_fh); - BUG_ON(cstate->replay_owner); - kfree(cstate); -} - -static struct nfsd4_compound_state *cstate_alloc(void) -{ - struct nfsd4_compound_state *cstate; - - cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL); - if (cstate == NULL) - return NULL; - fh_init(&cstate->current_fh, NFS4_FHSIZE); - fh_init(&cstate->save_fh, NFS4_FHSIZE); - cstate->replay_owner = NULL; - return cstate; -} - typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); @@ -859,12 +836,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, { struct nfsd4_op *op; struct nfsd4_operation *opdesc; - struct nfsd4_compound_state *cstate = NULL; + struct nfsd4_compound_state *cstate = &resp->cstate; int slack_bytes; __be32 status; resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; + resp->p = rqstp->rq_res.head[0].iov_base + + rqstp->rq_res.head[0].iov_len; resp->tagp = resp->p; /* reserve space for: taglen, tag, and opcnt */ resp->p += 2 + XDR_QUADLEN(args->taglen); @@ -873,6 +851,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->tag = args->tag; resp->opcnt = 0; resp->rqstp = rqstp; + resp->cstate.replay_owner = NULL; + fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); + fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); /* * According to RFC3010, this takes precedence over all other errors. @@ -881,11 +862,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) goto out; - status = nfserr_resource; - cstate = cstate_alloc(); - if (cstate == NULL) - goto out; - status = nfs_ok; while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; @@ -958,7 +934,9 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } - cstate_free(cstate); + fh_put(&resp->cstate.current_fh); + fh_put(&resp->cstate.save_fh); + BUG_ON(resp->cstate.replay_owner); out: nfsd4_release_compoundargs(args); dprintk("nfsv4 compound returned %d\n", ntohl(status)); diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 27bd3e3..fd15ddc 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -45,9 +45,9 @@ #define XDR_LEN(n) (((n) + 3) & ~3) struct nfsd4_compound_state { - struct svc_fh current_fh; - struct svc_fh save_fh; - struct nfs4_stateowner *replay_owner; + struct svc_fh current_fh; + struct svc_fh save_fh; + struct nfs4_stateowner *replay_owner; }; struct nfsd4_change_info { @@ -416,7 +416,8 @@ struct nfsd4_compoundres { u32 taglen; char * tag; u32 opcnt; - __be32 * tagp; /* where to encode tag and opcount */ + __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; }; #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) -- cgit v0.10.2 From 20766016329eb4985c2c8b2a1b2333e0f865fdf9 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sat, 28 Mar 2009 11:32:05 +0300 Subject: nfsd: remove nfsd4_ops array size There's no need for it. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7bb8cb3..249dad9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -943,7 +943,7 @@ out: return status; } -static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { +static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, .op_name = "OP_ACCESS", -- cgit v0.10.2 From 2f425878b6a71571341dcd3f9e9d1a6f6355da9c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:27:32 +0300 Subject: nfsd: don't use the deferral service, return NFS4ERR_DELAY On an NFSv4.1 server cache miss that causes an upcall, NFS4ERR_DELAY will be returned. It is up to the NFSv4.1 client to resend only the operations that have not been processed. Initialize rq_usedeferral to 1 in svc_process(). It sill be turned off in nfsd4_proc_compound() only when NFSv4.1 Sessions are used. Note: this isn't an adequate solution on its own. It's acceptable as a way to get some minimal 4.1 up and working, but we're going to have to find a way to avoid returning DELAY in all common cases before 4.1 can really be considered ready. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: reverse rq_nodeferral negative logic] Signed-off-by: Benny Halevy [sunrpc: initialize rq_usedeferral] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 249dad9..ded469f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -854,6 +854,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->cstate.replay_owner = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + /* Use the deferral mechanism only for NFSv4.0 compounds */ + rqstp->rq_usedeferral = (args->minorversion == 0); /* * According to RFC3010, this takes precedence over all other errors. @@ -933,12 +935,18 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } + if (!rqstp->rq_usedeferral && status == nfserr_dropit) { + dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); + status = nfserr_jukebox; + } fh_put(&resp->cstate.current_fh); fh_put(&resp->cstate.save_fh); BUG_ON(resp->cstate.replay_owner); out: nfsd4_release_compoundargs(args); + /* Reset deferral mechanism for RPC deferrals */ + rqstp->rq_usedeferral = 1; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9f9f699..815dd58 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -230,6 +230,7 @@ struct svc_rqst { struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ + int rq_usedeferral; /* use deferral */ size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index fff09a2..45984cb 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1023,6 +1023,8 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; + /* Will be turned off only when NFSv4 Sessions are used */ + rqstp->rq_usedeferral = 1; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1e66f24..600d091 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -974,7 +974,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_deferred_req *dr; - if (rqstp->rq_arg.page_len) + if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral) return NULL; /* if more than a page, give up FIXME */ if (rqstp->rq_deferred) { dr = rqstp->rq_deferred; -- cgit v0.10.2 From 18df1884a872a2cc405a578cfd0d3adc8d227277 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:27:36 +0300 Subject: nfs41: common protocol definitions Define all NFSv4.1 common operation and error code constants. Note that some of the definitions are used by both the nfs41 client and the server code. This patch is duplicated in the nfs41 and nfsd41 sessions patchset. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfs41: add exchange id flags] Signed-off-by: Mike Sager Signed-off-by: Benny Halevy [removed server-only hunk changing NFSERR_REPLAY_ME] Signed-off-by: Benny Halevy [nfs41: add SEQ4_XX to nfs41-common-protocol] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfs41: generic error code update] [nfs41: reverse EXCHGID4_INVAL_FLAG_MASK_{A,R}] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b912311..29ed600 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -21,6 +21,7 @@ #define NFS4_FHSIZE 128 #define NFS4_MAXPATHLEN PATH_MAX #define NFS4_MAXNAMLEN NAME_MAX +#define NFS4_MAX_SESSIONID_LEN 16 #define NFS4_ACCESS_READ 0x0001 #define NFS4_ACCESS_LOOKUP 0x0002 @@ -38,6 +39,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_SHARE_ACCESS_MASK 0x000F #define NFS4_SHARE_ACCESS_READ 0x0001 #define NFS4_SHARE_ACCESS_WRITE 0x0002 #define NFS4_SHARE_ACCESS_BOTH 0x0003 @@ -45,6 +47,19 @@ #define NFS4_SHARE_DENY_WRITE 0x0002 #define NFS4_SHARE_DENY_BOTH 0x0003 +/* nfs41 */ +#define NFS4_SHARE_WANT_MASK 0xFF00 +#define NFS4_SHARE_WANT_NO_PREFERENCE 0x0000 +#define NFS4_SHARE_WANT_READ_DELEG 0x0100 +#define NFS4_SHARE_WANT_WRITE_DELEG 0x0200 +#define NFS4_SHARE_WANT_ANY_DELEG 0x0300 +#define NFS4_SHARE_WANT_NO_DELEG 0x0400 +#define NFS4_SHARE_WANT_CANCEL 0x0500 + +#define NFS4_SHARE_WHEN_MASK 0xF0000 +#define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 +#define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 + #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 @@ -88,6 +103,31 @@ #define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 #define NFS4_ACE_MASK_ALL 0x001F01FF +#define EXCHGID4_FLAG_SUPP_MOVED_REFER 0x00000001 +#define EXCHGID4_FLAG_SUPP_MOVED_MIGR 0x00000002 +#define EXCHGID4_FLAG_USE_NON_PNFS 0x00010000 +#define EXCHGID4_FLAG_USE_PNFS_MDS 0x00020000 +#define EXCHGID4_FLAG_USE_PNFS_DS 0x00040000 +#define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 +#define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 +/* + * Since the validity of these bits depends on whether + * they're set in the argument or response, have separate + * invalid flag masks for arg (_A) and resp (_R). + */ +#define EXCHGID4_FLAG_MASK_A 0x40070003 +#define EXCHGID4_FLAG_MASK_R 0x80070003 + +#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 +#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 +#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED 0x00000004 +#define SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED 0x00000008 +#define SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED 0x00000010 +#define SEQ4_STATUS_ADMIN_STATE_REVOKED 0x00000020 +#define SEQ4_STATUS_RECALLABLE_STATE_REVOKED 0x00000040 +#define SEQ4_STATUS_LEASE_MOVED 0x00000080 +#define SEQ4_STATUS_RESTART_RECLAIM_NEEDED 0x00000100 + #define NFS4_MAX_UINT64 (~(u64)0) enum nfs4_acl_whotype { @@ -154,6 +194,28 @@ enum nfs_opnum4 { OP_VERIFY = 37, OP_WRITE = 38, OP_RELEASE_LOCKOWNER = 39, + + /* nfs41 */ + OP_BACKCHANNEL_CTL = 40, + OP_BIND_CONN_TO_SESSION = 41, + OP_EXCHANGE_ID = 42, + OP_CREATE_SESSION = 43, + OP_DESTROY_SESSION = 44, + OP_FREE_STATEID = 45, + OP_GET_DIR_DELEGATION = 46, + OP_GETDEVICEINFO = 47, + OP_GETDEVICELIST = 48, + OP_LAYOUTCOMMIT = 49, + OP_LAYOUTGET = 50, + OP_LAYOUTRETURN = 51, + OP_SECINFO_NO_NAME = 52, + OP_SEQUENCE = 53, + OP_SET_SSV = 54, + OP_TEST_STATEID = 55, + OP_WANT_DELEGATION = 56, + OP_DESTROY_CLIENTID = 57, + OP_RECLAIM_COMPLETE = 58, + OP_ILLEGAL = 10044, }; @@ -230,7 +292,48 @@ enum nfsstat4 { NFS4ERR_DEADLOCK = 10045, NFS4ERR_FILE_OPEN = 10046, NFS4ERR_ADMIN_REVOKED = 10047, - NFS4ERR_CB_PATH_DOWN = 10048 + NFS4ERR_CB_PATH_DOWN = 10048, + + /* nfs41 */ + NFS4ERR_BADIOMODE = 10049, + NFS4ERR_BADLAYOUT = 10050, + NFS4ERR_BAD_SESSION_DIGEST = 10051, + NFS4ERR_BADSESSION = 10052, + NFS4ERR_BADSLOT = 10053, + NFS4ERR_COMPLETE_ALREADY = 10054, + NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055, + NFS4ERR_DELEG_ALREADY_WANTED = 10056, + NFS4ERR_BACK_CHAN_BUSY = 10057, /* backchan reqs outstanding */ + NFS4ERR_LAYOUTTRYLATER = 10058, + NFS4ERR_LAYOUTUNAVAILABLE = 10059, + NFS4ERR_NOMATCHING_LAYOUT = 10060, + NFS4ERR_RECALLCONFLICT = 10061, + NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062, + NFS4ERR_SEQ_MISORDERED = 10063, /* unexpected seq.id in req */ + NFS4ERR_SEQUENCE_POS = 10064, /* [CB_]SEQ. op not 1st op */ + NFS4ERR_REQ_TOO_BIG = 10065, /* request too big */ + NFS4ERR_REP_TOO_BIG = 10066, /* reply too big */ + NFS4ERR_REP_TOO_BIG_TO_CACHE = 10067, /* rep. not all cached */ + NFS4ERR_RETRY_UNCACHED_REP = 10068, /* retry & rep. uncached */ + NFS4ERR_UNSAFE_COMPOUND = 10069, /* retry/recovery too hard */ + NFS4ERR_TOO_MANY_OPS = 10070, /* too many ops in [CB_]COMP */ + NFS4ERR_OP_NOT_IN_SESSION = 10071, /* op needs [CB_]SEQ. op */ + NFS4ERR_HASH_ALG_UNSUPP = 10072, /* hash alg. not supp. */ + /* Error 10073 is unused. */ + NFS4ERR_CLIENTID_BUSY = 10074, /* clientid has state */ + NFS4ERR_PNFS_IO_HOLE = 10075, /* IO to _SPARSE file hole */ + NFS4ERR_SEQ_FALSE_RETRY = 10076, /* retry not origional */ + NFS4ERR_BAD_HIGH_SLOT = 10077, /* sequence arg bad */ + NFS4ERR_DEADSESSION = 10078, /* persistent session dead */ + NFS4ERR_ENCR_ALG_UNSUPP = 10079, /* SSV alg mismatch */ + NFS4ERR_PNFS_NO_LAYOUT = 10080, /* direct I/O with no layout */ + NFS4ERR_NOT_ONLY_OP = 10081, /* bad compound */ + NFS4ERR_WRONG_CRED = 10082, /* permissions:state change */ + NFS4ERR_WRONG_TYPE = 10083, /* current operation mismatch */ + NFS4ERR_DIRDELEG_UNAVAIL = 10084, /* no directory delegation */ + NFS4ERR_REJECT_DELEG = 10085, /* on callback */ + NFS4ERR_RETURNCONFLICT = 10086, /* outstanding layoutreturn */ + NFS4ERR_DELEG_REVOKED = 10087, /* deleg./layout revoked */ }; /* @@ -391,6 +494,29 @@ enum { NFSPROC4_CLNT_GETACL, NFSPROC4_CLNT_SETACL, NFSPROC4_CLNT_FS_LOCATIONS, + + /* nfs41 */ + NFSPROC4_CLNT_EXCHANGE_ID, + NFSPROC4_CLNT_CREATE_SESSION, + NFSPROC4_CLNT_DESTROY_SESSION, + NFSPROC4_CLNT_SEQUENCE, + NFSPROC4_CLNT_GET_LEASE_TIME, +}; + +/* nfs41 types */ +struct nfs4_sessionid { + unsigned char data[NFS4_MAX_SESSIONID_LEN]; +}; + +/* Create Session Flags */ +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +enum state_protect_how4 { + SP4_NONE = 0, + SP4_MACH_CRED = 1, + SP4_SSV = 2 }; #endif -- cgit v0.10.2 From 10add806c38c022d18af48f3ec28c91b4eaf7bb3 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Fri, 3 Apr 2009 08:27:40 +0300 Subject: nfsd41: define nfs41 error codes Define all error code present in http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29. Signed-off-by: Benny Halevy [nfsd41: clean up error code definitions] [nfsd41: change NFSERR_REPLAY_ME] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 54af92c..214d499 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -109,7 +109,6 @@ NFSERR_FILE_OPEN = 10046, /* v4 */ NFSERR_ADMIN_REVOKED = 10047, /* v4 */ NFSERR_CB_PATH_DOWN = 10048, /* v4 */ - NFSERR_REPLAY_ME = 10049 /* v4 */ }; /* NFSv2 file types - beware, these are not the same in NFSv3 */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 54beda1..ab9616d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -249,7 +249,44 @@ void nfsd_lockd_shutdown(void); #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) #define nfserr_locked cpu_to_be32(NFSERR_LOCKED) #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) -#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME) +#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) +#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) +#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) +#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION) +#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT) +#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY) +#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION) +#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED) +#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY) +#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER) +#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE) +#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT) +#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT) +#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE) +#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED) +#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS) +#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG) +#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG) +#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE) +#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP) +#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND) +#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS) +#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION) +#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP) +#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY) +#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE) +#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY) +#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT) +#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION) +#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP) +#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT) +#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP) +#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED) +#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE) +#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL) +#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) +#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) +#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. @@ -258,6 +295,10 @@ void nfsd_lockd_shutdown(void); #define nfserr_dropit cpu_to_be32(30000) /* end-of-file indicator in readdir */ #define nfserr_eof cpu_to_be32(30001) +/* replay detected */ +#define nfserr_replay_me cpu_to_be32(11001) +/* nfs41 replay detected */ +#define nfserr_replay_cache cpu_to_be32(11002) /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) -- cgit v0.10.2 From 7116ed6b9973021ff43edeb10f4cb834db94000f Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:27:43 +0300 Subject: nfsd41: sessions basic data types This patch provides basic data structures representing the nfs41 sessions and slots, plus helpers for keeping a reference count on the session and freeing it. Note that our server only support a headerpadsz of 0 and it ignores backchannel attributes at the moment. Signed-off-by: Benny Halevy [nfsd41: remove headerpadsz from channel attributes] [nfsd41: embed nfsd4_channel in nfsd4_session] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy [nfsd41 remove sl_session from nfsd4_slot] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 070e9e5..8c70f12 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -382,6 +382,24 @@ static void release_openowner(struct nfs4_stateowner *sop) nfs4_put_stateowner(sop); } +static void +release_session(struct nfsd4_session *ses) +{ + list_del(&ses->se_hash); + list_del(&ses->se_perclnt); + nfsd4_put_session(ses); +} + +void +free_session(struct kref *kref) +{ + struct nfsd4_session *ses; + + ses = container_of(kref, struct nfsd4_session, se_ref); + kfree(ses->se_slots); + kfree(ses); +} + static inline void renew_client(struct nfs4_client *clp) { diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6e4a00..baea7f1 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -99,6 +99,39 @@ struct nfs4_callback { struct rpc_clnt * cb_client; }; +struct nfsd4_slot { + bool sl_inuse; + u32 sl_seqid; +}; + +struct nfsd4_session { + struct kref se_ref; + struct list_head se_hash; /* hash by sessionid */ + struct list_head se_perclnt; + u32 se_flags; + struct nfs4_client *se_client; /* for expire_client */ + struct nfs4_sessionid se_sessionid; + u32 se_fmaxreq_sz; + u32 se_fmaxresp_sz; + u32 se_fmaxresp_cached; + u32 se_fmaxops; + u32 se_fnumslots; + struct nfsd4_slot *se_slots; /* forward channel slots */ +}; + +static inline void +nfsd4_put_session(struct nfsd4_session *ses) +{ + extern void free_session(struct kref *kref); + kref_put(&ses->se_ref, free_session); +} + +static inline void +nfsd4_get_session(struct nfsd4_session *ses) +{ + kref_get(&ses->se_ref); +} + #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ /* -- cgit v0.10.2 From 9fb870702d02c05f9410423bfff3f63e46e26180 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Fri, 3 Apr 2009 08:27:46 +0300 Subject: nfsd41: introduce nfs4_client cl_sessions list [get rid of CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8c70f12..b261199 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -528,6 +528,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); return clp; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index baea7f1..7faefc7 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -163,6 +163,9 @@ struct nfs4_client { struct nfs4_callback cl_callback; /* callback info */ atomic_t cl_count; /* ref count */ u32 cl_firststate; /* recovery dir creation */ + + /* for nfs41 */ + struct list_head cl_sessions; }; /* struct nfs4_client_reset -- cgit v0.10.2 From c4bf7868064ce8b9c75d8049d077e593c20602b3 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Fri, 3 Apr 2009 08:27:49 +0300 Subject: nfsd41: release_session when client is expired Signed-off-by: Benny Halevy [add CONFIG_NFSD_V4_1 to fix v4.0 regression bug] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b261199..f23385b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -511,6 +511,12 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } + while (!list_empty(&clp->cl_sessions)) { + struct nfsd4_session *ses; + ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, + se_perclnt); + release_session(ses); + } put_nfs4_client(clp); } -- cgit v0.10.2 From 5282fd724b667b7d65f2e41e405a825e58a78813 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Fri, 3 Apr 2009 08:27:52 +0300 Subject: nfsd41: sessionid hashing Simple sessionid hashing using its monotonically increasing sequence number. Locking considerations: sessionid_hashtbl access is controlled by the sessionid_lock spin lock. It must be taken for insert, delete, and lookup. nfsd4_sequence looks up the session id and if the session is found, it calls nfsd4_get_session (still under the sessionid_lock). nfsd4_destroy_session calls nfsd4_put_session after unhashing it, so when the session's kref reaches zero it's going to get freed. Signed-off-by: Benny Halevy [we don't use a prime for sessionid hash table size] [use sessionid_lock spin lock] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f23385b..fc20e1f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -382,11 +382,62 @@ static void release_openowner(struct nfs4_stateowner *sop) nfs4_put_stateowner(sop); } +static DEFINE_SPINLOCK(sessionid_lock); +#define SESSION_HASH_SIZE 512 +static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; + +static inline int +hash_sessionid(struct nfs4_sessionid *sessionid) +{ + struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; + + return sid->sequence % SESSION_HASH_SIZE; +} + +static inline void +dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) +{ + u32 *ptr = (u32 *)(&sessionid->data[0]); + dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); +} + +/* caller must hold sessionid_lock */ +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) +{ + struct nfsd4_session *elem; + int idx; + + dump_sessionid(__func__, sessionid); + idx = hash_sessionid(sessionid); + dprintk("%s: idx is %d\n", __func__, idx); + /* Search in the appropriate list */ + list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { + dump_sessionid("list traversal", &elem->se_sessionid); + if (!memcmp(elem->se_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN)) { + return elem; + } + } + + dprintk("%s: session not found\n", __func__); + return NULL; +} + +/* caller must hold sessionid_lock */ static void -release_session(struct nfsd4_session *ses) +unhash_session(struct nfsd4_session *ses) { list_del(&ses->se_hash); list_del(&ses->se_perclnt); +} + +static void +release_session(struct nfsd4_session *ses) +{ + spin_lock(&sessionid_lock); + unhash_session(ses); + spin_unlock(&sessionid_lock); nfsd4_put_session(ses); } @@ -3205,6 +3256,8 @@ nfs4_state_init(void) INIT_LIST_HEAD(&unconf_str_hashtbl[i]); INIT_LIST_HEAD(&unconf_id_hashtbl[i]); } + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&sessionid_hashtbl[i]); for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 7faefc7..5b3a666 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -132,6 +132,13 @@ nfsd4_get_session(struct nfsd4_session *ses) kref_get(&ses->se_ref); } +/* formatted contents of nfs4_sessionid */ +struct nfsd4_sessionid { + clientid_t clientid; + u32 sequence; + u32 reserved; +}; + #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ /* -- cgit v0.10.2 From 2db134eb3b39faefc7fbfb200156d175edba2f68 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:27:55 +0300 Subject: nfsd41: xdr infrastructure Define nfsd41_dec_ops vector and add it to nfsd4_minorversion for minorversion 1. Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 since we don't need to filter out obsolete ops as this is done in the decoding phase. exchange_id, create_session, destroy_session, and sequence ops are implemented as stubs returning nfserr_opnotsupp at this stage. [was nfsd41: xdr stubs] [get rid of CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76a0b2a..6973d61 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -997,6 +997,34 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel } static __be32 +nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *clid) +{ + return nfserr_opnotsupp; /* stub */ +} + +static __be32 +nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, + struct nfsd4_create_session *sess) +{ + return nfserr_opnotsupp; /* stub */ +} + +static __be32 +nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_session *destroy_session) +{ + return nfserr_opnotsupp; /* stub */ +} + +static __be32 +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) +{ + return nfserr_opnotsupp; /* stub */ +} + +static __be32 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { return nfs_ok; @@ -1050,6 +1078,67 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, }; +static nfsd4_dec nfsd41_dec_ops[] = { + [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, + [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, + + /* new operations for NFSv4.1 */ + [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, + [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, + [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, +}; + struct nfsd4_minorversion_ops { nfsd4_dec *decoders; int nops; @@ -1057,6 +1146,7 @@ struct nfsd4_minorversion_ops { static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, + [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, }; static __be32 @@ -2572,6 +2662,38 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w } static __be32 +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_exchange_id *exid) +{ + /* stub */ + return nfserr; +} + +static __be32 +nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_create_session *sess) +{ + /* stub */ + return nfserr; +} + +static __be32 +nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_destroy_session *destroy_session) +{ + /* stub */ + return nfserr; +} + +static __be32 +nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_sequence *seq) +{ + /* stub */ + return nfserr; +} + +static __be32 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { return nfserr; @@ -2579,6 +2701,11 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +/* + * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 + * since we don't need to filter out obsolete ops as this is + * done in the decoding phase. + */ static nfsd4_enc nfsd4_enc_ops[] = { [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, @@ -2617,6 +2744,27 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + + /* NFSv4.1 operations */ + [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, + [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, }; void diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index fd15ddc..28af925 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -344,6 +344,22 @@ struct nfsd4_write { nfs4_verifier wr_verifier; /* response */ }; +struct nfsd4_exchange_id { + int foo; /* stub */ +}; + +struct nfsd4_create_session { + int foo; /* stub */ +}; + +struct nfsd4_sequence { + int foo; /* stub */ +}; + +struct nfsd4_destroy_session { + int foo; /* stub */ +}; + struct nfsd4_op { int opnum; __be32 status; @@ -378,6 +394,12 @@ struct nfsd4_op { struct nfsd4_verify verify; struct nfsd4_write write; struct nfsd4_release_lockowner release_lockowner; + + /* NFSv4.1 */ + struct nfsd4_exchange_id exchange_id; + struct nfsd4_create_session create_session; + struct nfsd4_destroy_session destroy_session; + struct nfsd4_sequence sequence; } u; struct nfs4_replay * replay; }; -- cgit v0.10.2 From 069b6ad4bb20abf175ea7875e82e8002154773af Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:27:58 +0300 Subject: nfsd41: proc stubs Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ded469f..f7be7fa 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1101,6 +1101,28 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, .op_name = "OP_RELEASE_LOCKOWNER", }, + + /* NFSv4.1 operations */ + [OP_EXCHANGE_ID] = { + .op_func = (nfsd4op_func)nfsd4_exchange_id, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_EXCHANGE_ID", + }, + [OP_CREATE_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_create_session, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_CREATE_SESSION", + }, + [OP_DESTROY_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_destroy_session, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_DESTROY_SESSION", + }, + [OP_SEQUENCE] = { + .op_func = (nfsd4op_func)nfsd4_sequence, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_SEQUENCE", + }, }; static const char *nfsd4_op_name(unsigned opnum) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fc20e1f..cfc01f4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -833,6 +833,38 @@ out_err: } __be32 +nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_exchange_id *exid) +{ + return -1; /* stub */ +} + +__be32 +nfsd4_create_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_create_session *cr_ses) +{ + return -1; /* stub */ +} + +__be32 +nfsd4_destroy_session(struct svc_rqst *r, + struct nfsd4_compound_state *cstate, + struct nfsd4_destroy_session *sessionid) +{ + return -1; /* stub */ +} + +__be32 +nfsd4_sequence(struct svc_rqst *r, + struct nfsd4_compound_state *cstate, + struct nfsd4_sequence *seq) +{ + return -1; /* stub */ +} + +__be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 28af925..294940c 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -471,6 +471,18 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); +extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, +struct nfsd4_exchange_id *); + extern __be32 nfsd4_create_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_create_session *); +extern __be32 nfsd4_sequence(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_sequence *); +extern __be32 nfsd4_destroy_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_destroy_session *); extern __be32 nfsd4_process_open1(struct nfsd4_open *open); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); -- cgit v0.10.2 From 0733d21338747483985a5964e852af160d88e429 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:01 +0300 Subject: nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy Signed-off-by: Andy Adamosn Signed-off-by: Benny Halevy [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager Signed-off-by: Benny Halevy [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cfc01f4..4f963e9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -832,12 +832,152 @@ out_err: return; } +/* + * Set the exchange_id flags returned by the server. + */ +static void +nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) +{ + /* pNFS is not supported */ + new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; + + /* Referrals are supported, Migration is not. */ + new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; + + /* set the wire flags to return to client. */ + clid->flags = new->cl_exchange_flags; +} + __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { - return -1; /* stub */ + struct nfs4_client *unconf, *conf, *new; + int status; + unsigned int strhashval; + char dname[HEXDIR_LEN]; + nfs4_verifier verf = exid->verifier; + u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + + dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " + " ip_addr=%u flags %x, spa_how %d\n", + __func__, rqstp, exid, exid->clname.len, exid->clname.data, + ip_addr, exid->flags, exid->spa_how); + + if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) + return nfserr_inval; + + /* Currently only support SP4_NONE */ + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_SSV: + return nfserr_encr_alg_unsupp; + default: + BUG(); /* checked by xdr code */ + case SP4_MACH_CRED: + return nfserr_serverfault; /* no excuse :-/ */ + } + + status = nfs4_make_rec_clidname(dname, &exid->clname); + + if (status) + goto error; + + strhashval = clientstr_hashval(dname); + + nfs4_lock_state(); + status = nfs_ok; + + conf = find_confirmed_client_by_str(dname, strhashval); + if (conf) { + if (!same_verf(&verf, &conf->cl_verifier)) { + /* 18.35.4 case 8 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_not_same; + goto out; + } + /* Client reboot: destroy old state */ + expire_client(conf); + goto out_new; + } + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + /* 18.35.4 case 9 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_perm; + goto out; + } + expire_client(conf); + goto out_new; + } + if (ip_addr != conf->cl_addr && + !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) { + /* Client collision. 18.35.4 case 3 */ + status = nfserr_clid_inuse; + goto out; + } + /* + * Set bit when the owner id and verifier map to an already + * confirmed client id (18.35.3). + */ + exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + + /* + * Falling into 18.35.4 case 2, possible router replay. + * Leave confirmed record intact and return same result. + */ + copy_verf(conf, &verf); + new = conf; + goto out_copy; + } else { + /* 18.35.4 case 7 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_noent; + goto out; + } + } + + unconf = find_unconfirmed_client_by_str(dname, strhashval); + if (unconf) { + /* + * Possible retry or client restart. Per 18.35.4 case 4, + * a new unconfirmed record should be generated regardless + * of whether any properties have changed. + */ + expire_client(unconf); + } + +out_new: + /* Normal case */ + new = create_client(exid->clname, dname); + if (new == NULL) { + status = nfserr_resource; + goto out; + } + + copy_verf(new, &verf); + copy_cred(&new->cl_cred, &rqstp->rq_cred); + new->cl_addr = ip_addr; + gen_clid(new); + gen_confirm(new); + add_to_unconfirmed(new, strhashval); +out_copy: + exid->clientid.cl_boot = new->cl_clientid.cl_boot; + exid->clientid.cl_id = new->cl_clientid.cl_id; + + new->cl_seqid = exid->seqid = 1; + nfsd4_set_ex_flags(new, exid); + + dprintk("nfsd4_exchange_id seqid %d flags %x\n", + new->cl_seqid, new->cl_exchange_flags); + status = nfs_ok; + +out: + nfs4_unlock_state(); +error: + dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); + return status; } __be32 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6973d61..bebf6d2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -998,9 +999,100 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *clid) + struct nfsd4_exchange_id *exid) { - return nfserr_opnotsupp; /* stub */ + int dummy; + DECODE_HEAD; + + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); + + READ_BUF(4); + READ32(exid->clname.len); + + READ_BUF(exid->clname.len); + SAVEMEM(exid->clname.data, exid->clname.len); + + READ_BUF(4); + READ32(exid->flags); + + /* Ignore state_protect4_a */ + READ_BUF(4); + READ32(exid->spa_how); + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + /* spo_must_allow */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + break; + case SP4_SSV: + /* ssp_ops */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + p += dummy; + + /* ssp_hash_algs<> */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* ssp_encr_algs<> */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* ssp_window and ssp_num_gss_handles */ + READ_BUF(8); + READ32(dummy); + READ32(dummy); + break; + default: + goto xdr_error; + } + + /* Ignore Implementation ID */ + READ_BUF(4); /* nfs_impl_id4 array length */ + READ32(dummy); + + if (dummy > 1) + goto xdr_error; + + if (dummy == 1) { + /* nii_domain */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_name */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_date */ + READ_BUF(12); + p += 3; + } + DECODE_TAIL; } static __be32 @@ -2665,8 +2757,55 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_exchange_id *exid) { - /* stub */ - return nfserr; + ENCODE_HEAD; + char *major_id; + char *server_scope; + int major_id_sz; + int server_scope_sz; + uint64_t minor_id = 0; + + if (nfserr) + return nfserr; + + major_id = utsname()->nodename; + major_id_sz = strlen(major_id); + server_scope = utsname()->nodename; + server_scope_sz = strlen(server_scope); + + RESERVE_SPACE( + 8 /* eir_clientid */ + + 4 /* eir_sequenceid */ + + 4 /* eir_flags */ + + 4 /* spr_how (SP4_NONE) */ + + 8 /* so_minor_id */ + + 4 /* so_major_id.len */ + + (XDR_QUADLEN(major_id_sz) * 4) + + 4 /* eir_server_scope.len */ + + (XDR_QUADLEN(server_scope_sz) * 4) + + 4 /* eir_server_impl_id.count (0) */); + + WRITEMEM(&exid->clientid, 8); + WRITE32(exid->seqid); + WRITE32(exid->flags); + + /* state_protect4_r. Currently only support SP4_NONE */ + BUG_ON(exid->spa_how != SP4_NONE); + WRITE32(exid->spa_how); + + /* The server_owner struct */ + WRITE64(minor_id); /* Minor id */ + /* major id */ + WRITE32(major_id_sz); + WRITEMEM(major_id, major_id_sz); + + /* Server scope */ + WRITE32(server_scope_sz); + WRITEMEM(server_scope, server_scope_sz); + + /* Implementation id */ + WRITE32(0); /* zero length nfs_impl_id4 array */ + ADJUST_ARGS(); + return 0; } static __be32 diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 5b3a666..8d0b101 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -173,6 +173,8 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; + u32 cl_seqid; /* seqid for create_session */ + u32 cl_exchange_flags; }; /* struct nfs4_client_reset diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 294940c..33ee71e 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -345,7 +345,12 @@ struct nfsd4_write { }; struct nfsd4_exchange_id { - int foo; /* stub */ + nfs4_verifier verifier; + struct xdr_netobj clname; + u32 flags; + clientid_t clientid; + u32 seqid; + int spa_how; }; struct nfsd4_create_session { -- cgit v0.10.2 From a1bcecd29cdf1670df6908a620add4211c0abb7a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:05 +0300 Subject: nfsd41: match clientid establishment method We need to distinguish between client names provided by NFSv4.0 clients SETCLIENTID and those provided by NFSv4.1 via EXCHANGE_ID when looking up the clientid by string. Signed-off-by: Benny Halevy Signed-off-by: Andy Adamson [nfsd41: use boolean values for use_exchange_id argument] Signed-off-by: Benny Halevy [nfsd41: simplify match_clientid_establishment logic] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index b11cf8d..3444c00 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -344,7 +344,8 @@ purge_old(struct dentry *parent, struct dentry *child) { int status; - if (nfs4_has_reclaimed_state(child->d_name.name)) + /* note: we currently use this path only for minorversion 0 */ + if (nfs4_has_reclaimed_state(child->d_name.name, false)) return 0; status = nfsd4_clear_clid_dir(parent, child); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4f963e9..dfc6d94 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -715,25 +715,45 @@ find_unconfirmed_client(clientid_t *clid) return NULL; } +/* + * Return 1 iff clp's clientid establishment method matches the use_exchange_id + * parameter. Matching is based on the fact the at least one of the + * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1 + * + * FIXME: we need to unify the clientid namespaces for nfsv4.x + * and correctly deal with client upgrade/downgrade in EXCHANGE_ID + * and SET_CLIENTID{,_CONFIRM} + */ +static inline int +match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id) +{ + bool has_exchange_flags = (clp->cl_exchange_flags != 0); + return use_exchange_id == has_exchange_flags; +} + static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval) +find_confirmed_client_by_str(const char *dname, unsigned int hashval, + bool use_exchange_id) { struct nfs4_client *clp; list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) + if (same_name(clp->cl_recdir, dname) && + match_clientid_establishment(clp, use_exchange_id)) return clp; } return NULL; } static struct nfs4_client * -find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) +find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, + bool use_exchange_id) { struct nfs4_client *clp; list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) + if (same_name(clp->cl_recdir, dname) && + match_clientid_establishment(clp, use_exchange_id)) return clp; } return NULL; @@ -890,7 +910,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfs_ok; - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_str(dname, strhashval, true); if (conf) { if (!same_verf(&verf, &conf->cl_verifier)) { /* 18.35.4 case 8 */ @@ -938,7 +958,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, } } - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_str(dname, strhashval, true); if (unconf) { /* * Possible retry or client restart. Per 18.35.4 case 4, @@ -1035,7 +1055,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, strhashval = clientstr_hashval(dname); nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_str(dname, strhashval, false); if (conf) { /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; @@ -1050,7 +1070,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * has a description of SETCLIENTID request processing consisting * of 5 bullet points, labeled as CASE0 - CASE4 below. */ - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_str(dname, strhashval, false); status = nfserr_resource; if (!conf) { /* @@ -1205,7 +1225,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unsigned int hash = clientstr_hashval(unconf->cl_recdir); conf = find_confirmed_client_by_str(unconf->cl_recdir, - hash); + hash, false); if (conf) { nfsd4_remove_clid_dir(conf); expire_client(conf); @@ -3326,12 +3346,12 @@ alloc_reclaim(void) } int -nfs4_has_reclaimed_state(const char *name) +nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) { unsigned int strhashval = clientstr_hashval(name); struct nfs4_client *clp; - clp = find_confirmed_client_by_str(name, strhashval); + clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id); return clp ? 1 : 0; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 8d0b101..90829db 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -330,7 +330,7 @@ extern void nfsd4_init_recdir(char *recdir_name); extern int nfsd4_recdir_load(void); extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); -extern int nfs4_has_reclaimed_state(const char *name); +extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); extern int nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); -- cgit v0.10.2 From b85d4c01b76f6969a085d07a767fa45225cb14be Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:08 +0300 Subject: nfsd41: sequence operation Implement the sequence operation conforming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Check for stale clientid (as derived from the sessionid). Enforce slotid range and exactly-once semantics using the slotid and seqid. If everything went well renew the client lease and mark the slot INPROGRESS. Add a struct nfsd4_slot pointer to struct nfsd4_compound_state. To be used for sessions DRC replay. [nfsd41: rename sequence catchthis to cachethis] Signed-off-by: Andy Adamson [pulled some code to set cstate->slot from "nfsd DRC logic"] [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy [nfsd: add a struct nfsd4_slot pointer to struct nfsd4_compound_state] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: add nfsd4_session pointer to nfsd4_compound_state] [nfsd41: set cstate session] [nfsd41: use cstate session in nfsd4_sequence] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [simplify nfsd4_encode_sequence error handling] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dfc6d94..24b6e05 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1000,6 +1000,32 @@ error: return status; } +static int +check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) +{ + dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, + slot->sl_seqid); + + /* The slot is in use, and no response has been sent. */ + if (slot->sl_inuse) { + if (seqid == slot->sl_seqid) + return nfserr_jukebox; + else + return nfserr_seq_misordered; + } + /* Normal */ + if (likely(seqid == slot->sl_seqid + 1)) + return nfs_ok; + /* Replay */ + if (seqid == slot->sl_seqid) + return nfserr_replay_cache; + /* Wraparound */ + if (seqid == 1 && (slot->sl_seqid + 1) == 0) + return nfs_ok; + /* Misordered replay or misordered new request */ + return nfserr_seq_misordered; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1017,11 +1043,54 @@ nfsd4_destroy_session(struct svc_rqst *r, } __be32 -nfsd4_sequence(struct svc_rqst *r, +nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_sequence *seq) { - return -1; /* stub */ + struct nfsd4_session *session; + struct nfsd4_slot *slot; + int status; + + spin_lock(&sessionid_lock); + status = nfserr_badsession; + session = find_in_sessionid_hashtbl(&seq->sessionid); + if (!session) + goto out; + + status = nfserr_badslot; + if (seq->slotid >= session->se_fnumslots) + goto out; + + slot = &session->se_slots[seq->slotid]; + dprintk("%s: slotid %d\n", __func__, seq->slotid); + + status = check_slot_seqid(seq->seqid, slot); + if (status == nfserr_replay_cache) { + cstate->slot = slot; + cstate->session = session; + goto replay_cache; + } + if (status) + goto out; + + /* Success! bump slot seqid */ + slot->sl_inuse = true; + slot->sl_seqid = seq->seqid; + + cstate->slot = slot; + cstate->session = session; + +replay_cache: + /* Renew the clientid on success and on replay. + * Hold a session reference until done processing the compound: + * nfsd4_put_session called only if the cstate slot is set. + */ + renew_client(session->se_client); + nfsd4_get_session(session); +out: + spin_unlock(&sessionid_lock); + dprintk("%s: return %d\n", __func__, ntohl(status)); + return status; } __be32 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bebf6d2..8556575 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1113,7 +1113,16 @@ static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, struct nfsd4_sequence *seq) { - return nfserr_opnotsupp; /* stub */ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); + COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + READ32(seq->seqid); + READ32(seq->slotid); + READ32(seq->maxslots); + READ32(seq->cachethis); + + DECODE_TAIL; } static __be32 @@ -2828,8 +2837,26 @@ static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { - /* stub */ - return nfserr; + ENCODE_HEAD; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); + WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(seq->seqid); + WRITE32(seq->slotid); + WRITE32(seq->maxslots); + /* + * FIXME: for now: + * target_maxslots = maxslots + * status_flags = 0 + */ + WRITE32(seq->maxslots); + WRITE32(0); + + ADJUST_ARGS(); + return 0; } static __be32 diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 33ee71e..6e28a04 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -48,6 +48,9 @@ struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; struct nfs4_stateowner *replay_owner; + /* For sessions DRC */ + struct nfsd4_session *session; + struct nfsd4_slot *slot; }; struct nfsd4_change_info { @@ -358,7 +361,15 @@ struct nfsd4_create_session { }; struct nfsd4_sequence { - int foo; /* stub */ + struct nfs4_sessionid sessionid; /* request/response */ + u32 seqid; /* request/response */ + u32 slotid; /* request/response */ + u32 maxslots; /* request/response */ + u32 cachethis; /* request */ +#if 0 + u32 target_maxslots; /* response */ + u32 status_flags; /* response */ +#endif /* not yet */ }; struct nfsd4_destroy_session { -- cgit v0.10.2 From f9bb94c4c60f6e1d1717077bfddb614f03a607d1 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:12 +0300 Subject: nfsd41: enforce NFS4ERR_SEQUENCE_POS operation order rules for minorversion != 0 only. Signed-off-by: Andy Adamson [nfsd41: do not verify nfserr_sequence_pos for minorversion 0] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f7be7fa..ec51936 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -811,14 +811,15 @@ static inline void nfsd4_increment_op_stats(u32 opnum) typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */ +}; struct nfsd4_operation { nfsd4op_func op_func; u32 op_flags; -/* Most ops require a valid current filehandle; a few don't: */ -#define ALLOWED_WITHOUT_FH 1 -/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ -#define ALLOWED_ON_ABSENT_FS 2 char *op_name; }; @@ -827,6 +828,23 @@ static struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); /* + * Enforce NFSv4.1 COMPOUND ordering rules. + * + * TODO: + * - enforce NFS4ERR_NOT_ONLY_OP, + * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. + */ +static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) +{ + if (args->minorversion && args->opcnt > 0) { + struct nfsd4_op *op = &args->ops[0]; + return (op->status == nfserr_op_illegal) || + (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); + } + return true; +} + +/* * COMPOUND call. */ static __be32 @@ -864,6 +882,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) goto out; + if (!nfs41_op_ordering_ok(args)) { + op = &args->ops[0]; + op->status = nfserr_sequence_pos; + goto encode_op; + } + status = nfs_ok; while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; @@ -1105,22 +1129,22 @@ static struct nfsd4_operation nfsd4_ops[] = { /* NFSv4.1 operations */ [OP_EXCHANGE_ID] = { .op_func = (nfsd4op_func)nfsd4_exchange_id, - .op_flags = ALLOWED_WITHOUT_FH, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_EXCHANGE_ID", }, [OP_CREATE_SESSION] = { .op_func = (nfsd4op_func)nfsd4_create_session, - .op_flags = ALLOWED_WITHOUT_FH, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_CREATE_SESSION", }, [OP_DESTROY_SESSION] = { .op_func = (nfsd4op_func)nfsd4_destroy_session, - .op_flags = ALLOWED_WITHOUT_FH, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_DESTROY_SESSION", }, [OP_SEQUENCE] = { .op_func = (nfsd4op_func)nfsd4_sequence, - .op_flags = ALLOWED_WITHOUT_FH, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", }, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 24b6e05..9243dca 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1047,10 +1047,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_sequence *seq) { + struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_session *session; struct nfsd4_slot *slot; int status; + if (resp->opcnt != 1) + return nfserr_sequence_pos; + spin_lock(&sessionid_lock); status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid); -- cgit v0.10.2 From 074fe897536f095309c5aaffcf912952882ab2cb Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:15 +0300 Subject: nfsd41: DRC save, restore, and clear functions Cache all the result pages, including the rpc header in rq_respages[0], for a request in the slot table cache entry. Cache the statp pointer from nfsd_dispatch which points into rq_respages[0] just past the rpc header. When setting a cache entry, calculate and save the length of the nfs data minus the rpc header for rq_respages[0]. When replaying a cache entry, replace the cached rpc header with the replayed request rpc result header, unless there is not enough room in the cached results first page. In that case, use the cached rpc header. The sessions fore channel maxresponse size cached is set to NFSD_PAGES_PER_SLOT * PAGE_SIZE. For compounds we are cacheing with operations such as READDIR that use the xdr_buf->pages to hold data, we choose to cache the extra page of data rather than copying data from xdr_buf->pages into the xdr_buf->head page. [nfsd41: limit cache to maxresponsesize_cached] [nfsd41: mv nfsd4_set_statp under CONFIG_NFSD_V4_1] [nfsd41: rename nfsd4_move_pages] [nfsd41: rename page_no variable] [nfsd41: rename nfsd4_set_cache_entry] [nfsd41: fix nfsd41_copy_replay_data comment] [nfsd41: add to nfsd4_set_cache_entry] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9243dca..a37b91d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -852,6 +852,148 @@ out_err: return; } +void +nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + + resp->cstate.statp = statp; +} + +/* + * Dereference the result pages. + */ +static void +nfsd4_release_respages(struct page **respages, short resused) +{ + int i; + + dprintk("--> %s\n", __func__); + for (i = 0; i < resused; i++) { + if (!respages[i]) + continue; + put_page(respages[i]); + respages[i] = NULL; + } +} + +static void +nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) +{ + int i; + + for (i = 0; i < count; i++) { + topages[i] = frompages[i]; + if (!topages[i]) + continue; + get_page(topages[i]); + } +} + +/* + * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous + * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total + * length of the XDR response is less than se_fmaxresp_cached + * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a + * of the reply (e.g. readdir). + * + * Store the base and length of the rq_req.head[0] page + * of the NFSv4.1 data, just past the rpc header. + */ +void +nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) +{ + struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + struct svc_rqst *rqstp = resp->rqstp; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + struct nfsd4_op *op = &args->ops[resp->opcnt]; + struct kvec *resv = &rqstp->rq_res.head[0]; + + dprintk("--> %s entry %p\n", __func__, entry); + + /* Don't cache a failed OP_SEQUENCE. */ + if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) + return; + nfsd4_release_respages(entry->ce_respages, entry->ce_resused); + entry->ce_resused = rqstp->rq_resused; + if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) + entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; + nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, + entry->ce_resused); + entry->ce_status = resp->cstate.status; + entry->ce_datav.iov_base = resp->cstate.statp; + entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0])); + entry->ce_opcnt = resp->opcnt; + /* Current request rpc header length*/ + entry->ce_rpchdrlen = (char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0]); +} + +/* + * We keep the rpc header, but take the nfs reply from the replycache. + */ +static int +nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, + struct nfsd4_cache_entry *entry) +{ + struct svc_rqst *rqstp = resp->rqstp; + struct kvec *resv = &resp->rqstp->rq_res.head[0]; + int len; + + /* Current request rpc header length*/ + len = (char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0]); + if (entry->ce_datav.iov_len + len > PAGE_SIZE) { + dprintk("%s v41 cached reply too large (%Zd).\n", __func__, + entry->ce_datav.iov_len); + return 0; + } + /* copy the cached reply nfsd data past the current rpc header */ + memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, + entry->ce_datav.iov_len); + resv->iov_len = len + entry->ce_datav.iov_len; + return 1; +} + +/* + * Keep the first page of the replay. Copy the NFSv4.1 data from the first + * cached page. Replace any futher replay pages from the cache. + */ +__be32 +nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp) +{ + struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + __be32 status; + + dprintk("--> %s entry %p\n", __func__, entry); + + + if (!nfsd41_copy_replay_data(resp, entry)) { + /* + * Not enough room to use the replay rpc header, send the + * cached header. Release all the allocated result pages. + */ + svc_free_res_pages(resp->rqstp); + nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, + entry->ce_resused); + } else { + /* Release all but the first allocated result page */ + + resp->rqstp->rq_resused--; + svc_free_res_pages(resp->rqstp); + + nfsd4_copy_pages(&resp->rqstp->rq_respages[1], + &entry->ce_respages[1], + entry->ce_resused - 1); + } + + resp->rqstp->rq_resused = entry->ce_resused; + status = entry->ce_status; + + return status; +} + /* * Set the exchange_id flags returned by the server. */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ef0a368..b5168d1 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -515,6 +515,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + rqstp->rq_res.head[0].iov_len; rqstp->rq_res.head[0].iov_len += sizeof(__be32); + /* NFSv4.1 DRC requires statp */ + if (rqstp->rq_vers == 4) + nfsd4_set_statp(rqstp, statp); + /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 04b355c..a59a2df 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -75,5 +75,6 @@ int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); #endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 90829db..f1edb1d 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -99,9 +99,22 @@ struct nfs4_callback { struct rpc_clnt * cb_client; }; +/* Maximum number of pages per slot cache entry */ +#define NFSD_PAGES_PER_SLOT 1 + +struct nfsd4_cache_entry { + __be32 ce_status; + struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ + struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; + short ce_resused; + int ce_opcnt; + int ce_rpchdrlen; +}; + struct nfsd4_slot { bool sl_inuse; u32 sl_seqid; + struct nfsd4_cache_entry sl_cache_entry; }; struct nfsd4_session { diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 6e28a04..d091684 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -51,6 +51,8 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; + __be32 *statp; + u32 status; }; struct nfsd4_change_info { @@ -487,6 +489,8 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); +extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); +extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); -- cgit v0.10.2 From c3d06f9ce8544fecfe13e377d1e2c2e47fe18dbc Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:18 +0300 Subject: nfsd41: hard page limit for DRC Use no more than 1/128th of the number of free pages at nfsd startup for the v4.1 DRC. This is an arbitrary default which should probably end up under the control of an administrator. Signed-off-by: Andy Adamson [moved added fields in struct svc_serv under CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy [fix set_max_drc calculation of sv_drc_max_pages] [moved NFSD_DRC_SIZE_SHIFT's declaration up in header file] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b5168d1..b53a098 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,26 @@ void nfsd_reset_versions(void) } } +/* + * Each session guarantees a negotiated per slot memory cache for replies + * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated + * NFSv4.1 server might want to use more memory for a DRC than a machine + * with mutiple services. + * + * Impose a hard limit on the number of pages for the DRC which varies + * according to the machines free pages. This is of course only a default. + * + * For now this is a #defined shift which could be under admin control + * in the future. + */ +static void set_max_drc(void) +{ + nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT; + nfsd_serv->sv_drc_pages_used = 0; + dprintk("%s svc_drc_max_pages %u\n", __func__, + nfsd_serv->sv_drc_max_pages); +} int nfsd_create_serv(void) { @@ -229,6 +250,8 @@ int nfsd_create_serv(void) nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; + else + set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index ab9616d..1f063d4 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -331,6 +331,9 @@ extern struct timeval nfssvc_boot; #define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ +/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ +#define NFSD_DRC_SIZE_SHIFT 7 + /* * The following attributes are currently not supported by the NFSv4 server: * ARCHIVE (deprecated anyway) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 815dd58..d209c63 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -95,6 +95,8 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ + unsigned int sv_drc_max_pages; /* Total pages for DRC */ + unsigned int sv_drc_pages_used;/* DRC pages used */ }; /* -- cgit v0.10.2 From da3846a2866ddf239311766ff434a82e7b4ac701 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:22 +0300 Subject: nfsd41: nfsd DRC logic Replay a request in nfsd4_sequence. Add a minorversion to struct nfsd4_compound_state. Pass the current slot to nfs4svc_encode_compound res via struct nfsd4_compoundres to set an NFSv4.1 DRC entry. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy [nfsd41: use cstate session in nfs4svc_encode_compoundres] [nfsd41 replace nfsd4_set_cache_entry] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ec51936..9e2ee75 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -936,6 +936,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: + /* Only from SEQUENCE or CREATE_SESSION */ + if (resp->cstate.status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; nfsd4_encode_replay(resp, op); @@ -964,6 +970,7 @@ encode_op: status = nfserr_jukebox; } + resp->cstate.status = status; fh_put(&resp->cstate.current_fh); fh_put(&resp->cstate.save_fh); BUG_ON(resp->cstate.replay_owner); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a37b91d..e20d4345 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -989,6 +989,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp) } resp->rqstp->rq_resused = entry->ce_resused; + resp->opcnt = entry->ce_opcnt; + resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; status = entry->ce_status; return status; @@ -1214,6 +1216,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status == nfserr_replay_cache) { cstate->slot = slot; cstate->session = session; + /* Return the cached reply status and set cstate->status + * for nfsd4_svc_encode_compoundres processing*/ + status = nfsd4_replay_cache_entry(resp); + cstate->status = nfserr_replay_cache; goto replay_cache; } if (status) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8556575..09415bc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3049,6 +3049,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.head[0]; iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); + if (resp->cstate.slot != NULL) { + if (resp->cstate.status == nfserr_replay_cache) { + iov->iov_len = resp->cstate.iovlen; + } else { + nfsd4_store_cache_entry(resp); + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); + resp->cstate.slot->sl_inuse = 0; + } + if (resp->cstate.session) + nfsd4_put_session(resp->cstate.session); + } return 1; } diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index d091684..69cb467 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -52,6 +52,7 @@ struct nfsd4_compound_state { struct nfsd4_session *session; struct nfsd4_slot *slot; __be32 *statp; + size_t iovlen; u32 status; }; -- cgit v0.10.2 From 14778a133e3be332be77d981552a79260a61ee17 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:25 +0300 Subject: nfsd41: clear DRC cache on free_session Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e20d4345..f25a7d2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -441,12 +441,19 @@ release_session(struct nfsd4_session *ses) nfsd4_put_session(ses); } +static void nfsd4_release_respages(struct page **respages, short resused); + void free_session(struct kref *kref) { struct nfsd4_session *ses; + int i; ses = container_of(kref, struct nfsd4_session, se_ref); + for (i = 0; i < ses->se_fnumslots; i++) { + struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; + nfsd4_release_respages(e->ce_respages, e->ce_resused); + } kfree(ses->se_slots); kfree(ses); } -- cgit v0.10.2 From ec6b5d7b5064fde27aee798b81107ea3a830de85 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:28 +0300 Subject: nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel Signed-off-by: Dean Hildebrand [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f25a7d2..463ae39 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -68,6 +68,7 @@ static u32 current_delegid = 1; static u32 nfs4_init; static stateid_t zerostateid; /* bits all 0 */ static stateid_t onestateid; /* bits all 1 */ +static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) @@ -401,6 +402,131 @@ dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); } +static void +gen_sessionid(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_sessionid *sid; + + sid = (struct nfsd4_sessionid *)ses->se_sessionid.data; + sid->clientid = clp->cl_clientid; + sid->sequence = current_sessionid++; + sid->reserved = 0; +} + +/* + * Give the client the number of slots it requests bound by + * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * + * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we + * should (up to a point) re-negotiate active sessions and reduce their + * slot usage to make rooom for new connections. For now we just fail the + * create session. + */ +static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) +{ + int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + + spin_lock(&nfsd_serv->sv_lock); + if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) + np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; + nfsd_serv->sv_drc_pages_used += np; + spin_unlock(&nfsd_serv->sv_lock); + + if (np <= 0) { + status = nfserr_resource; + fchan->maxreqs = 0; + } else + fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + + return status; +} + +/* + * fchan holds the client values on input, and the server values on output + */ +static int init_forechannel_attrs(struct svc_rqst *rqstp, + struct nfsd4_session *session, + struct nfsd4_channel_attrs *fchan) +{ + int status = 0; + __u32 maxcount = svc_max_payload(rqstp); + + /* headerpadsz set to zero in encode routine */ + + /* Use the client's max request and max response size if possible */ + if (fchan->maxreq_sz > maxcount) + fchan->maxreq_sz = maxcount; + session->se_fmaxreq_sz = fchan->maxreq_sz; + + if (fchan->maxresp_sz > maxcount) + fchan->maxresp_sz = maxcount; + session->se_fmaxresp_sz = fchan->maxresp_sz; + + /* Set the max response cached size our default which is + * a multiple of PAGE_SIZE and small */ + session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + fchan->maxresp_cached = session->se_fmaxresp_cached; + + /* Use the client's maxops if possible */ + if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) + fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; + session->se_fmaxops = fchan->maxops; + + /* try to use the client requested number of slots */ + if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; + + /* FIXME: Error means no more DRC pages so the server should + * recover pages from existing sessions. For now fail session + * creation. + */ + status = set_forechannel_maxreqs(fchan); + + session->se_fnumslots = fchan->maxreqs; + return status; +} + +static int +alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, + struct nfsd4_create_session *cses) +{ + struct nfsd4_session *new, tmp; + int idx, status = nfserr_resource, slotsize; + + memset(&tmp, 0, sizeof(tmp)); + + /* FIXME: For now, we just accept the client back channel attributes. */ + status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); + if (status) + goto out; + + /* allocate struct nfsd4_session and slot table in one piece */ + slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); + new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); + if (!new) + goto out; + + memcpy(new, &tmp, sizeof(*new)); + + new->se_client = clp; + gen_sessionid(new); + idx = hash_sessionid(&new->se_sessionid); + memcpy(clp->cl_sessionid.data, new->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + + new->se_flags = cses->flags; + kref_init(&new->se_ref); + spin_lock(&sessionid_lock); + list_add(&new->se_hash, &sessionid_hashtbl[idx]); + list_add(&new->se_perclnt, &clp->cl_sessions); + spin_unlock(&sessionid_lock); + + status = nfs_ok; +out: + return status; +} + /* caller must hold sessionid_lock */ static struct nfsd4_session * find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) @@ -1182,7 +1308,67 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - return -1; /* stub */ + u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct nfs4_client *conf, *unconf; + int status = 0; + + nfs4_lock_state(); + unconf = find_unconfirmed_client(&cr_ses->clientid); + conf = find_confirmed_client(&cr_ses->clientid); + + if (conf) { + status = nfs_ok; + if (conf->cl_seqid == cr_ses->seqid) { + dprintk("Got a create_session replay! seqid= %d\n", + conf->cl_seqid); + goto out_replay; + } else if (cr_ses->seqid != conf->cl_seqid + 1) { + status = nfserr_seq_misordered; + dprintk("Sequence misordered!\n"); + dprintk("Expected seqid= %d but got seqid= %d\n", + conf->cl_seqid, cr_ses->seqid); + goto out; + } + conf->cl_seqid++; + } else if (unconf) { + if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || + (ip_addr != unconf->cl_addr)) { + status = nfserr_clid_inuse; + goto out; + } + + if (unconf->cl_seqid != cr_ses->seqid) { + status = nfserr_seq_misordered; + goto out; + } + + move_to_confirmed(unconf); + + /* + * We do not support RDMA or persistent sessions + */ + cr_ses->flags &= ~SESSION4_PERSIST; + cr_ses->flags &= ~SESSION4_RDMA; + + conf = unconf; + } else { + status = nfserr_stale_clientid; + goto out; + } + + status = alloc_init_session(rqstp, conf, cr_ses); + if (status) + goto out; + +out_replay: + memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + cr_ses->seqid = conf->cl_seqid; + +out: + nfs4_unlock_state(); + dprintk("%s returns %d\n", __func__, ntohl(status)); + return status; } __be32 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 09415bc..671f9b9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1099,7 +1099,108 @@ static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) { - return nfserr_opnotsupp; /* stub */ + DECODE_HEAD; + + u32 dummy; + char *machine_name; + int i; + int nr_secflavs; + + READ_BUF(16); + COPYMEM(&sess->clientid, 8); + READ32(sess->seqid); + READ32(sess->flags); + + /* Fore channel attrs */ + READ_BUF(28); + READ32(dummy); /* headerpadsz is always 0 */ + READ32(sess->fore_channel.maxreq_sz); + READ32(sess->fore_channel.maxresp_sz); + READ32(sess->fore_channel.maxresp_cached); + READ32(sess->fore_channel.maxops); + READ32(sess->fore_channel.maxreqs); + READ32(sess->fore_channel.nr_rdma_attrs); + if (sess->fore_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + READ32(sess->fore_channel.rdma_attrs); + } else if (sess->fore_channel.nr_rdma_attrs > 1) { + dprintk("Too many fore channel attr bitmaps!\n"); + goto xdr_error; + } + + /* Back channel attrs */ + READ_BUF(28); + READ32(dummy); /* headerpadsz is always 0 */ + READ32(sess->back_channel.maxreq_sz); + READ32(sess->back_channel.maxresp_sz); + READ32(sess->back_channel.maxresp_cached); + READ32(sess->back_channel.maxops); + READ32(sess->back_channel.maxreqs); + READ32(sess->back_channel.nr_rdma_attrs); + if (sess->back_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + READ32(sess->back_channel.rdma_attrs); + } else if (sess->back_channel.nr_rdma_attrs > 1) { + dprintk("Too many back channel attr bitmaps!\n"); + goto xdr_error; + } + + READ_BUF(8); + READ32(sess->callback_prog); + + /* callback_sec_params4 */ + READ32(nr_secflavs); + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + READ32(dummy); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + READ32(dummy); + + /* machine name */ + READ32(dummy); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + READ32(sess->uid); + READ32(sess->gid); + + /* more gids */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + for (i = 0; i < dummy; ++i) + READ32(dummy); + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + READ32(dummy); + /* gcbp_handle_from_server */ + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; } static __be32 @@ -2821,8 +2922,49 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create_session *sess) { - /* stub */ - return nfserr; + ENCODE_HEAD; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(24); + WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(sess->seqid); + WRITE32(sess->flags); + ADJUST_ARGS(); + + RESERVE_SPACE(28); + WRITE32(0); /* headerpadsz */ + WRITE32(sess->fore_channel.maxreq_sz); + WRITE32(sess->fore_channel.maxresp_sz); + WRITE32(sess->fore_channel.maxresp_cached); + WRITE32(sess->fore_channel.maxops); + WRITE32(sess->fore_channel.maxreqs); + WRITE32(sess->fore_channel.nr_rdma_attrs); + ADJUST_ARGS(); + + if (sess->fore_channel.nr_rdma_attrs) { + RESERVE_SPACE(4); + WRITE32(sess->fore_channel.rdma_attrs); + ADJUST_ARGS(); + } + + RESERVE_SPACE(28); + WRITE32(0); /* headerpadsz */ + WRITE32(sess->back_channel.maxreq_sz); + WRITE32(sess->back_channel.maxresp_sz); + WRITE32(sess->back_channel.maxresp_cached); + WRITE32(sess->back_channel.maxops); + WRITE32(sess->back_channel.maxreqs); + WRITE32(sess->back_channel.nr_rdma_attrs); + ADJUST_ARGS(); + + if (sess->back_channel.nr_rdma_attrs) { + RESERVE_SPACE(4); + WRITE32(sess->back_channel.rdma_attrs); + ADJUST_ARGS(); + } + return 0; } static __be32 diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index f1edb1d..692edf4 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -99,8 +99,12 @@ struct nfs4_callback { struct rpc_clnt * cb_client; }; +/* Maximum number of slots per session. 128 is useful for long haul TCP */ +#define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 16 struct nfsd4_cache_entry { __be32 ce_status; @@ -129,7 +133,7 @@ struct nfsd4_session { u32 se_fmaxresp_cached; u32 se_fmaxops; u32 se_fnumslots; - struct nfsd4_slot *se_slots; /* forward channel slots */ + struct nfsd4_slot se_slots[]; /* forward channel slots */ }; static inline void @@ -188,6 +192,7 @@ struct nfs4_client { struct list_head cl_sessions; u32 cl_seqid; /* seqid for create_session */ u32 cl_exchange_flags; + struct nfs4_sessionid cl_sessionid; }; /* struct nfs4_client_reset diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 69cb467..9468829 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -359,8 +359,27 @@ struct nfsd4_exchange_id { int spa_how; }; +struct nfsd4_channel_attrs { + u32 headerpadsz; + u32 maxreq_sz; + u32 maxresp_sz; + u32 maxresp_cached; + u32 maxops; + u32 maxreqs; + u32 nr_rdma_attrs; + u32 rdma_attrs; +}; + struct nfsd4_create_session { - int foo; /* stub */ + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; }; struct nfsd4_sequence { -- cgit v0.10.2 From 38eb76a54d803e6792816623651b1a9cb85f8d01 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:32 +0300 Subject: nfsd41: Add a create session replay cache Replace the nfs4_client cl_seqid field with a single struct nfs41_slot used for the create session replay cache. The CREATE_SESSION slot sets the sl_session pointer to NULL. Otherwise, the slot and it's replay cache are used just like the session slots. Fix unconfirmed create_session replay response by initializing the create_session slot sequence id to 0. A future patch will set the CREATE_SESSION cache when a SEQUENCE operation preceeds the CREATE_SESSION operation. This compound is currently only cached in the session slot table. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy [nfsd41: revert portion of nfsd4_set_cache_entry] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 463ae39..58f9797 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -649,6 +649,8 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); + nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, + clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1263,11 +1265,12 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_seqid = exid->seqid = 1; + new->cl_slot.sl_seqid = 0; + exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_seqid, new->cl_exchange_flags); + new->cl_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1309,7 +1312,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; + struct nfsd4_slot *slot = NULL; int status = 0; nfs4_lock_state(); @@ -1317,19 +1322,24 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - status = nfs_ok; - if (conf->cl_seqid == cr_ses->seqid) { + slot = &conf->cl_slot; + status = check_slot_seqid(cr_ses->seqid, slot); + if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! seqid= %d\n", - conf->cl_seqid); - goto out_replay; - } else if (cr_ses->seqid != conf->cl_seqid + 1) { + slot->sl_seqid); + cstate->slot = slot; + cstate->status = status; + /* Return the cached reply status */ + status = nfsd4_replay_cache_entry(resp); + goto out; + } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - conf->cl_seqid, cr_ses->seqid); + slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_seqid++; + conf->cl_slot.sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1337,11 +1347,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - if (unconf->cl_seqid != cr_ses->seqid) { + slot = &unconf->cl_slot; + status = check_slot_seqid(cr_ses->seqid, slot); + if (status) { + /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; goto out; } + slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1360,11 +1374,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (status) goto out; -out_replay: memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = conf->cl_seqid; + cr_ses->seqid = slot->sl_seqid; + slot->sl_inuse = true; + cstate->slot = slot; out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 692edf4..f063df7 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -190,7 +190,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - u32 cl_seqid; /* seqid for create_session */ + struct nfsd4_slot cl_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; -- cgit v0.10.2 From bf864a31d50e3e94d6e76537b97d664913906ff8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:35 +0300 Subject: nfsd41: non-page DRC for solo sequence responses A session inactivity time compound (lease renewal) or a compound where the sequence operation has sa_cachethis set to FALSE do not require any pages to be held in the v4.1 DRC. This is because struct nfsd4_slot is already caching the session information. Add logic to the nfs41 server to not cache response pages for solo sequence responses. Return nfserr_replay_uncached_rep on the operation following the sequence operation when sa_cachethis is FALSE. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [nfsd41: use cstate session in nfsd4_replay_cache_entry] [nfsd41: rename nfsd4_no_page_in_cache] [nfsd41 rename nfsd4_enc_no_page_replay] [nfsd41 nfsd4_is_solo_sequence] [nfsd41 change nfsd4_not_cached return] Signed-off-by: Andy Adamson [changed return type to bool] Signed-off-by: Benny Halevy [nfsd41 drop parens in nfsd4_is_solo_sequence call] Signed-off-by: Andy Adamson [changed "== 0" to "!"] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9e2ee75..ae21a4e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -828,6 +828,34 @@ static struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); /* + * This is a replay of a compound for which no cache entry pages + * were used. Encode the sequence operation, and if cachethis is FALSE + * encode the uncache rep error on the next operation. + */ +static __be32 +nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) +{ + struct nfsd4_op *op; + + dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, + resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); + + /* Encode the replayed sequence operation */ + BUG_ON(resp->opcnt != 1); + op = &args->ops[resp->opcnt - 1]; + nfsd4_encode_operation(resp, op); + + /*return nfserr_retry_uncached_rep in next operation. */ + if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { + op = &args->ops[resp->opcnt++]; + op->status = nfserr_retry_uncached_rep; + nfsd4_encode_operation(resp, op); + } + return op->status; +} + +/* * Enforce NFSv4.1 COMPOUND ordering rules. * * TODO: @@ -895,7 +923,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", resp->opcnt, args->opcnt, op->opnum, nfsd4_op_name(op->opnum)); - /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -939,7 +966,10 @@ encode_op: /* Only from SEQUENCE or CREATE_SESSION */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); - status = op->status; + if (nfsd4_not_cached(resp)) + status = nfsd4_enc_uncached_replay(args, resp); + else + status = op->status; goto out; } if (op->status == nfserr_replay_me) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 58f9797..04a395f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1049,17 +1049,31 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) /* Don't cache a failed OP_SEQUENCE. */ if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) return; + nfsd4_release_respages(entry->ce_respages, entry->ce_resused); + entry->ce_opcnt = resp->opcnt; + entry->ce_status = resp->cstate.status; + + /* + * Don't need a page to cache just the sequence operation - the slot + * does this for us! + */ + + if (nfsd4_not_cached(resp)) { + entry->ce_resused = 0; + entry->ce_rpchdrlen = 0; + dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, + resp->cstate.slot->sl_cache_entry.ce_cachethis); + return; + } entry->ce_resused = rqstp->rq_resused; if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, entry->ce_resused); - entry->ce_status = resp->cstate.status; entry->ce_datav.iov_base = resp->cstate.statp; entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - (char *)page_address(rqstp->rq_respages[0])); - entry->ce_opcnt = resp->opcnt; /* Current request rpc header length*/ entry->ce_rpchdrlen = (char *)resp->cstate.statp - (char *)page_address(rqstp->rq_respages[0]); @@ -1096,13 +1110,28 @@ nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, * cached page. Replace any futher replay pages from the cache. */ __be32 -nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp) +nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, + struct nfsd4_sequence *seq) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; __be32 status; dprintk("--> %s entry %p\n", __func__, entry); + /* + * If this is just the sequence operation, we did not keep + * a page in the cache entry because we can just use the + * slot info stored in struct nfsd4_sequence that was checked + * against the slot in nfsd4_sequence(). + * + * This occurs when seq->cachethis is FALSE, or when the client + * session inactivity timer fires and a solo sequence operation + * is sent (lease renewal). + */ + if (seq && nfsd4_not_cached(resp)) { + seq->maxslots = resp->cstate.session->se_fnumslots; + return nfs_ok; + } if (!nfsd41_copy_replay_data(resp, entry)) { /* @@ -1330,7 +1359,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cstate->slot = slot; cstate->status = status; /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp); + status = nfsd4_replay_cache_entry(resp, NULL); goto out; } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { status = nfserr_seq_misordered; @@ -1380,6 +1409,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, slot->sl_inuse = true; cstate->slot = slot; + /* Ensure a page is used for the cache */ + slot->sl_cache_entry.ce_cachethis = 1; out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -1425,8 +1456,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status - * for nfsd4_svc_encode_compoundres processing*/ - status = nfsd4_replay_cache_entry(resp); + * for nfsd4_svc_encode_compoundres processing */ + status = nfsd4_replay_cache_entry(resp, seq); cstate->status = nfserr_replay_cache; goto replay_cache; } @@ -1436,6 +1467,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, /* Success! bump slot seqid */ slot->sl_inuse = true; slot->sl_seqid = seq->seqid; + slot->sl_cache_entry.ce_cachethis = seq->cachethis; + /* Always set the cache entry cachethis for solo sequence */ + if (nfsd4_is_solo_sequence(resp)) + slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 671f9b9..64bc215 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2975,7 +2975,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, return nfserr; } -static __be32 +__be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { @@ -3192,7 +3192,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (resp->cstate.slot != NULL) { - if (resp->cstate.status == nfserr_replay_cache) { + if (resp->cstate.status == nfserr_replay_cache && + !nfsd4_not_cached(resp)) { iov->iov_len = resp->cstate.iovlen; } else { nfsd4_store_cache_entry(resp); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index f063df7..18dcffa 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -110,6 +110,7 @@ struct nfsd4_cache_entry { __be32 ce_status; struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; + int ce_cachethis; short ce_resused; int ce_opcnt; int ce_rpchdrlen; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 9468829..4861888 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -480,6 +480,18 @@ struct nfsd4_compoundres { struct nfsd4_compound_state cstate; }; +static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; + return args->opcnt == 1; +} + +static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +{ + return !resp->cstate.slot->sl_cache_entry.ce_cachethis || + nfsd4_is_solo_sequence(resp); +} + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void @@ -510,7 +522,8 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); -extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp); +extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, + struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); -- cgit v0.10.2 From e10e0cfc2f27364c73b28adbd3c8688d97049e73 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:38 +0300 Subject: nfsd41: destroy_session operation Implement the destory_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 [use sessionid_lock spin lock] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 04a395f..9192e5b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1422,7 +1422,34 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_session *sessionid) { - return -1; /* stub */ + struct nfsd4_session *ses; + u32 status = nfserr_badsession; + + /* Notes: + * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid + * - Should we return nfserr_back_chan_busy if waiting for + * callbacks on to-be-destroyed session? + * - Do we need to clear any callback info from previous session? + */ + + dump_sessionid(__func__, &sessionid->sessionid); + spin_lock(&sessionid_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid); + if (!ses) { + spin_unlock(&sessionid_lock); + goto out; + } + + unhash_session(ses); + spin_unlock(&sessionid_lock); + + /* wait for callbacks */ + shutdown_callback_client(ses->se_client); + nfsd4_put_session(ses); + status = nfs_ok; +out: + dprintk("%s returns %d\n", __func__, ntohl(status)); + return status; } __be32 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 64bc215..c6a726d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1207,7 +1207,11 @@ static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_session *destroy_session) { - return nfserr_opnotsupp; /* stub */ + DECODE_HEAD; + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); + + DECODE_TAIL; } static __be32 @@ -2971,7 +2975,6 @@ static __be32 nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_destroy_session *destroy_session) { - /* stub */ return nfserr; } diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 4861888..a0a2e83 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -395,7 +395,7 @@ struct nfsd4_sequence { }; struct nfsd4_destroy_session { - int foo; /* stub */ + struct nfs4_sessionid sessionid; }; struct nfsd4_op { -- cgit v0.10.2 From dd453dfd70538cadc02cb47ff8d8cfd0cb8cf435 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:41 +0300 Subject: nfsd: pass nfsd4_compound_state* to nfs4_preprocess_{state,seq}id_op Currently we only use cstate->current_fh, will also be used by nfsd41 code. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ae21a4e..e6a0f31 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -513,9 +513,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &read->rd_stateid, - RD_STATE, &read->rd_filp))) { + if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, + RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } @@ -646,7 +645,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, + status = nfs4_preprocess_stateid_op(cstate, &setattr->sa_stateid, WR_STATE, NULL); nfs4_unlock_state(); if (status) { @@ -686,8 +685,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, - WR_STATE, &filp); + status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); if (filp) get_file(filp); nfs4_unlock_state(); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9192e5b..f49f305 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2810,10 +2810,12 @@ static int is_delegation_stateid(stateid_t *stateid) * Checks for stateid operations */ __be32 -nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) +nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, + stateid_t *stateid, int flags, struct file **filpp) { struct nfs4_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; + struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; __be32 status; @@ -2878,10 +2880,14 @@ setlkflg (int type) * Checks for sequence id mutating operations. */ static __be32 -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) +nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, int flags, + struct nfs4_stateowner **sopp, + struct nfs4_stateid **stpp, struct nfsd4_lock *lock) { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; + struct svc_fh *current_fh = &cstate->current_fh; __be32 status; dprintk("NFSD: preprocess_seqid_op: seqid=%d " @@ -3004,7 +3010,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, + if ((status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, CONFIRM | OPEN_STATE, &oc->oc_stateowner, &stp, NULL))) @@ -3074,7 +3080,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, return nfserr_inval; nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, + if ((status = nfs4_preprocess_seqid_op(cstate, od->od_seqid, &od->od_stateid, OPEN_STATE, @@ -3127,7 +3133,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); /* check close_lru for replay */ - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, + if ((status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, OPEN_STATE | CLOSE_STATE, @@ -3474,7 +3480,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; /* validate and update open stateid and open seqid */ - status = nfs4_preprocess_seqid_op(&cstate->current_fh, + status = nfs4_preprocess_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, OPEN_STATE, @@ -3501,7 +3507,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } else { /* lock (lock owner + lock stateid) already exists */ - status = nfs4_preprocess_seqid_op(&cstate->current_fh, + status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, LOCK_STATE, @@ -3697,7 +3703,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, + if ((status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, LOCK_STATE, diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 18dcffa..836c0d6b 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -333,7 +333,9 @@ struct nfs4_stateid { ((err) != nfserr_stale_stateid) && \ ((err) != nfserr_bad_stateid)) -extern __be32 nfs4_preprocess_stateid_op(struct svc_fh *current_fh, +struct nfsd4_compound_state; + +extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); -- cgit v0.10.2 From 6668958fac1d05f55420de702f3678d46c1e93a5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:45 +0300 Subject: nfsd41: stateid handling When sessions are used, stateful operation sequenceid and stateid handling are not used. When sessions are used, on the first open set the seqid to 1, mark state confirmed and skip seqid processing. When sessionas are used the stateid generation number is ignored when it is zero whereas without sessions bad_stateid or stale stateid is returned. Add flags to propagate session use to all stateful ops and down to check_stateid_generation. Signed-off-by: Benny Halevy Signed-off-by: Andy Adamson [nfsd4_has_session should return a boolean, not u32] Signed-off-by: Benny Halevy [nfsd41: pass nfsd4_compoundres * to nfsd4_process_open1] [nfsd41: calculate HAS_SESSION in nfs4_preprocess_stateid_op] [nfsd41: calculate HAS_SESSION in nfs4_preprocess_seqid_op] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e6a0f31..aeb4888 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -168,6 +168,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { __be32 status; + struct nfsd4_compoundres *resp; + dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", (int)open->op_fname.len, open->op_fname.data, open->op_stateowner); @@ -179,7 +181,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); /* check seqid for replay. set nfs4_owner */ - status = nfsd4_process_open1(open); + resp = rqstp->rq_resp; + status = nfsd4_process_open1(&resp->cstate, open); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_stateowner->so_replay; fh_put(&cstate->current_fh); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f49f305..bbaf3c9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2183,7 +2183,8 @@ static struct lock_manager_operations nfsd_lease_mng_ops = { __be32 -nfsd4_process_open1(struct nfsd4_open *open) +nfsd4_process_open1(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open) { clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; @@ -2206,6 +2207,9 @@ nfsd4_process_open1(struct nfsd4_open *open) return nfserr_expired; goto renew; } + /* When sessions are used, skip open sequenceid processing */ + if (nfsd4_has_session(cstate)) + goto renew; if (!sop->so_confirmed) { /* Replace unconfirmed owners without checking for replay. */ clp = sop->so_client; @@ -2483,6 +2487,7 @@ out: __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { + struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_file *fp = NULL; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_stateid *stp = NULL; @@ -2541,9 +2546,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf release_open_stateid(stp); goto out; } + if (nfsd4_has_session(&resp->cstate)) + update_stateid(&stp->st_stateid); } memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); + if (nfsd4_has_session(&resp->cstate)) + open->op_stateowner->so_confirmed = 1; + /* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. @@ -2564,7 +2574,8 @@ out: * To finish the open response, we just need to set the rflags. */ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; - if (!open->op_stateowner->so_confirmed) + if (!open->op_stateowner->so_confirmed && + !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; return status; @@ -2781,8 +2792,15 @@ grace_disallows_io(struct inode *inode) return locks_in_grace() && mandatory_lock(inode); } -static int check_stateid_generation(stateid_t *in, stateid_t *ref) +static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) { + /* + * When sessions are used the stateid generation number is ignored + * when it is zero. + */ + if ((flags & HAS_SESSION) && in->si_generation == 0) + goto out; + /* If the client sends us a stateid from the future, it's buggy: */ if (in->si_generation > ref->si_generation) return nfserr_bad_stateid; @@ -2798,6 +2816,7 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref) */ if (in->si_generation < ref->si_generation) return nfserr_old_stateid; +out: return nfs_ok; } @@ -2825,6 +2844,9 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (grace_disallows_io(ino)) return nfserr_grace; + if (nfsd4_has_session(cstate)) + flags |= HAS_SESSION; + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(current_fh, stateid, flags); @@ -2837,7 +2859,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, dp = find_delegation_stateid(ino, stateid); if (!dp) goto out; - status = check_stateid_generation(stateid, &dp->dl_stateid); + status = check_stateid_generation(stateid, &dp->dl_stateid, + flags); if (status) goto out; status = nfs4_check_delegmode(dp, flags); @@ -2854,7 +2877,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, goto out; if (!stp->st_stateowner->so_confirmed) goto out; - status = check_stateid_generation(stateid, &stp->st_stateid); + status = check_stateid_generation(stateid, &stp->st_stateid, + flags); if (status) goto out; status = nfs4_check_openmode(stp, flags); @@ -2905,6 +2929,10 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (STALE_STATEID(stateid)) return nfserr_stale_stateid; + + if (nfsd4_has_session(cstate)) + flags |= HAS_SESSION; + /* * We return BAD_STATEID if filehandle doesn't match stateid, * the confirmed flag is incorrecly set, or the generation @@ -2961,7 +2989,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, * For the moment, we ignore the possibility of * generation number wraparound. */ - if (seqid != sop->so_seqid) + if (!(flags & HAS_SESSION) && seqid != sop->so_seqid) goto check_replay; if (sop->so_confirmed && flags & CONFIRM) { @@ -2974,7 +3002,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, " confirmed yet!\n"); return nfserr_bad_stateid; } - status = check_stateid_generation(stateid, &stp->st_stateid); + status = check_stateid_generation(stateid, &stp->st_stateid, flags); if (status) return status; renew_client(sop->so_client); @@ -3169,11 +3197,14 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &dr->dr_stateid; struct inode *inode; __be32 status; + int flags = 0; if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; inode = cstate->current_fh.fh_dentry->d_inode; + if (nfsd4_has_session(cstate)) + flags |= HAS_SESSION; nfs4_lock_state(); status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) @@ -3187,7 +3218,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dp = find_delegation_stateid(inode, stateid); if (!dp) goto out; - status = check_stateid_generation(stateid, &dp->dl_stateid); + status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) goto out; renew_client(dp->dl_client); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6a726d..dd81ac1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3194,7 +3194,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.head[0]; iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); - if (resp->cstate.slot != NULL) { + if (nfsd4_has_session(&resp->cstate)) { if (resp->cstate.status == nfserr_replay_cache && !nfsd4_not_cached(resp)) { iov->iov_len = resp->cstate.iovlen; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 836c0d6b..4d61c87 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -320,6 +320,7 @@ struct nfs4_stateid { }; /* flags for preprocess_seqid_op() */ +#define HAS_SESSION 0x00000001 #define CONFIRM 0x00000002 #define OPEN_STATE 0x00000004 #define LOCK_STATE 0x00000008 diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index a0a2e83..b8b3dcb 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -56,6 +56,11 @@ struct nfsd4_compound_state { u32 status; }; +static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) +{ + return cs->slot != NULL; +} + struct nfsd4_change_info { u32 atomic; u32 before_ctime_sec; @@ -536,7 +541,8 @@ extern __be32 nfsd4_sequence(struct svc_rqst *, extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); -extern __be32 nfsd4_process_open1(struct nfsd4_open *open); +extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, + struct nfsd4_open *open); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, -- cgit v0.10.2 From 496c262cf01106a546ffb7df6fea84b8b881ee19 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:48 +0300 Subject: nfsd41: check encode size for sessions maxresponse cached Calculate the space the compound response has taken after encoding the current operation. pad: add on 8 bytes for the next operation's op_code and status so that there is room to cache a failure on the next operation. Compare this length to the session se_fmaxresp_cached and return nfserr_rep_too_big_to_cache if the length is too large. Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so will be at least a page and will therefore hold the xdr_buf head. Signed-off-by: Andy Adamson [nfsd41: non-page DRC for solo sequence responses] [fixed nfsd4_check_drc_limit cosmetics] Signed-off-by: Benny Halevy [nfsd41: use cstate session in nfsd4_check_drc_limit] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index dd81ac1..7fdee82 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3078,6 +3078,54 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, }; +/* + * Calculate the total amount of memory that the compound response has taken + * after encoding the current operation. + * + * pad: add on 8 bytes for the next operation's op_code and status so that + * there is room to cache a failure on the next operation. + * + * Compare this length to the session se_fmaxresp_cached. + * + * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so + * will be at least a page and will therefore hold the xdr_buf head. + */ +static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) +{ + int status = 0; + struct xdr_buf *xb = &resp->rqstp->rq_res; + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; + struct nfsd4_session *session = NULL; + struct nfsd4_slot *slot = resp->cstate.slot; + u32 length, tlen = 0, pad = 8; + + if (!nfsd4_has_session(&resp->cstate)) + return status; + + session = resp->cstate.session; + if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) + return status; + + if (resp->opcnt >= args->opcnt) + pad = 0; /* this is the last operation */ + + if (xb->page_len == 0) { + length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; + } else { + if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) + tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; + + length = xb->head[0].iov_len + xb->page_len + tlen + pad; + } + dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, + length, xb->page_len, tlen, pad); + + if (length <= session->se_fmaxresp_cached) + return status; + else + return nfserr_rep_too_big_to_cache; +} + void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { @@ -3094,6 +3142,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); + /* nfsd4_check_drc_limit guarantees enough room for error status */ + if (!op->status && nfsd4_check_drc_limit(resp)) + op->status = nfserr_rep_too_big_to_cache; status: /* * Note: We write the status directly, instead of using WRITE32(), -- cgit v0.10.2 From 60adfc50de3855628dea8f8896a65f471f51301c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:50 +0300 Subject: nfsd41: clientid handling Extract the clientid from sessionid to set the op_clientid on open. Verify that the clid for other stateful ops is zero for minorversion != 0 Do all other checks for stateful ops without sessions. Signed-off-by: Benny Halevy Signed-off-by: Andy Adamson [fixed whitespace indent] Signed-off-by: Benny Halevy [nfsd41 remove sl_session from nfsd4_open] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index aeb4888..3fdcca5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -162,6 +162,15 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ return status; } +static void +copy_clientid(clientid_t *clid, struct nfsd4_session *session) +{ + struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)session->se_sessionid.data; + + clid->cl_boot = sid->clientid.cl_boot; + clid->cl_id = sid->clientid.cl_id; +} static __be32 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -178,6 +187,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + if (nfsd4_has_session(cstate)) + copy_clientid(&open->op_clientid, cstate->session); + nfs4_lock_state(); /* check seqid for replay. set nfs4_owner */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bbaf3c9..d227f85 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -603,8 +603,8 @@ STALE_CLIENTID(clientid_t *clid) { if (clid->cl_boot == boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x)\n", - clid->cl_boot, clid->cl_id); + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + clid->cl_boot, clid->cl_id, boot_time); return 1; } @@ -2965,8 +2965,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (lock->lk_is_new) { if (!sop->so_is_open_owner) return nfserr_bad_stateid; - if (!same_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; + if (!(flags & HAS_SESSION) && + !same_clid(&clp->cl_clientid, lockclid)) + return nfserr_bad_stateid; /* stp is the open stateid */ status = nfs4_check_openmode(stp, lkflg); if (status) @@ -3507,7 +3508,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_file *fp; status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lock->lk_new_clientid)) + if (!nfsd4_has_session(cstate) && + STALE_CLIENTID(&lock->lk_new_clientid)) goto out; /* validate and update open stateid and open seqid */ @@ -3661,7 +3663,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lockt->lt_clientid)) + if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) goto out; if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7fdee82..059d4aa 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -189,6 +189,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) return p; } +static int zero_clientid(clientid_t *clid) +{ + return (clid->cl_boot == 0) && (clid->cl_id == 0); +} + static int defer_free(struct nfsd4_compoundargs *argp, void (*release)(const void *), void *p) @@ -584,6 +589,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); + if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) + return nfserr_inval; DECODE_TAIL; } @@ -994,6 +1001,8 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) + return nfserr_inval; DECODE_TAIL; } -- cgit v0.10.2 From d87a8ade95288f28c729e076cd74929f3f199b6c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:53 +0300 Subject: nfsd41: access_valid For nfs41, the open share flags are used also for delegation "wants" and "signals". Check that they are valid. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3fdcca5..db208dd8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -910,6 +910,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->tag = args->tag; resp->opcnt = 0; resp->rqstp = rqstp; + resp->cstate.minorversion = args->minorversion; resp->cstate.replay_owner = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d227f85..374aaf3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1943,11 +1943,21 @@ find_file(struct inode *ino) return NULL; } -static inline int access_valid(u32 x) +static inline int access_valid(u32 x, u32 minorversion) { - if (x < NFS4_SHARE_ACCESS_READ) + if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ) return 0; - if (x > NFS4_SHARE_ACCESS_BOTH) + if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH) + return 0; + x &= ~NFS4_SHARE_ACCESS_MASK; + if (minorversion && x) { + if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL) + return 0; + if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED) + return 0; + x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK); + } + if (x) return 0; return 1; } @@ -2495,7 +2505,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf __be32 status; status = nfserr_inval; - if (!access_valid(open->op_share_access) + if (!access_valid(open->op_share_access, resp->cstate.minorversion) || !deny_valid(open->op_share_deny)) goto out; /* @@ -3104,7 +3114,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, (int)cstate->current_fh.fh_dentry->d_name.len, cstate->current_fh.fh_dentry->d_name.name); - if (!access_valid(od->od_share_access) + if (!access_valid(od->od_share_access, cstate->minorversion) || !deny_valid(od->od_share_deny)) return nfserr_inval; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index b8b3dcb..5e16935 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -53,6 +53,7 @@ struct nfsd4_compound_state { struct nfsd4_slot *slot; __be32 *statp; size_t iovlen; + u32 minorversion; u32 status; }; -- cgit v0.10.2 From 84459a1162801fb84734e5f9e6dc5194f791d69b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:56 +0300 Subject: nfsd41: add OPEN4_SHARE_ACCESS_WANT nfs4_stateid bmap Separate the access bits from the want bits and enable __set_bit to work correctly with st_access_bmap. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 374aaf3..c65a27b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1889,7 +1889,8 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_stateid.si_generation = 0; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, + &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); stp->st_openstp = NULL; } -- cgit v0.10.2 From 8daf220a6a83c47b9648c28bb819c14c60bad7f9 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:59 +0300 Subject: nfsd41: control nfsv4.1 svc via /proc/fs/nfsd/versions Support enabling and disabling nfsv4.1 via /proc/fs/nfsd/versions by writing the strings "+4.1" or "-4.1" correspondingly. Use user mode nfs-utils (rpc.nfsd option) to enable. This will allow us to get rid of CONFIG_NFSD_V4_1 [nfsd41: disable support for minorversion by default] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index db208dd8..52bb14d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -921,7 +921,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (args->minorversion > nfsd_supported_minorversion) goto out; if (!nfs41_op_ordering_ok(args)) { diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4adebb6..a9b8c75 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -792,8 +792,9 @@ out_free: static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; - char *vers, sign; + char *vers, *minorp, sign; int len, num; + unsigned minor; ssize_t tlen = 0; char *sep; @@ -814,9 +815,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) do { sign = *vers; if (sign == '+' || sign == '-') - num = simple_strtol((vers+1), NULL, 0); + num = simple_strtol((vers+1), &minorp, 0); else - num = simple_strtol(vers, NULL, 0); + num = simple_strtol(vers, &minorp, 0); + if (*minorp == '.') { + if (num < 4) + return -EINVAL; + minor = simple_strtoul(minorp+1, NULL, 0); + if (minor == 0) + return -EINVAL; + if (nfsd_minorversion(minor, sign == '-' ? + NFSD_CLEAR : NFSD_SET) < 0) + return -EINVAL; + goto next; + } switch(num) { case 2: case 3: @@ -826,6 +838,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) default: return -EINVAL; } + next: vers += len + 1; tlen += len; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -844,6 +857,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) num); sep = " "; } + if (nfsd_vers(4, NFSD_AVAIL)) + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) + len += sprintf(buf+len, " %c4.%u", + (nfsd_vers(4, NFSD_TEST) && + nfsd_minorversion(minor, NFSD_TEST)) ? + '+' : '-', + minor); len += sprintf(buf+len, "\n"); return len; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b53a098..e9d5773 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -121,6 +121,8 @@ struct svc_program nfsd_program = { }; +u32 nfsd_supported_minorversion; + int nfsd_vers(int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) @@ -147,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change) } return 0; } + +int nfsd_minorversion(u32 minorversion, enum vers_op change) +{ + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + return -1; + switch(change) { + case NFSD_SET: + nfsd_supported_minorversion = minorversion; + break; + case NFSD_CLEAR: + if (minorversion == 0) + return -1; + nfsd_supported_minorversion = minorversion - 1; + break; + case NFSD_TEST: + return minorversion <= nfsd_supported_minorversion; + case NFSD_AVAIL: + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + } + return 0; +} + /* * Maximum number of nfsd processes */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 1f063d4..70339b7 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -53,6 +53,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; +extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; @@ -149,6 +150,7 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; int nfsd_vers(int vers, enum vers_op change); +int nfsd_minorversion(u32 minorversion, enum vers_op change); void nfsd_reset_versions(void); int nfsd_create_serv(void); -- cgit v0.10.2 From f3ec22b5b0b4b220ee600bf1a8b6dc045b4e72bd Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Fri, 3 Apr 2009 08:29:02 +0300 Subject: nfsd41: provide support for minor version 1 at rpc level Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 70339b7..5bed509 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -23,7 +23,7 @@ /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 0 +#define NFSD_SUPPORTED_MINOR_VERSION 1 /* * Flags for nfsd_permission -- cgit v0.10.2 From c0d6fc8a2d55a8235c301aeb6d5254d5992895d1 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:29:05 +0300 Subject: nfsd41: pass writable attrs mask to nfsd4_decode_fattr In preparation for EXCLUSIVE4_1 Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 059d4aa..42f9fb6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -251,9 +251,14 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } +static u32 nfsd_attrmask[] = { + NFSD_WRITEABLE_ATTRS_WORD0, + NFSD_WRITEABLE_ATTRS_WORD1 +}; + static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, - struct nfs4_acl **acl) +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, + struct iattr *iattr, struct nfs4_acl **acl) { int expected_len, len = 0; u32 dummy32; @@ -271,7 +276,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia */ if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) return nfserr_attrnotsupp; - if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1])) return nfserr_inval; READ_BUF(4); @@ -499,7 +504,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) return status; - if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) + status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, + &create->cr_iattr, &create->cr_acl); + if (status) goto out; DECODE_TAIL; @@ -660,7 +667,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: - if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) + status = nfsd4_decode_fattr(argp, open->op_bmval, + nfsd_attrmask, &open->op_iattr, &open->op_acl); + if (status) goto out; break; case NFS4_CREATE_EXCLUSIVE: @@ -859,7 +868,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); if (status) return status; - return nfsd4_decode_fattr(argp, setattr->sa_bmval, + return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, &setattr->sa_iattr, &setattr->sa_acl); } -- cgit v0.10.2 From 95ec28cda323104bbff64fc7ec8ee4c9042e51fa Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:29:08 +0300 Subject: nfsd: dynamically skip encoded fattr bitmap in _nfsd4_verify _nfsd4_verify currently skips 3 words from the encoded buffer begining. With support for 3-word attr bitmaps in nfsd41, nfsd4_encode_fattr may encode 1, 2, or 3 words, and not always 2 as it used to be, hence we need to find out where to skip using the encoded bitmap length. Note: This patch may be applied over pre-nfsd41 nfsd. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 52bb14d..937853f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -775,7 +775,8 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out_kfree; - p = buf + 3; + /* skip bitmap */ + p = buf + 1 + ntohl(buf[0]); status = nfserr_not_same; if (ntohl(*p++) != verify->ve_attrlen) goto out_kfree; -- cgit v0.10.2 From 7e70570647827345352cf6c17461c9fa166f570a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:29:11 +0300 Subject: nfsd41: support for 3-word long attribute bitmask Also, use client minorversion to generate supported attrs Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 937853f..e206053 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -463,8 +463,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; - getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); getattr->ga_fhp = &cstate->current_fh; return nfs_ok; @@ -555,8 +556,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; - readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) @@ -746,8 +748,9 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) - || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) + if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) + || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) + || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) return nfserr_attrnotsupp; if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 42f9fb6..70296fe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -236,6 +236,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[0] = 0; bmval[1] = 0; + bmval[2] = 0; READ_BUF(4); READ32(bmlen); @@ -247,13 +248,16 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) READ32(bmval[0]); if (bmlen > 1) READ32(bmval[1]); + if (bmlen > 2) + READ32(bmval[2]); DECODE_TAIL; } static u32 nfsd_attrmask[] = { NFSD_WRITEABLE_ATTRS_WORD0, - NFSD_WRITEABLE_ATTRS_WORD1 + NFSD_WRITEABLE_ATTRS_WORD1, + NFSD_WRITEABLE_ATTRS_WORD2 }; static __be32 @@ -274,9 +278,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; * read-only attributes return ERR_INVAL. */ - if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) + if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) || + (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) || + (bmval[2] & ~nfsd_suppattrs2(argp->minorversion))) return nfserr_attrnotsupp; - if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1])) + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || + (bmval[2] & ~writable[2])) return nfserr_inval; READ_BUF(4); @@ -411,6 +418,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, goto xdr_error; } } + BUG_ON(bmval[2]); /* no such writeable attr supported yet */ if (len != expected_len) goto xdr_error; @@ -1726,6 +1734,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, { u32 bmval0 = bmval[0]; u32 bmval1 = bmval[1]; + u32 bmval2 = bmval[2]; struct kstat stat; struct svc_fh tempfh; struct kstatfs statfs; @@ -1739,12 +1748,16 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + struct nfsd4_compoundres *resp = rqstp->rq_resp; + u32 minorversion = resp->cstate.minorversion; BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); - BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); - BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); + BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); + BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); + BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { + BUG_ON(bmval[2]); status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); if (status) goto out; @@ -1790,22 +1803,42 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if ((buflen -= 16) < 0) goto out_resource; - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); + if (unlikely(bmval2)) { + WRITE32(3); + WRITE32(bmval0); + WRITE32(bmval1); + WRITE32(bmval2); + } else if (likely(bmval1)) { + WRITE32(2); + WRITE32(bmval0); + WRITE32(bmval1); + } else { + WRITE32(1); + WRITE32(bmval0); + } attrlenp = p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; + u32 word0 = nfsd_suppattrs0(minorversion); + u32 word1 = nfsd_suppattrs1(minorversion); + u32 word2 = nfsd_suppattrs2(minorversion); + if ((buflen -= 12) < 0) goto out_resource; if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; if (!exp->ex_fslocs.locations) word0 &= ~FATTR4_WORD0_FS_LOCATIONS; - WRITE32(2); - WRITE32(word0); - WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); + if (!word2) { + WRITE32(2); + WRITE32(word0); + WRITE32(word1); + } else { + WRITE32(3); + WRITE32(word0); + WRITE32(word1); + WRITE32(word2); + } } if (bmval0 & FATTR4_WORD0_TYPE) { if ((buflen -= 4) < 0) @@ -2115,6 +2148,8 @@ out_acl: } WRITE64(stat.ino); } + BUG_ON(bmval2); /* FIXME: not implemented yet */ + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); *countp = p - buffer; status = nfs_ok; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 5bed509..69ca788 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -346,7 +346,7 @@ extern struct timeval nfssvc_boot; * TIME_BACKUP (unlikely to be supported any time soon) * TIME_CREATE (unlikely to be supported any time soon) */ -#define NFSD_SUPPORTED_ATTRS_WORD0 \ +#define NFSD4_SUPPORTED_ATTRS_WORD0 \ (FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ @@ -358,7 +358,7 @@ extern struct timeval nfssvc_boot; | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) -#define NFSD_SUPPORTED_ATTRS_WORD1 \ +#define NFSD4_SUPPORTED_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ @@ -366,6 +366,35 @@ extern struct timeval nfssvc_boot; | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) +#define NFSD4_SUPPORTED_ATTRS_WORD2 0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ + NFSD4_SUPPORTED_ATTRS_WORD0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ + NFSD4_SUPPORTED_ATTRS_WORD1 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ + NFSD4_SUPPORTED_ATTRS_WORD2 + +static inline u32 nfsd_suppattrs0(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 + : NFSD4_SUPPORTED_ATTRS_WORD0; +} + +static inline u32 nfsd_suppattrs1(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 + : NFSD4_SUPPORTED_ATTRS_WORD1; +} + +static inline u32 nfsd_suppattrs2(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 + : NFSD4_SUPPORTED_ATTRS_WORD2; +} + /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ #define NFSD_WRITEONLY_ATTRS_WORD1 \ (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) @@ -376,6 +405,7 @@ extern struct timeval nfssvc_boot; #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD2 0 #endif /* CONFIG_NFSD_V4 */ diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e16935..c07d8fe 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -102,7 +102,7 @@ struct nfsd4_create { u32 specdata2; } dev; /* NF4BLK, NF4CHR */ } u; - u32 cr_bmval[2]; /* request */ + u32 cr_bmval[3]; /* request */ struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; @@ -117,7 +117,7 @@ struct nfsd4_delegreturn { }; struct nfsd4_getattr { - u32 ga_bmval[2]; /* request */ + u32 ga_bmval[3]; /* request */ struct svc_fh *ga_fhp; /* response */ }; @@ -218,7 +218,7 @@ struct nfsd4_open { stateid_t op_delegate_stateid; /* request - response */ u32 op_create; /* request */ u32 op_createmode; /* request */ - u32 op_bmval[2]; /* request */ + u32 op_bmval[3]; /* request */ union { /* request */ struct iattr iattr; /* UNCHECKED4,GUARDED4 */ nfs4_verifier verf; /* EXCLUSIVE4 */ @@ -271,7 +271,7 @@ struct nfsd4_readdir { nfs4_verifier rd_verf; /* request */ u32 rd_dircount; /* request */ u32 rd_maxcount; /* request */ - u32 rd_bmval[2]; /* request */ + u32 rd_bmval[3]; /* request */ struct svc_rqst *rd_rqstp; /* response */ struct svc_fh * rd_fhp; /* response */ @@ -313,7 +313,7 @@ struct nfsd4_secinfo { struct nfsd4_setattr { stateid_t sa_stateid; /* request */ - u32 sa_bmval[2]; /* request */ + u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; }; @@ -339,7 +339,7 @@ struct nfsd4_setclientid_confirm { /* also used for NVERIFY */ struct nfsd4_verify { - u32 ve_bmval[2]; /* request */ + u32 ve_bmval[3]; /* request */ u32 ve_attrlen; /* request */ char * ve_attrval; /* request */ }; -- cgit v0.10.2 From 8c18f2052e756e7d5dea712fc6e7ed70c00e8a39 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:29:14 +0300 Subject: nfsd41: SUPPATTR_EXCLCREAT attribute Return bitmask for supported EXCLUSIVE4_1 create attributes. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 70296fe..533d14f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2148,7 +2148,12 @@ out_acl: } WRITE64(stat.ino); } - BUG_ON(bmval2); /* FIXME: not implemented yet */ + if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + WRITE32(3); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); *countp = p - buffer; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 29ed600..ec3cd49 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -404,6 +404,8 @@ enum lock_type4 { #define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9) #define FATTR4_WORD0_LEASE_TIME (1UL << 10) #define FATTR4_WORD0_RDATTR_ERROR (1UL << 11) +/* Mandatory in NFSv4.1 */ +#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11) /* Recommended Attributes */ #define FATTR4_WORD0_ACL (1UL << 12) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 69ca788..0ec4d14 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -375,7 +375,7 @@ extern struct timeval nfssvc_boot; NFSD4_SUPPORTED_ATTRS_WORD1 #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ - NFSD4_SUPPORTED_ATTRS_WORD2 + (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) static inline u32 nfsd_suppattrs0(u32 minorversion) { @@ -407,6 +407,18 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ + NFSD_WRITEABLE_ATTRS_WORD0 +/* + * we currently store the exclusive create verifier in the v_{a,m}time + * attributes so the client can't set these at create time using EXCLUSIVE4_1 + */ +#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \ + (NFSD_WRITEABLE_ATTRS_WORD1 & \ + ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)) +#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ + NFSD_WRITEABLE_ATTRS_WORD2 + #endif /* CONFIG_NFSD_V4 */ #endif /* LINUX_NFSD_NFSD_H */ -- cgit v0.10.2 From 79fb54abd285b442e1f30f851902f3ddf58e7704 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:29:17 +0300 Subject: nfsd41: CREATE_EXCLUSIVE4_1 Implement the CREATE_EXCLUSIVE4_1 open mode conforming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 This mode allows the client to atomically create a file if it doesn't exist while setting some of its attributes. It must be implemented if the server supports persistent reply cache and/or pnfs. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e206053..b2883e9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -93,6 +93,21 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o open->op_truncate = 0; if (open->op_create) { + /* FIXME: check session persistence and pnfs flags. + * The nfsv4.1 spec requires the following semantics: + * + * Persistent | pNFS | Server REQUIRED | Client Allowed + * Reply Cache | server | | + * -------------+--------+-----------------+-------------------- + * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * | | | (SHOULD) + * | | and EXCLUSIVE4 | or EXCLUSIVE4 + * | | | (SHOULD NOT) + * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * yes | no | GUARDED4 | GUARDED4 + * yes | yes | GUARDED4 | GUARDED4 + */ + /* * Note: create modes (UNCHECKED,GUARDED...) are the same * in NFSv4 as in v3. diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 533d14f..b820c31 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -260,6 +260,12 @@ static u32 nfsd_attrmask[] = { NFSD_WRITEABLE_ATTRS_WORD2 }; +static u32 nfsd41_ex_attrmask[] = { + NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2 +}; + static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, struct iattr *iattr, struct nfs4_acl **acl) @@ -684,6 +690,17 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(8); COPYMEM(open->op_verf.data, 8); break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + goto xdr_error; + READ_BUF(8); + COPYMEM(open->op_verf.data, 8); + status = nfsd4_decode_fattr(argp, open->op_bmval, + nfsd41_ex_attrmask, &open->op_iattr, + &open->op_acl); + if (status) + goto out; + break; default: goto xdr_error; } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ec3cd49..e3f0cbc 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -368,7 +368,13 @@ enum opentype4 { enum createmode4 { NFS4_CREATE_UNCHECKED = 0, NFS4_CREATE_GUARDED = 1, - NFS4_CREATE_EXCLUSIVE = 2 + NFS4_CREATE_EXCLUSIVE = 2, + /* + * New to NFSv4.1. If session is persistent, + * GUARDED4 MUST be used. Otherwise, use + * EXCLUSIVE4_1 instead of EXCLUSIVE4. + */ + NFS4_CREATE_EXCLUSIVE4_1 = 3 }; enum limit_by4 { diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index c07d8fe..f80d601 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -219,10 +219,8 @@ struct nfsd4_open { u32 op_create; /* request */ u32 op_createmode; /* request */ u32 op_bmval[3]; /* request */ - union { /* request */ - struct iattr iattr; /* UNCHECKED4,GUARDED4 */ - nfs4_verifier verf; /* EXCLUSIVE4 */ - } u; + struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ + nfs4_verifier verf; /* EXCLUSIVE4 */ clientid_t op_clientid; /* request */ struct xdr_netobj op_owner; /* request */ u32 op_seqid; /* request */ @@ -236,8 +234,8 @@ struct nfsd4_open { struct nfs4_stateowner *op_stateowner; /* used during processing */ struct nfs4_acl *op_acl; }; -#define op_iattr u.iattr -#define op_verf u.verf +#define op_iattr iattr +#define op_verf verf struct nfsd4_open_confirm { stateid_t oc_req_stateid /* request */; -- cgit v0.10.2 From 3ef1728898f0285c13aa6901f412b52835e23578 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:29:20 +0300 Subject: nfsd41: Documentation/filesystems/nfs41-server.txt Initial nfs41 server write up describing the status of the linux server implementation. [nfsd41: document unenforced nfs41 compound ordering rules.] [get rid of CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt new file mode 100644 index 0000000..05d81cb --- /dev/null +++ b/Documentation/filesystems/nfs41-server.txt @@ -0,0 +1,161 @@ +NFSv4.1 Server Implementation + +Server support for minorversion 1 can be controlled using the +/proc/fs/nfsd/versions control file. The string output returned +by reading this file will contain either "+4.1" or "-4.1" +correspondingly. + +Currently, server support for minorversion 1 is disabled by default. +It can be enabled at run time by writing the string "+4.1" to +the /proc/fs/nfsd/versions control file. Note that to write this +control file, the nfsd service must be taken down. Use your user-mode +nfs-utils to set this up; see rpc.nfsd(8) + +The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based +on the latest NFSv4.1 Internet Draft: +http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 + +From the many new features in NFSv4.1 the current implementation +focuses on the mandatory-to-implement NFSv4.1 Sessions, providing +"exactly once" semantics and better control and throttling of the +resources allocated for each client. + +Other NFSv4.1 features, Parallel NFS operations in particular, +are still under development out of tree. +See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design +for more information. + +The table below, taken from the NFSv4.1 document, lists +the operations that are mandatory to implement (REQ), optional +(OPT), and NFSv4.0 operations that are required not to implement (MNI) +in minor version 1. The first column indicates the operations that +are not supported yet by the linux server implementation. + +The OPTIONAL features identified and their abbreviations are as follows: + pNFS Parallel NFS + FDELG File Delegations + DDELG Directory Delegations + +The following abbreviations indicate the linux server implementation status. + I Implemented NFSv4.1 operations. + NS Not Supported. + NS* unimplemented optional feature. + P pNFS features implemented out of tree. + PNS pNFS features that are not supported yet (out of tree). + +Operations + + +----------------------+------------+--------------+----------------+ + | Operation | REQ, REC, | Feature | Definition | + | | OPT, or | (REQ, REC, | | + | | MNI | or OPT) | | + +----------------------+------------+--------------+----------------+ + | ACCESS | REQ | | Section 18.1 | +NS | BACKCHANNEL_CTL | REQ | | Section 18.33 | +NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | + | CLOSE | REQ | | Section 18.2 | + | COMMIT | REQ | | Section 18.3 | + | CREATE | REQ | | Section 18.4 | +I | CREATE_SESSION | REQ | | Section 18.36 | +NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | + | DELEGRETURN | OPT | FDELG, | Section 18.6 | + | | | DDELG, pNFS | | + | | | (REQ) | | +NS | DESTROY_CLIENTID | REQ | | Section 18.50 | +I | DESTROY_SESSION | REQ | | Section 18.37 | +I | EXCHANGE_ID | REQ | | Section 18.35 | +NS | FREE_STATEID | REQ | | Section 18.38 | + | GETATTR | REQ | | Section 18.7 | +P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | +P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | + | GETFH | REQ | | Section 18.8 | +NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | +P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | +P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | +P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | + | LINK | OPT | | Section 18.9 | + | LOCK | REQ | | Section 18.10 | + | LOCKT | REQ | | Section 18.11 | + | LOCKU | REQ | | Section 18.12 | + | LOOKUP | REQ | | Section 18.13 | + | LOOKUPP | REQ | | Section 18.14 | + | NVERIFY | REQ | | Section 18.15 | + | OPEN | REQ | | Section 18.16 | +NS*| OPENATTR | OPT | | Section 18.17 | + | OPEN_CONFIRM | MNI | | N/A | + | OPEN_DOWNGRADE | REQ | | Section 18.18 | + | PUTFH | REQ | | Section 18.19 | + | PUTPUBFH | REQ | | Section 18.20 | + | PUTROOTFH | REQ | | Section 18.21 | + | READ | REQ | | Section 18.22 | + | READDIR | REQ | | Section 18.23 | + | READLINK | OPT | | Section 18.24 | +NS | RECLAIM_COMPLETE | REQ | | Section 18.51 | + | RELEASE_LOCKOWNER | MNI | | N/A | + | REMOVE | REQ | | Section 18.25 | + | RENAME | REQ | | Section 18.26 | + | RENEW | MNI | | N/A | + | RESTOREFH | REQ | | Section 18.27 | + | SAVEFH | REQ | | Section 18.28 | + | SECINFO | REQ | | Section 18.29 | +NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | + | | | layout (REQ) | Section 13.12 | +I | SEQUENCE | REQ | | Section 18.46 | + | SETATTR | REQ | | Section 18.30 | + | SETCLIENTID | MNI | | N/A | + | SETCLIENTID_CONFIRM | MNI | | N/A | +NS | SET_SSV | REQ | | Section 18.47 | +NS | TEST_STATEID | REQ | | Section 18.48 | + | VERIFY | REQ | | Section 18.31 | +NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 | + | WRITE | REQ | | Section 18.32 | + +Callback Operations + + +-------------------------+-----------+-------------+---------------+ + | Operation | REQ, REC, | Feature | Definition | + | | OPT, or | (REQ, REC, | | + | | MNI | or OPT) | | + +-------------------------+-----------+-------------+---------------+ + | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | +P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | +NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | +P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | +NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 | +NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | + | CB_RECALL | OPT | FDELG, | Section 20.2 | + | | | DDELG, pNFS | | + | | | (REQ) | | +NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 | + | | | DDELG, pNFS | | + | | | (REQ) | | +NS | CB_RECALL_SLOT | REQ | | Section 20.8 | +NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 | + | | | (REQ) | | +I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 | + | | | DDELG, pNFS | | + | | | (REQ) | | +NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 | + | | | DDELG, pNFS | | + | | | (REQ) | | + +-------------------------+-----------+-------------+---------------+ + +Implementation notes: + +EXCHANGE_ID: +* only SP4_NONE state protection supported +* implementation ids are ignored + +CREATE_SESSION: +* backchannel attributes are ignored +* backchannel security parameters are ignored + +SEQUENCE: +* no support for dynamic slot table renegotiation (optional) + +nfsv4.1 COMPOUND rules: +The following cases aren't supported yet: +* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION, + DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. +* DESTROY_SESSION MUST be the final operation in the COMPOUND request. + -- cgit v0.10.2 From f0ad670d7061efad138df19aefe569263c4ea37b Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 6 Apr 2009 12:00:36 +0300 Subject: nfsd41: define NFSD_DRC_SIZE_SHIFT in set_max_drc Fixes the following compiler error: fs/nfsd/nfssvc.c: In function 'set_max_drc': fs/nfsd/nfssvc.c:240: error: 'NFSD_DRC_SIZE_SHIFT' undeclared CONFIG_NFSD_V4 is not set Reported-by: Alexander Beregalov Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e9d5773..469c931 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -236,6 +236,8 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { + /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; nfsd_serv->sv_drc_pages_used = 0; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 0ec4d14..2b49d67 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -333,9 +333,6 @@ extern struct timeval nfssvc_boot; #define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ -/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ -#define NFSD_DRC_SIZE_SHIFT 7 - /* * The following attributes are currently not supported by the NFSv4 server: * ARCHIVE (deprecated anyway) -- cgit v0.10.2 From 04826f43d4f0a4d56423eb8abb9f2ec9987df5b5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 6 Apr 2009 12:00:49 +0300 Subject: nfsd41: define nfsd4_set_statp as noop for !CONFIG_NFSD_V4 Fixes following modpost error: ERROR: "nfsd4_set_statp" [fs/nfsd/nfsd.ko] undefined! Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index a59a2df..5bccaab 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -75,6 +75,13 @@ int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); + +#ifdef CONFIG_NFSD_V4 void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); +#else /* CONFIG_NFSD_V4 */ +static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) +{ +} +#endif /* CONFIG_NFSD_V4 */ #endif /* NFSCACHE_H */ -- cgit v0.10.2