From f8e6defe7f4456d8700e5a3796a1e9fb54a88543 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 28 Oct 2011 08:40:00 +1100 Subject: Update NFSD MAINTAINER Neil hasn't really been at all active as a maintainer for some years now. So move his maintainership to CREDITS Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields diff --git a/CREDITS b/CREDITS index 07e32a87..557da47 100644 --- a/CREDITS +++ b/CREDITS @@ -514,6 +514,11 @@ S: Bessemerstraat 21 S: Amsterdam S: The Netherlands +N: NeilBrown +E: neil@brown.name +P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F 497A 39EC 9EDD 5662 81B9 +D: NFSD Maintainer 2000-2007 + N: Zach Brown E: zab@zabbo.net D: maestro pci sound diff --git a/MAINTAINERS b/MAINTAINERS index 4808256..6ab923d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3770,7 +3770,6 @@ S: Odd Fixes KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: "J. Bruce Fields" -M: Neil Brown L: linux-nfs@vger.kernel.org W: http://nfs.sourceforge.net/ S: Supported -- cgit v0.10.2 From b93d87c19821ba7d3ee11557403d782e541071ad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:37:57 -0500 Subject: nfsd4: fix lockowner matching Lockowners are looked up by file as well as by owner, but we were forgetting to do a comparison on the file. This could cause an incorrect result from lockt. (Note looking up the inode from the lockowner is pretty awkward here. The data structures need fixing.) Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 47e94e3..5abced7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3809,16 +3809,29 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; struct nfs4_stateowner *op; list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return lockowner(op); + lo = lockowner(op); + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } -- cgit v0.10.2 From 684e563858018d27acb8f00e30c026215bbd0ffb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 4 Nov 2011 17:08:10 -0400 Subject: nfsd4: cleanup lock clientid handling in sessions case I'd rather the "ignore clientid in sessions case" rule be enforced in just one place. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5abced7..9354dde 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3946,10 +3946,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * lock stateid. */ struct nfs4_ol_stateid *open_stp = NULL; - + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && - STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid)) goto out; /* validate and update open stateid and open seqid */ @@ -3961,8 +3966,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; - if (!nfsd4_has_session(cstate) && - !same_clid(&open_sop->oo_owner.so_client->cl_clientid, + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; /* create lockowner and lock stateid */ -- cgit v0.10.2 From 64a284d07c7d84299a90826950079a8ef11e8204 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Oct 2011 06:57:46 -0400 Subject: nfsd4: maintain one seqid stream per (lockowner, file) Instead of creating a new lockowner and stateid for every open_to_lockowner call, reuse the existing lockowner if it exists. Reported-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9354dde..d09d524 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3905,6 +3905,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) __set_bit(access, &lock_stp->st_access_bmap); } +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + /* * LOCK operation */ @@ -3920,7 +3951,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock file_lock; struct file_lock conflock; __be32 status = 0; - unsigned int strhashval; + bool new_state = false; int lkflg; int err; @@ -3969,21 +4000,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; - /* create lockowner and lock stateid */ - fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->oo_owner.so_client->cl_clientid.cl_id, - &lock->v.new.owner); - /* XXX: Do we need to check for duplicate stateowners on - * the same file, or should they just be allowed (and - * create new stateids)? */ - status = nfserr_jukebox; - lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->oo_owner.so_client, open_stp, lock); - if (lock_sop == NULL) - goto out; - lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); - if (lock_stp == NULL) + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + if (status) goto out; } else { /* lock (lock owner + lock stateid) already exists */ @@ -3993,10 +4012,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_LOCK_STID, &lock_stp); if (status) goto out; - lock_sop = lockowner(lock_stp->st_stateowner); - fp = lock_stp->st_file; } - /* lock_sop and lock_stp have been created or found */ + lock_sop = lockowner(lock_stp->st_stateowner); + fp = lock_stp->st_file; lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); @@ -4071,7 +4089,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && lock->lk_is_new && lock_sop) + if (status && new_state) release_lockowner(lock_sop); if (!cstate->replay_owner) nfs4_unlock_state(); -- cgit v0.10.2 From 65178db42a02c7984f711614546e97e9952d8e01 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:21 -0400 Subject: NFSD: Added fault injection Fault injection on the NFS server makes it easier to test the client's state manager and recovery threads. Simulating errors on the server is easier than finding the right conditions that cause them naturally. This patch uses debugfs to add a simple framework for fault injection to the server. This framework is a config option, and can be enabled through CONFIG_NFSD_FAULT_INJECTION. Assuming you have debugfs mounted to /sys/debug, a set of files will be created in /sys/debug/nfsd/. Writing to any of these files will cause the corresponding action and write a log entry to dmesg. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366..8df1ea4 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -80,3 +80,13 @@ config NFSD_V4 available from http://linux-nfs.org/. If unsure, say N. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee..af32ef0 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 0000000..ce7f075 --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Uses debugfs to create fault injection points for client testing + */ + +#include +#include +#include +#include + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { + char *file; + void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .func = nfsd_forget_clients, + }, + { + .file = "forget_locks", + .func = nfsd_forget_locks, + }, + { + .file = "forget_openowners", + .func = nfsd_forget_openowners, + }, + { + .file = "forget_delegations", + .func = nfsd_forget_delegations, + }, + { + .file = "recall_delegations", + .func = nfsd_recall_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ + struct nfsd_fault_inject_op *op = op_ptr; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + op->func(val); + return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 0000000..90bd057 --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d09d524..a511eff 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4429,6 +4429,121 @@ nfs4_check_open_reclaim(clientid_t *clid) return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; } +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ + struct nfs4_client *clp, *next; + int count = 0; + + nfs4_lock_state(); + list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { + nfsd4_remove_clid_dir(clp); + expire_client(clp); + if (++count == num) + break; + } + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ + release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ + release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, + struct list_head hashtbl[], + unsigned int hashtbl_size, + void (*release_sop)(struct nfs4_stateowner *)) +{ + int i, count = 0; + struct nfs4_stateowner *sop, *next; + + for (i = 0; i < hashtbl_size; i++) { + list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + release_sop(sop); + if (++count == num) + return count; + } + } + return count; +} + +void nfsd_forget_locks(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, lock_ownerstr_hashtbl, + LOCK_HASH_SIZE, release_lockowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, + OPEN_OWNER_HASH_SIZE, release_openowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ + int i, count = 0; + struct nfs4_file *fp; + struct nfs4_delegation *dp, *next; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + deleg_func(dp); + if (++count == num) + return count; + } + } + } + return count; +} + +void nfsd_forget_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + count = nfsd_process_n_delegations(num, unhash_delegation); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + spin_unlock(&recall_lock); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* initialization to perform at module load time: */ int diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c45a2ea..b2e8093 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -18,6 +18,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" +#include "fault_inject.h" /* * We have a single directory with several nodes in it. @@ -1131,6 +1132,9 @@ static int __init init_nfsd(void) retval = nfs4_state_init(); /* nfs4 locking state */ if (retval) return retval; + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; nfsd_stat_init(); /* Statistics */ retval = nfsd_reply_cache_init(); if (retval) @@ -1161,6 +1165,8 @@ out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: nfsd4_free_slabs(); return retval; } @@ -1175,6 +1181,7 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd_idmap_shutdown(); nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); } -- cgit v0.10.2 From 800b927b38c7cdfbd7df39d7a8a4b065637ab7b6 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:22 -0400 Subject: NFSD: Added fault injection script This script provides a convenient way to use the NFSD fault injection framework. Fault injection writes to dmesg using the KERN_INFO flag, so this script will compare the before and after output of `dmesg` to show the user what happened Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh new file mode 100755 index 0000000..06a399a --- /dev/null +++ b/tools/nfsd/inject_fault.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2011 Bryan Schumaker +# +# Script for easier NFSD fault injection + +# Check that debugfs has been mounted +DEBUGFS=`cat /proc/mounts | grep debugfs` +if [ "$DEBUGFS" == "" ]; then + echo "debugfs does not appear to be mounted!" + echo "Please mount debugfs and try again" + exit 1 +fi + +# Check that the fault injection directory exists +DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd +if [ ! -d "$DEBUGDIR" ]; then + echo "$DEBUGDIR does not exist" + echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" + exit 1 +fi + +function help() +{ + echo "Usage $0 injection_type [count]" + echo "" + echo "Injection types are:" + ls $DEBUGDIR + exit 1 +} + +if [ $# == 0 ]; then + help +elif [ ! -f $DEBUGDIR/$1 ]; then + help +elif [ $# != 2 ]; then + COUNT=0 +else + COUNT=$2 +fi + +BEFORE=`mktemp` +AFTER=`mktemp` +dmesg > $BEFORE +echo $COUNT > $DEBUGDIR/$1 +dmesg > $AFTER +# Capture lines that only exist in the $AFTER file +diff $BEFORE $AFTER | grep ">" +rm -f $BEFORE $AFTER -- cgit v0.10.2 From 114a0a08d46cfb0eefb1a882f7866b7f57bfc5ba Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:23 -0400 Subject: NFSD: Added fault injection documentation Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index a57e124..1716874a 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -2,6 +2,8 @@ - this file (nfs-related documentation). Exporting - explanation of how to make filesystems exportable. +fault_injection.txt + - information for using fault injection on the server knfsd-stats.txt - statistics which the NFS server makes available to user space. nfs.txt diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt new file mode 100644 index 0000000..426d166 --- /dev/null +++ b/Documentation/filesystems/nfs/fault_injection.txt @@ -0,0 +1,69 @@ + +Fault Injection +=============== +Fault injection is a method for forcing errors that may not normally occur, or +may be difficult to reproduce. Forcing these errors in a controlled environment +can help the developer find and fix bugs before their code is shipped in a +production system. Injecting an error on the Linux NFS server will allow us to +observe how the client reacts and if it manages to recover its state correctly. + +NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this +feature. + + +Using Fault Injection +===================== +On the client, mount the fault injection server through NFS v4.0+ and do some +work over NFS (open files, take locks, ...). + +On the server, mount the debugfs filesystem to and ls +/nfsd. This will show a list of files that will be used for +injecting faults on the NFS server. As root, write a number n to the file +corresponding to the action you want the server to take. The server will then +process the first n items it finds. So if you want to forget 5 locks, echo '5' +to /nfsd/forget_locks. A value of 0 will tell the server to forget +all corresponding items. A log message will be created containing the number +of items forgotten (check dmesg). + +Go back to work on the client and check if the client recovered from the error +correctly. + + +Available Faults +================ +forget_clients: + The NFS server keeps a list of clients that have placed a mount call. If + this list is cleared, the server will have no knowledge of who the client + is, forcing the client to reauthenticate with the server. + +forget_openowners: + The NFS server keeps a list of what files are currently opened and who + they were opened by. Clearing this list will force the client to reopen + its files. + +forget_locks: + The NFS server keeps a list of what files are currently locked in the VFS. + Clearing this list will force the client to reclaim its locks (files are + unlocked through the VFS as they are cleared from this list). + +forget_delegations: + A delegation is used to assure the client that a file, or part of a file, + has not changed since the delegation was awarded. Clearing this list will + force the client to reaquire its delegation before accessing the file + again. + +recall_delegations: + Delegations can be recalled by the server when another client attempts to + access a file. This test will notify the client that its delegation has + been revoked, forcing the client to reaquire the delegation before using + the file again. + + +tools/nfs/inject_faults.sh script +================================= +This script has been created to ease the fault injection process. This script +will detect the mounted debugfs directory and write to the files located there +based on the arguments passed by the user. For example, running +`inject_faults.sh forget_locks 1` as root will instruct the server to forget +one lock. Running `inject_faults forget_locks` will instruct the server to +forgetall locks. -- cgit v0.10.2 From 72083396074035ffa5cf81b6bb3e55f1d615badf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 15:24:59 -0400 Subject: NFSD: Call nfsd4_init_slabs() from init_nfsd() init_nfsd() was calling free_slabs() during cleanup code, but the call to init_slabs() was hidden in nfsd4_state_init(). This could be confusing to people unfamiliar with the code. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a511eff..1d6812d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2301,7 +2301,7 @@ nfsd4_free_slabs(void) nfsd4_free_slab(&deleg_slab); } -static int +int nfsd4_init_slabs(void) { openowner_slab = kmem_cache_create("nfsd4_openowners", @@ -4546,14 +4546,11 @@ void nfsd_recall_delegations(u64 num) /* initialization to perform at module load time: */ -int +void nfs4_state_init(void) { - int i, status; + int i; - status = nfsd4_init_slabs(); - if (status) - return status; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -4577,7 +4574,6 @@ nfs4_state_init(void) INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); reclaim_str_hashtbl_size = 0; - return 0; } static void diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b2e8093..8daa935 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1129,9 +1129,10 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfs4_state_init(); /* nfs4 locking state */ + retval = nfsd4_init_slabs(); if (retval) return retval; + nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a2..4e2ee29 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned int max_delegations; -int nfs4_state_init(void); +void nfs4_state_init(void); +int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); #else -static inline int nfs4_state_init(void) { return 0; } +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } -- cgit v0.10.2 From c7e8472cf8ae877b232eb506284a5b15cf7efb2c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 14:18:28 -0400 Subject: NFSD: Remove unnecessary whitespace The close parenthesis was hard to find with it spaced so far over. Signed-off-by: Bryan Schumaker [bfields@redhat.com: get all these lines under 80 chars while we're here] Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4e2ee29..1d1e858 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -340,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ -#define NFSD_WRITEONLY_ATTRS_WORD1 \ -(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ -#define NFSD_WRITEABLE_ATTRS_WORD0 \ -(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) -#define NFSD_WRITEABLE_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #define NFSD_WRITEABLE_ATTRS_WORD2 0 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ -- cgit v0.10.2 From 06f1f864d4ae5804e83785308d41f14a08e4b980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:58:18 -0500 Subject: nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER Hash lockowners on just the owner string rather than on (owner, inode). This makes the owner-string lookup needed for RELEASE_LOCKOWNER simpler (currently it's doing at a linear search through the entire hash table!). That may come at the expense of making (owner, inode) lookups more expensive if a client reuses the same lockowner across multiple files. We might add a separate lookup for that. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1d6812d..eec9900 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3746,15 +3746,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, - struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCK_HASH_MASK; -} - static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; /* @@ -3824,7 +3815,7 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; @@ -3847,7 +3838,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * - * strhashval = lock_ownerstr_hashval + * strhashval = open_ownerstr_hashval */ static struct nfs4_lockowner * @@ -3922,7 +3913,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4286,7 +4277,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - int i; + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4301,22 +4292,17 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfserr_locks_held; - /* XXX: we're doing a linear search through all the lockowners. - * Yipes! For now we'll just hope clients aren't really using - * release_lockowner much, but eventually we have to fix these - * data structures. */ INIT_LIST_HEAD(&matches); - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); - } + + list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); } } /* Clients probably won't expect us to return with some (but not all) -- cgit v0.10.2 From 16bfdaafa2c66d8cc81422a97a105a849ca65b3e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:23:30 -0500 Subject: nfsd4: share open and lock owner hash tables Now that they're used in the same way, it's a little simpler to put open and lock owners in the same hash table, and I can't see a reason not to. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eec9900..dcbe7f3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -133,21 +133,21 @@ unsigned int max_delegations; * Open owner state (share locks) */ -/* hash tables for open owners */ -#define OPEN_OWNER_HASH_BITS 8 -#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) -#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); ret += clientid; - return ret & OPEN_OWNER_HASH_MASK; + return ret & OWNER_HASH_MASK; } -static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 @@ -2373,7 +2373,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2436,7 +2436,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) struct nfs4_stateowner *so; struct nfs4_openowner *oo; - list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { oo = openowner(so); renew_client(oo->oo_owner.so_client); @@ -2580,7 +2582,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); oo = find_openstateowner_str(strhashval, open); open->op_openowner = oo; if (!oo) { @@ -3718,13 +3720,7 @@ out: } -/* - * Lock owner state (byte-range locks) - */ #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS 8 -#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) static inline u64 end_offset(u64 start, u64 len) @@ -3746,8 +3742,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3815,11 +3809,13 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; - list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { + if (op->so_is_open_owner) + continue; lo = lockowner(op); if (same_lockowner_ino(lo, inode, clid, owner)) return lo; @@ -3829,7 +3825,7 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { - list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -3838,7 +3834,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * - * strhashval = open_ownerstr_hashval + * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * @@ -3913,7 +3909,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4277,7 +4273,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4294,7 +4290,9 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; if (!same_owner_str(sop, owner, clid)) continue; list_for_each_entry(stp, &sop->so_stateids, @@ -4444,16 +4442,16 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, - struct list_head hashtbl[], - unsigned int hashtbl_size, +static int nfsd_release_n_owners(u64 num, bool is_open_owner void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; struct nfs4_stateowner *sop, *next; - for (i = 0; i < hashtbl_size; i++) { - list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { + if (sop->so_is_open_owner != is_open_owner) + continue; release_sop(sop); if (++count == num) return count; @@ -4467,8 +4465,7 @@ void nfsd_forget_locks(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, lock_ownerstr_hashtbl, - LOCK_HASH_SIZE, release_lockowner_sop); + count = nfsd_release_n_owners(num, false, release_lockowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d locks", count); @@ -4479,8 +4476,7 @@ void nfsd_forget_openowners(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, - OPEN_OWNER_HASH_SIZE, release_openowner_sop); + count = nfsd_release_n_owners(num, true, release_openowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d open owners", count); @@ -4549,11 +4545,8 @@ nfs4_state_init(void) for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); -- cgit v0.10.2 From 353de31b86850309b205a3d4cc1294052471b14d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Nov 2011 19:25:03 -0500 Subject: nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error Reported-by: Stephen Rothwell Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dcbe7f3..7e4edbd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4442,7 +4442,7 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner +static int nfsd_release_n_owners(u64 num, bool is_open_owner, void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; -- cgit v0.10.2 From 009673b439cf74d70a486fca0177e274febd81a7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:40:10 -0500 Subject: nfsd4: add a separate (lockowner, inode) lookup Address the possible performance regression mentioned in "nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER" by providing a separate (lockowner, inode) hash. Really, I doubt this matters much, but I think it's likely we'll change these data structures here and I'd rather that the need for (owner, inode) lookups be well-documented. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7e4edbd..a84978c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -514,6 +514,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) list_del(&lo->lo_owner.so_strhash); list_del(&lo->lo_perstateid); + list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -3722,6 +3723,10 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) + static inline u64 end_offset(u64 start, u64 len) { @@ -3742,6 +3747,15 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) +{ + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCKOWNER_INO_HASH_MASK; +} + +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; + /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3809,14 +3823,10 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - struct nfs4_stateowner *op; - list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { - if (op->so_is_open_owner) - continue; - lo = lockowner(op); + list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3825,7 +3835,12 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -4548,6 +4563,8 @@ nfs4_state_init(void) for (i = 0; i < OWNER_HASH_SIZE; i++) { INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf384..89c2cd8 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -366,6 +366,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_owner_ino_hash; /* hash by owner,file */ struct list_head lo_perstateid; /* for lockowners only */ struct list_head lo_list; /* for temporary uses */ }; -- cgit v0.10.2 From 67114fe6103183190fb0a24f58e5695a80beb2ec Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 17 Nov 2011 23:43:40 +0100 Subject: nfsd4: Use kmemdup rather than duplicating its implementation The semantic patch that makes this change is available in scripts/coccinelle/api/memdup.cocci. Signed-off-by: Thomas Meyer Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a84978c..6ab6779 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -986,12 +986,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); + clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); if (clp->cl_name.data == NULL) { kfree(clp); return NULL; } - memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; return clp; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792..0ec5a1b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp, static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); if (!p) return NULL; - memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; -- cgit v0.10.2 From b2ea70afade7080360ac55c4e64ff7a5fafdb67b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 18 Nov 2011 12:14:49 +0200 Subject: nfsd: Fix oops when parsing a 0 length export expkey_parse() oopses when handling a 0 length export. This is easily triggerable from usermode by writing 0 bytes into '/proc/[proc id]/net/rpc/nfsd.fh/channel'. Below is the log: [ 1402.286893] BUG: unable to handle kernel paging request at ffff880077c49fff [ 1402.287632] IP: [] expkey_parse+0x28/0x2e1 [ 1402.287632] PGD 2206063 PUD 1fdfd067 PMD 1ffbc067 PTE 8000000077c49160 [ 1402.287632] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 1402.287632] CPU 1 [ 1402.287632] Pid: 20198, comm: trinity Not tainted 3.2.0-rc2-sasha-00058-gc65cd37 #6 [ 1402.287632] RIP: 0010:[] [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP: 0018:ffff880077f0fd68 EFLAGS: 00010292 [ 1402.287632] RAX: ffff880077c49fff RBX: 00000000ffffffea RCX: 0000000001043400 [ 1402.287632] RDX: 0000000000000000 RSI: ffff880077c4a000 RDI: ffffffff82283de0 [ 1402.287632] RBP: ffff880077f0fe18 R08: 0000000000000001 R09: ffff880000000000 [ 1402.287632] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880077c4a000 [ 1402.287632] R13: ffffffff82283de0 R14: 0000000001043400 R15: ffffffff82283de0 [ 1402.287632] FS: 00007f25fec3f700(0000) GS:ffff88007d400000(0000) knlGS:0000000000000000 [ 1402.287632] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1402.287632] CR2: ffff880077c49fff CR3: 0000000077e1d000 CR4: 00000000000406e0 [ 1402.287632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1402.287632] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 1402.287632] Process trinity (pid: 20198, threadinfo ffff880077f0e000, task ffff880077db17b0) [ 1402.287632] Stack: [ 1402.287632] ffff880077db17b0 ffff880077c4a000 ffff880077f0fdb8 ffffffff810b411e [ 1402.287632] ffff880000000000 ffff880077db17b0 ffff880077c4a000 ffffffff82283de0 [ 1402.287632] 0000000001043400 ffffffff82283de0 ffff880077f0fde8 ffffffff81111f63 [ 1402.287632] Call Trace: [ 1402.287632] [] ? lock_release+0x1af/0x1bc [ 1402.287632] [] ? might_fault+0x97/0x9e [ 1402.287632] [] ? might_fault+0x4e/0x9e [ 1402.287632] [] cache_do_downcall+0x3e/0x4f [ 1402.287632] [] cache_write.clone.16+0xbb/0x130 [ 1402.287632] [] ? cache_write_pipefs+0x1a/0x1a [ 1402.287632] [] cache_write_procfs+0x19/0x1b [ 1402.287632] [] proc_reg_write+0x8e/0xad [ 1402.287632] [] vfs_write+0xaa/0xfd [ 1402.287632] [] ? fget_light+0x35/0x9e [ 1402.287632] [] sys_write+0x48/0x6f [ 1402.287632] [] system_call_fastpath+0x16/0x1b [ 1402.287632] Code: c0 c9 c3 55 48 63 d2 48 89 e5 48 8d 44 32 ff 41 57 41 56 41 55 41 54 53 bb ea ff ff ff 48 81 ec 88 00 00 00 48 89 b5 58 ff ff ff [ 1402.287632] 38 0a 0f 85 89 02 00 00 c6 00 00 48 8b 3d 44 4a e5 01 48 85 [ 1402.287632] RIP [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP [ 1402.287632] CR2: ffff880077c49fff [ 1402.287632] ---[ end trace 368ef53ff773a5e3 ]--- Cc: "J. Bruce Fields" Cc: Neil Brown Cc: linux-nfs@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b90..5f312ab 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mlen < 1 || mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; -- cgit v0.10.2 From 7710ec36b6f516e026f9e29e50e67d2547c2a79b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 25 Nov 2011 18:44:05 -0500 Subject: svcrpc: make svc_delete_xprt static Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 8620f79..5488e5931 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -118,7 +118,6 @@ void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); void svc_close_xprt(struct svc_xprt *xprt); -void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 447cd0e..8046c6d 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); +static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt) /* * Remove a dead transport */ -void svc_delete_xprt(struct svc_xprt *xprt) +static void svc_delete_xprt(struct svc_xprt *xprt) { struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; -- cgit v0.10.2 From 2fefb8a09e7ed251ae8996e0c69066e74c5aa560 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 11:35:35 -0500 Subject: svcrpc: destroy server sockets all at once There's no reason I can see that we need to call sv_shutdown between closing the two lists of sockets. Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 85c50b4..c84e974 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock { /* * Function prototypes. */ -void svc_close_all(struct list_head *); +void svc_close_all(struct svc_serv *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6e03888..60babf0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -528,16 +528,11 @@ svc_destroy(struct svc_serv *serv) del_timer_sync(&serv->sv_temptimer); - svc_close_all(&serv->sv_tempsocks); + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8046c6d..099ddf9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -929,7 +929,7 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; struct svc_xprt *tmp; @@ -947,6 +947,15 @@ void svc_close_all(struct list_head *xprt_list) } } +void svc_close_all(struct svc_serv *serv) +{ + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); + +} + /* * Handle defer and revisit of requests */ -- cgit v0.10.2 From b4f36f88b3ee7cf26bf0be84e6c7fc15f84dcb71 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 17:00:26 -0500 Subject: svcrpc: avoid memory-corruption on pool shutdown Socket callbacks use svc_xprt_enqueue() to add an xprt to a pool->sp_sockets list. In normal operation a server thread will later come along and take the xprt off that list. On shutdown, after all the threads have exited, we instead manually walk the sv_tempsocks and sv_permsocks lists to find all the xprt's and delete them. So the sp_sockets lists don't really matter any more. As a result, we've mostly just ignored them and hoped they would go away. Which has gotten us into trouble; witness for example ebc63e531cc6 "svcrpc: fix list-corrupting race on nfsd shutdown", the result of Ben Greear noticing that a still-running svc_xprt_enqueue() could re-add an xprt to an sp_sockets list just before it was deleted. The fix was to remove it from the list at the end of svc_delete_xprt(). But that only made corruption less likely--I can see nothing that prevents a svc_xprt_enqueue() from adding another xprt to the list at the same moment that we're removing this xprt from the list. In fact, despite the earlier xpo_detach(), I don't even see what guarantees that svc_xprt_enqueue() couldn't still be running on this xprt. So, instead, note that svc_xprt_enqueue() essentially does: lock sp_lock if XPT_BUSY unset add to sp_sockets unlock sp_lock So, if we do: set XPT_BUSY on every xprt. Empty every sp_sockets list, under the sp_socks locks. Then we're left knowing that the sp_sockets lists are all empty and will stay that way, since any svc_xprt_enqueue() will check XPT_BUSY under the sp_lock and see it set. And *then* we can continue deleting the xprt's. (Thanks to Jeff Layton for being correctly suspicious of this code....) Cc: Ben Greear Cc: Jeff Layton Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 60babf0..1a6c16e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -527,7 +527,15 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ svc_close_all(serv); if (serv->sv_shutdown) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 099ddf9..0d80c06 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -894,14 +894,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -932,28 +925,45 @@ EXPORT_SYMBOL_GPL(svc_close_xprt); static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: - */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + list_for_each_entry(xprt, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_delete_xprt(xprt); + set_bit(XPT_BUSY, &xprt->xpt_flags); } } void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; + struct svc_xprt *xprt; + struct svc_xprt *tmp; + int i; + svc_close_list(&serv->sv_tempsocks); svc_close_list(&serv->sv_permsocks); + + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } + /* + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. + */ + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) + svc_delete_xprt(xprt); + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + BUG_ON(!list_empty(&serv->sv_permsocks)); BUG_ON(!list_empty(&serv->sv_tempsocks)); - } /* -- cgit v0.10.2 From 0cf99b91c669510b785b459c211772091a94efd5 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Nov 2011 10:48:40 +0800 Subject: nfsd41: allow non-reclaim open-by-fh's in 4.1 With NFSv4.0 it was safe to assume that open-by-filehandles were always reclaims. With NFSv4.1 there are non-reclaim open-by-filehandle operations, so we should ensure we're only insisting on reclaims in the OPEN_CLAIM_PREVIOUS case. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fa38336..9415bc4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ { __be32 status; - /* Only reclaims from previously confirmed clients are valid */ - if ((status = nfs4_check_open_reclaim(&open->op_clientid))) - return status; - /* We don't know the target directory, and therefore can not * set the change info */ @@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + status = nfs4_check_open_reclaim(&open->op_clientid); + if (status) + goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, &cstate->current_fh, -- cgit v0.10.2 From 94cf3179ccfc69d727dd884fd0831d82ada6bb06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 1 Dec 2011 17:51:21 -0500 Subject: svcrpc: update outdated BKL comment Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1a6c16e..e9632bb 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -686,8 +686,8 @@ found_pool: * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between - * all pools. Must be called with a svc_get() reference and - * the BKL or another lock to protect access to svc_serv fields. + * all pools. Caller must ensure that mutual exclusion between this and + * server startup or shutdown. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv -- cgit v0.10.2 From bd4620ddf6d6eb3d9e7d073ad601fa4299d46ba9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 6 Dec 2011 14:19:10 +0300 Subject: SUNRPC: create svc_xprt in proper network namespace This patch makes svc_xprt inherit network namespace link from its socket. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5488e5931..dfa90094 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); -void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, +void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0d80c06..0633c7e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -148,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put); * Called by transport drivers to initialize the transport independent * portion of the transport instance. */ -void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, - struct svc_serv *serv) +void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, + struct svc_xprt *xprt, struct svc_serv *serv) { memset(xprt, 0, sizeof(*xprt)); xprt->xpt_class = xcl; @@ -164,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); - xprt->xpt_net = get_net(&init_net); + xprt->xpt_net = get_net(net); } EXPORT_SYMBOL_GPL(svc_xprt_init); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 71bed1c..277909e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { int err, level, optname, one = 1; - svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, + &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; @@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, + &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); @@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &svsk->sk_xprt; - svc_xprt_init(&svc_tcp_bc_class, xprt, serv); + svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ba1296d..894cb42 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, if (!cma_xprt) return NULL; - svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); + svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); -- cgit v0.10.2 From f5c8593b94190aabdcf207a544f082c7816c4fe6 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 7 Dec 2011 12:57:56 +0300 Subject: NFSd: use network-namespace-aware cache registering routines v2: cache_register_net() and cache_unregister_net() GPL exports added This is a cleanup patch. Hope, some day generic cache_register() and cache_unregister() will be removed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f312ab..cf8a6bd 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1226,12 +1226,12 @@ nfsd_export_init(void) int rv; dprintk("nfsd: initializing export module.\n"); - rv = cache_register(&svc_export_cache); + rv = cache_register_net(&svc_export_cache, &init_net); if (rv) return rv; - rv = cache_register(&svc_expkey_cache); + rv = cache_register_net(&svc_expkey_cache, &init_net); if (rv) - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_export_cache, &init_net); return rv; } @@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - cache_unregister(&svc_expkey_cache); - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_expkey_cache, &init_net); + cache_unregister_net(&svc_export_cache, &init_net); svcauth_unix_purge(); dprintk("nfsd: export shutdown complete.\n"); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a2..9409627 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -466,20 +467,20 @@ nfsd_idmap_init(void) { int rv; - rv = cache_register(&idtoname_cache); + rv = cache_register_net(&idtoname_cache, &init_net); if (rv) return rv; - rv = cache_register(&nametoid_cache); + rv = cache_register_net(&nametoid_cache, &init_net); if (rv) - cache_unregister(&idtoname_cache); + cache_unregister_net(&idtoname_cache, &init_net); return rv; } void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + cache_unregister_net(&idtoname_cache, &init_net); + cache_unregister_net(&nametoid_cache, &init_net); } static int diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836..b8daa57 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net) sunrpc_destroy_cache_detail(cd); return ret; } +EXPORT_SYMBOL_GPL(cache_register_net); int cache_register(struct cache_detail *cd) { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } +EXPORT_SYMBOL_GPL(cache_unregister_net); void cache_unregister(struct cache_detail *cd) { -- cgit v0.10.2 From f32f3c2d3f09a586349ca9180885dc8741290fd9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 Dec 2011 15:00:35 -0500 Subject: nfsd4: initialize special stateid's at compile time Stateid's with "other" ("opaque") field all zeros or all ones are reserved. We define all_ones separately on the off chance there will be more such some day, though currently all the other special stateid's have zero other field. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ab6779..213da7b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,12 +49,20 @@ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; static time_t boot_time; -static stateid_t zerostateid; /* bits all 0 */ -static stateid_t onestateid; /* bits all 1 */ + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; + static u64 current_sessionid = 1; -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -4564,7 +4572,6 @@ nfs4_state_init(void) } for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); -- cgit v0.10.2 From 39c4cc0fcc38cff29d5b9884372b17c894c9c080 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 13 Dec 2011 16:35:58 -0500 Subject: NFSD: Only reinitilize the recall_lru list under the recall lock unhash_delegation() will grab the recall lock before calling list_del_init() in each of these places. This patch removes the redundant calls. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 213da7b..19ca9b5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1066,7 +1066,6 @@ expire_client(struct nfs4_client *clp) spin_unlock(&recall_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { @@ -3133,7 +3132,6 @@ nfs4_laundromat(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } test_val = nfsd4_lease; @@ -4674,7 +4672,6 @@ __nfs4_state_shutdown(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } -- cgit v0.10.2 From 2d3475c0ad625f7a43844a6cd0130d136416e604 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 14 Dec 2011 14:39:56 -0500 Subject: NFSD: forget_delegations should use list_for_each_entry_safe Otherwise the for loop could try to use a file recently removed from the file_hashtbl list and oops. Signed-off-by: Bryan Schumaker Tested-by: Casey Bodley Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 19ca9b5..7355fe4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4505,18 +4505,19 @@ void nfsd_forget_openowners(u64 num) int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) { int i, count = 0; - struct nfs4_file *fp; - struct nfs4_delegation *dp, *next; + struct nfs4_file *fp, *fnext; + struct nfs4_delegation *dp, *dnext; for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { deleg_func(dp); if (++count == num) return count; } } } + return count; } -- cgit v0.10.2 From aec39680b02ed55fcbb2bc87ad96eba16a651546 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:30:05 -0500 Subject: nfsd4: fix spurious 4.1 post-reboot failures In the NFSv4.1 case, this could cause a spurious "NFSD: failed to write recovery record (err -17); please check that /var/lib/nfs/v4recovery exists and is writable. Signed-off-by: J. Bruce Fields Reported-by: Steve Dickson diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ed083b9..28712e2 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = PTR_ERR(dentry); goto out_unlock; } - status = -EEXIST; if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ goto out_put; status = mnt_want_write(rec_file->f_path.mnt); if (status) -- cgit v0.10.2 From b8548894bde94ccee836e210274ff401225e9733 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:49:12 -0500 Subject: nfsd4: be forgiving in the absence of the recovery directory If the recovery directory doesn't exist, then behavior after a reboot will be suboptimal. But it's unnecessarily harsh to then prevent the nfsv4 server from working at all. Instead just print a warning (already done in nfsd4_init_recdir()) and soldier on. Tested-by: Lior Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 28712e2..eb01fff 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -127,10 +127,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (!rec_file || clp->cl_firststate) + if (clp->cl_firststate) return 0; - clp->cl_firststate = 1; + if (!rec_file) + return -ENOENT; status = nfs4_save_creds(&original_cred); if (status < 0) return status; -- cgit v0.10.2 From 61c8504c428edcebf23b97775a129c5b393a302b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 22 Dec 2011 18:22:49 -0700 Subject: svcrpc: fix double-free on shutdown of nfsd after changing pool mode The pool_to and to_pool fields of the global svc_pool_map are freed on shutdown, but are initialized in nfsd startup only in the SVC_POOL_PERCPU and SVC_POOL_PERNODE cases. They *are* initialized to zero on kernel startup. So as long as you use only SVC_POOL_GLOBAL (the default), this will never be a problem. You're also OK if you only ever use SVC_POOL_PERCPU or SVC_POOL_PERNODE. However, the following sequence events leads to a double-free: 1. set SVC_POOL_PERCPU or SVC_POOL_PERNODE 2. start nfsd: both fields are initialized. 3. shutdown nfsd: both fields are freed. 4. set SVC_POOL_GLOBAL 5. start nfsd: the fields are left untouched. 6. shutdown nfsd: now we try to free them again. Step 4 is actually unnecessary, since (for some bizarre reason), nfsd automatically resets the pool mode to SVC_POOL_GLOBAL on shutdown. Cc: stable@kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e9632bb..1dd5fd0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -287,7 +288,9 @@ svc_pool_map_put(void) if (!--m->count) { m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } -- cgit v0.10.2 From 9689dcce0b456793c46bdeea7a79adfab1bc9c5d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 23 Dec 2011 13:52:19 -0700 Subject: svcrpc: don't revert to SVC_POOL_DEFAULT on nfsd shutdown This was unexpected behavior (at least for me)--why would you want configuration settings automatically lost on nfsd restart? In practice this won't affect distributions, which likely set everything on every startup. But I'd expect the behavior to be less confusing to someone manually restarting nfsd for testing. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1dd5fd0..9701798 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -286,7 +286,6 @@ svc_pool_map_put(void) mutex_lock(&svc_pool_map_mutex); if (!--m->count) { - m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); m->to_pool = NULL; kfree(m->pool_to); -- cgit v0.10.2 From 9b4146e85536ebd6b989f43906986e34486efdba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Jan 2012 16:26:43 -0500 Subject: NFSD: Change name of extended attribute containing junction As of fedfs-utils-0.8.0, user space stores all NFS junction information in a single extended attribute: "trusted.junction.nfs". Both FedFS and NFS basic junctions are stored in this one attribute, and the intention is that all future forms of NFS junction metadata will be stored in this attribute. Other protocols may use a different extended attribute. Thus NFSD needs to look only for that one extended attribute. The "trusted.junction.type" xattr is deprecated. fedfs-utils-0.8.0 will continue to attach a "trusted.junction.type" xattr to junctions, but future fedfs-utils releases may no longer do that. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7a2e442..8968913 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } -#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." -#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */ int nfsd4_is_junction(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) + if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; } -- cgit v0.10.2 From 7a6ef8c72314f254c107c6a9ed7cb201961ee05a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 5 Jan 2012 15:38:41 -0500 Subject: nfsd4: nfsd4_create_clid_dir return value is unused Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index eb01fff..a52f267 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -117,8 +117,7 @@ out_no_tfm: return status; } -int -nfsd4_create_clid_dir(struct nfs4_client *clp) +void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char *dname = clp->cl_recdir; @@ -128,13 +127,13 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); if (clp->cl_firststate) - return 0; + return; clp->cl_firststate = 1; if (!rec_file) - return -ENOENT; + return; status = nfs4_save_creds(&original_cred); if (status < 0) - return status; + return; dir = rec_file->f_path.dentry; /* lock the parent */ @@ -172,7 +171,6 @@ out_unlock: " and is writeable", status, user_recovery_dirname); nfs4_reset_creds(original_cred); - return status; } typedef int (recdir_func)(struct dentry *, struct dentry *); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 89c2cd8..ffb5df1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -483,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); -- cgit v0.10.2