From 669502ff31d7dba1849aec7ee2450a3c61f57d39 Mon Sep 17 00:00:00 2001 From: Andy Chittenden Date: Tue, 10 Aug 2010 10:19:53 -0400 Subject: SUNRPC: fix NFS client over TCP hangs due to packet loss (Bug 16494) When reusing a TCP connection, ensure that it's aborted if a previous shutdown attempt has been made on that connection so that the RPC over TCP recovery mechanism succeeds. Signed-off-by: Andy Chittenden Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7ca65c7..66d1369 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", + dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); + sock_flag(sk, SOCK_ZAPPED), + sk->sk_shutdown); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra { unsigned int state = transport->inet->sk_state; - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) - return; - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) - return; + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: ESTABLISHED/SYN_SENT " + "sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } xs_abort_connection(xprt, transport); } -- cgit v0.10.2 From f5a73672d1811f2fb1dcb62ca90ceb12b2050ae7 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 10 Aug 2010 10:20:05 -0400 Subject: NFS: allow close-to-open cache semantics to apply to root of NFS filesystem To obey NFS cache semantics, the client must verify the cached attributes when a file is opened. In most cases this is done by a call to d_validate as one of the last steps in path_walk. However for the root of a filesystem, d_validate is only ever called on the mounted-on filesystem (except when the path ends '.' or '..'). So NFS has no chance to validate the attributes. So, in nfs_opendir, we revalidate the attributes if the opened directory is the mountpoint. This may cause double-validation for "." and ".." lookups, but that is better than missing regular /path/name lookups completely. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ce..bd91b27 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); + if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { + /* This is a mountpoint, so d_revalidate will never + * have been called, so we need to refresh the + * inode (for close-open consistency) ourselves. + */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } return res; } -- cgit v0.10.2 From 9b00c64318cc337846a7a08a5678f5f19aeff188 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Tue, 10 Aug 2010 17:28:01 -0400 Subject: nfs: Add "lookupcache" to displayed mount options Running "cat /proc/mounts" fails to display the "lookupcache" option. This oversight cost me a bunch of wasted time recently. The following simple patch fixes it. CC: stable Signed-off-by: Patrick LoPresti Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1ae39f..3d0d63c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + seq_printf(m, ",lookupcache=none"); + else + seq_printf(m, ",lookupcache=pos"); + } } /* -- cgit v0.10.2 From 5d7ca35a182a626f8ed5596023ad42eb219a332e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Aug 2010 12:42:15 -0400 Subject: nfs: Remove redundant NULL check upon kfree() Signed-off-by: Davidlohr Bueso Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98..6b44bbf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2273,8 +2273,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct out: if (page) __free_page(page); - if (locations) - kfree(locations); + kfree(locations); return status; } -- cgit v0.10.2 From 7a8b80eb38b248cfdf84048dad12073d5cfba3e6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 11 Aug 2010 12:47:08 -0400 Subject: xprtrdma: Do not truncate iova_start values in frmr registrations. A bad cast causes the iova_start, which in this case is a 64b DMA bus address, to be truncated on 32b systems. This breaks frmrs on 32b systems. No cast is needed. Signed-off-by: Steve Wise Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 27015c6..3bdbd9f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1490,7 +1490,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = 0; /* unsignaled */ - frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; -- cgit v0.10.2 From 15cdc644b268a9a9ce73ce0b153129222c254b7b Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Wed, 11 Aug 2010 12:47:24 -0400 Subject: rpcrdma: Fix SQ size calculation when memreg is FRMR This patch updates the computation to include the worst case situation where three FRMR are required to map a single RPC REQ. Signed-off-by: Tom Tucker Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5e28d1..2ac3f6e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3bdbd9f..5f4c7b3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - /* Add room for frmr register and invalidate WRs */ - ep->rep_attr.cap.max_send_wr *= 3; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head + * 3. FRMR reg WR for pagelist + * 4. FRMR invalidate WR for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ + ep->rep_attr.cap.max_send_wr *= 7; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { + cdata->max_requests = devattr.max_qp_wr / 7; + if (!cdata->max_requests) + return -EINVAL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: -- cgit v0.10.2 From 0702099bd86c33c2dcdbd3963433a61f3f503901 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 11 Aug 2010 13:10:16 -0400 Subject: NFS: fix the return value of nfs_file_fsync() By the commit af7fa16 2010-08-03 NFS: Fix up the fsync code close(2) became returning the non-zero value even if it went well. nfs_file_fsync() should return 0 when "status" is positive. Signed-off-by: J. R. Okajima Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a7..eb51bd6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); - if (!ret) + if (!ret && status < 0) ret = status; return ret; } -- cgit v0.10.2 From df486a25900f4dba9cdc3886c4ac871951c6aef3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Aug 2010 17:42:45 -0400 Subject: NFS: Fix the selection of security flavours in Kconfig Randy Dunlap reports: ERROR: "svc_gss_principal" [fs/nfs/nfs.ko] undefined! because in fs/nfs/Kconfig, NFS_V4 selects RPCSEC_GSS_KRB5 and/or in fs/nfsd/Kconfig, NFSD_V4 selects RPCSEC_GSS_KRB5. RPCSEC_GSS_KRB5 does 5 selects, but none of these is enforced/followed by the fs/nfs[d]/Kconfig configs: select SUNRPC_GSS select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC Reported-by: Randy Dunlap Cc: J. Bruce Fields Acked-by: Randy Dunlap Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index cc1bb33..2ddc384 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,7 +63,6 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da..95932f5 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,7 +69,6 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL - select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 443c161..3376d76 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL + tristate + depends on SUNRPC && CRYPTO + prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + default y select SUNRPC_GSS - select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC @@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5 available from http://linux-nfs.org/. In addition, user-space Kerberos support should be installed. - If unsure, say N. + If unsure, say Y. config RPCSEC_GSS_SPKM3 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" -- cgit v0.10.2 From 0a377cff9428af2da2b293d11e07bc4dbf064ee5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Aug 2010 09:25:42 -0400 Subject: NFS: Fix an Oops in the NFSv4 atomic open code Adam Lackorzynski reports: with 2.6.35.2 I'm getting this reproducible Oops: [ 110.825396] BUG: unable to handle kernel NULL pointer dereference at (null) [ 110.828638] IP: [] encode_attrs+0x1a/0x2a4 [ 110.828638] PGD be89f067 PUD bf18f067 PMD 0 [ 110.828638] Oops: 0000 [#1] SMP [ 110.828638] last sysfs file: /sys/class/net/lo/operstate [ 110.828638] CPU 2 [ 110.828638] Modules linked in: rtc_cmos rtc_core rtc_lib amd64_edac_mod i2c_amd756 edac_core i2c_core dm_mirror dm_region_hash dm_log dm_snapshot sg sr_mod usb_storage ohci_hcd mptspi tg3 mptscsih mptbase usbcore nls_base [last unloaded: scsi_wait_scan] [ 110.828638] [ 110.828638] Pid: 11264, comm: setchecksum Not tainted 2.6.35.2 #1 [ 110.828638] RIP: 0010:[] [] encode_attrs+0x1a/0x2a4 [ 110.828638] RSP: 0000:ffff88003bf5b878 EFLAGS: 00010296 [ 110.828638] RAX: ffff8800bddb48a8 RBX: ffff88003bf5bb18 RCX: 0000000000000000 [ 110.828638] RDX: ffff8800be258800 RSI: 0000000000000000 RDI: ffff88003bf5b9f8 [ 110.828638] RBP: 0000000000000000 R08: ffff8800bddb48a8 R09: 0000000000000004 [ 110.828638] R10: 0000000000000003 R11: ffff8800be779000 R12: ffff8800be258800 [ 110.828638] R13: ffff88003bf5b9f8 R14: ffff88003bf5bb20 R15: ffff8800be258800 [ 110.828638] FS: 0000000000000000(0000) GS:ffff880041e00000(0063) knlGS:00000000556bd6b0 [ 110.828638] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 110.828638] CR2: 0000000000000000 CR3: 00000000be8ef000 CR4: 00000000000006e0 [ 110.828638] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 110.828638] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 110.828638] Process setchecksum (pid: 11264, threadinfo ffff88003bf5a000, task ffff88003f232210) [ 110.828638] Stack: [ 110.828638] 0000000000000000 ffff8800bfbcf920 0000000000000000 0000000000000ffe [ 110.828638] <0> 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 110.828638] <0> 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 110.828638] Call Trace: [ 110.828638] [] ? nfs4_xdr_enc_setattr+0x90/0xb4 [ 110.828638] [] ? call_transmit+0x1c3/0x24a [ 110.828638] [] ? __rpc_execute+0x78/0x22a [ 110.828638] [] ? rpc_run_task+0x21/0x2b [ 110.828638] [] ? rpc_call_sync+0x3d/0x5d [ 110.828638] [] ? _nfs4_do_setattr+0x11b/0x147 [ 110.828638] [] ? nfs_init_locked+0x0/0x32 [ 110.828638] [] ? ifind+0x4e/0x90 [ 110.828638] [] ? nfs4_do_setattr+0x4b/0x6e [ 110.828638] [] ? nfs4_do_open+0x291/0x3a6 [ 110.828638] [] ? nfs4_open_revalidate+0x63/0x14a [ 110.828638] [] ? nfs_open_revalidate+0xd7/0x161 [ 110.828638] [] ? do_lookup+0x1a4/0x201 [ 110.828638] [] ? link_path_walk+0x6a/0x9d5 [ 110.828638] [] ? do_last+0x17b/0x58e [ 110.828638] [] ? do_filp_open+0x1bd/0x56e [ 110.828638] [] ? _atomic_dec_and_lock+0x30/0x48 [ 110.828638] [] ? dput+0x37/0x152 [ 110.828638] [] ? alloc_fd+0x69/0x10a [ 110.828638] [] ? do_sys_open+0x56/0x100 [ 110.828638] [] ? ia32_sysret+0x0/0x5 [ 110.828638] Code: 83 f1 01 e8 f5 ca ff ff 48 83 c4 50 5b 5d 41 5c c3 41 57 41 56 41 55 49 89 fd 41 54 49 89 d4 55 48 89 f5 53 48 81 ec 18 01 00 00 <8b> 06 89 c2 83 e2 08 83 fa 01 19 db 83 e3 f8 83 c3 18 a8 01 8d [ 110.828638] RIP [] encode_attrs+0x1a/0x2a4 [ 110.828638] RSP [ 110.828638] CR2: 0000000000000000 [ 112.840396] ---[ end trace 95282e83fd77358f ]--- We need to ensure that the O_EXCL flag is turned off if the user doesn't set O_CREAT. Cc: stable@kernel.org Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd91b27..e257172 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1110,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_TRUNC); + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); /* * Note: we're not holding inode->i_mutex and so may be racing with diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b44bbf..089da5b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + int open_flags = nd->intent.open.flags; + fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (!IS_POSIXACL(dir)) attr.ia_mode &= ~current_umask(); } else { + open_flags &= ~O_EXCL; attr.ia_valid = 0; - BUG_ON(nd->intent.open.flags & O_CREAT); + BUG_ON(open_flags & O_CREAT); } cred = rpc_lookup_cred(); @@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { -- cgit v0.10.2