From 59b7c05fffba030e5d9e72324691e2f99aa69b79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2011 18:22:55 -0700 Subject: Revert "NFS: Ensure that writeback_single_inode() calls write_inode() when syncing" This reverts commit b80c3cb628f0ebc241b02e38dd028969fb8026a2. The reverted commit was rendered obsolete by a VFS fix: commit 5547e8aac6f71505d621a612de2fca0dd988b439 (writeback: Update dirty flags in two steps). We now no longer need to worry about writeback_single_inode() missing our marking the inode for COMMIT in 'do_writepages()' call. Reverting this patch, fixes a performance regression in which the inode would continuously get queued to the dirty list, causing the writeback code to unnecessarily try to send a COMMIT. Signed-off-by: Trond Myklebust Tested-by: Simon Kirby Cc: stable@kernel.org [2.6.35+] diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c9bd2a6..71296cc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -428,7 +428,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req) { __set_page_dirty_nobuffers(req->wb_page); - __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); -- cgit v0.10.2 From 3236c3e1adc0c7ec83eaff1de2d06746b7c5bb28 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Oct 2011 09:49:21 -0400 Subject: nfs: don't redirty inode when ncommit == 0 in nfs_commit_unstable_pages commit 420e3646 allowed the kernel to reduce the number of unnecessary commit calls by skipping the commit when there are a large number of outstanding pages. However, the current test in nfs_commit_unstable_pages does not handle the edge condition properly. When ncommit == 0, then that means that the kernel doesn't need to do anything more for the inode. The current test though in the WB_SYNC_NONE case will return true, and the inode will end up being marked dirty. Once that happens the inode will never be clean until there's a WB_SYNC_ALL flush. Fix this by immediately returning from nfs_commit_unstable_pages when ncommit == 0. Mike noticed this problem initially in RHEL5 (2.6.18-based kernel) which has a backported version of 420e3646. The inode cache there was growing very large. The inode cache was unable to be shrunk since the inodes were all marked dirty. Calling sync() would essentially "fix" the problem -- the WB_SYNC_ALL flush would result in the inodes all being marked clean. What I'm not clear on is how big a problem this is in mainline kernels as the writeback code there is very different. Either way, it seems incorrect to re-mark the inode dirty in this case. Reported-by: Mike McLean Signed-off-by: Jeff Layton Cc: stable@kernel.org [2.6.34+] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 71296cc..46aa438 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1554,6 +1554,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int flags = FLUSH_SYNC; int ret = 0; + /* no commits means nothing needs to be done */ + if (!nfsi->ncommit) + return ret; + if (wbc->sync_mode == WB_SYNC_NONE) { /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. -- cgit v0.10.2 From b9dd3abbbc708da5e3c53424a5b2c66ab580f97e Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 12 Oct 2011 15:09:34 +0800 Subject: nfs: fix bug about IPv6 address scope checking The result from ipv6_addr_scope() always not be a single SCOPE, so we can't use equal to compare the result with IPV6_ADDR_SCOPE_LINKLOCAL at nfs_sockaddr_match_ipaddr6. This patch fixs the problem, and lets checking address before scope_id. Signed-off-by: Mi Jinlong Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5833fbb..b4e41dd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -336,11 +336,12 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; - if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && - sin1->sin6_scope_id != sin2->sin6_scope_id) + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) return 0; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); + return 1; } #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, -- cgit v0.10.2 From 2da956523526e440ef4f4dd174e26f5ac06fe011 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 12 Oct 2011 10:57:42 -0400 Subject: nfs: don't try to migrate pages with active requests nfs_find_and_lock_request will take a reference to the nfs_page and will then put it if the req is already locked. It's possible though that the reference will be the last one. That put then can kick off a whole series of reference puts: nfs_page nfs_open_context dentry inode If the inode ends up being deleted, then the VFS will call truncate_inode_pages. That function will try to take the page lock, but it was already locked when migrate_page was called. The code deadlocks. Fix this by simply refusing the migration request if PagePrivate is already set, indicating that the page is already associated with an active read or write request. We've had a customer test a backported version of this patch and the preliminary results seem good. Cc: stable@kernel.org Cc: Andrea Arcangeli Reported-by: Harshula Jayasuriya Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 46aa438..72813ed 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1691,34 +1691,20 @@ out_error: int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) { - struct nfs_page *req; - int ret; + /* + * If PagePrivate is set, then the page is currently associated with + * an in-progress read or write request. Don't try to migrate it. + * + * FIXME: we could do this in principle, but we'll need a way to ensure + * that we can safely release the inode reference while holding + * the page lock. + */ + if (PagePrivate(page)) + return -EBUSY; nfs_fscache_release_page(page, GFP_KERNEL); - req = nfs_find_and_lock_request(page, false); - ret = PTR_ERR(req); - if (IS_ERR(req)) - goto out; - - ret = migrate_page(mapping, newpage, page); - if (!req) - goto out; - if (ret) - goto out_unlock; - page_cache_get(newpage); - spin_lock(&mapping->host->i_lock); - req->wb_page = newpage; - SetPagePrivate(newpage); - set_page_private(newpage, (unsigned long)req); - ClearPagePrivate(page); - set_page_private(page, 0); - spin_unlock(&mapping->host->i_lock); - page_cache_release(page); -out_unlock: - nfs_clear_page_tag_locked(req); -out: - return ret; + return migrate_page(mapping, newpage, page); } #endif -- cgit v0.10.2 From 516f2e24faa7548a61d9ba790958528469c2e284 Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Thu, 22 Sep 2011 21:50:08 -0400 Subject: pnfsblock: fix return code confusion Always return PTR_ERR, not NULL, from nfs4_blk_get_deviceinfo and nfs4_blk_decode_device. Check for IS_ERR, not NULL, in bl_set_layoutdriver when calling nfs4_blk_get_deviceinfo. Signed-off-by: Jim Rees Signed-off-by: Benny Halevy Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9561c8f..d2432f0 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -805,7 +805,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, struct nfs4_deviceid *d_id) { struct pnfs_device *dev; - struct pnfs_block_dev *rv = NULL; + struct pnfs_block_dev *rv; u32 max_resp_sz; int max_pages; struct page **pages = NULL; @@ -823,18 +823,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, dev = kmalloc(sizeof(*dev), GFP_NOFS); if (!dev) { dprintk("%s kmalloc failed\n", __func__); - return NULL; + return ERR_PTR(-ENOMEM); } pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); if (pages == NULL) { kfree(dev); - return NULL; + return ERR_PTR(-ENOMEM); } for (i = 0; i < max_pages; i++) { pages[i] = alloc_page(GFP_NOFS); - if (!pages[i]) + if (!pages[i]) { + rv = ERR_PTR(-ENOMEM); goto out_free; + } } memcpy(&dev->dev_id, d_id, sizeof(*d_id)); @@ -847,8 +849,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); rc = nfs4_proc_getdeviceinfo(server, dev); dprintk("%s getdevice info returns %d\n", __func__, rc); - if (rc) + if (rc) { + rv = ERR_PTR(rc); goto out_free; + } rv = nfs4_blk_decode_device(server, dev); out_free: @@ -866,7 +870,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) struct pnfs_devicelist *dlist = NULL; struct pnfs_block_dev *bdev; LIST_HEAD(block_disklist); - int status = 0, i; + int status, i; dprintk("%s enter\n", __func__); @@ -898,8 +902,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) for (i = 0; i < dlist->num_devs; i++) { bdev = nfs4_blk_get_deviceinfo(server, fh, &dlist->dev_id[i]); - if (!bdev) { - status = -ENODEV; + if (IS_ERR(bdev)) { + status = PTR_ERR(bdev); goto out_error; } spin_lock(&b_mt_id->bm_lock); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index a83b393..0b1fb0e 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -131,7 +131,7 @@ struct pnfs_block_dev * nfs4_blk_decode_device(struct nfs_server *server, struct pnfs_device *dev) { - struct pnfs_block_dev *rv = NULL; + struct pnfs_block_dev *rv; struct block_device *bd = NULL; struct rpc_pipe_msg msg; struct bl_msg_hdr bl_msg = { @@ -141,7 +141,7 @@ nfs4_blk_decode_device(struct nfs_server *server, uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); struct bl_dev_msg *reply = &bl_mount_reply; - int offset, len, i; + int offset, len, i, rc; dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, @@ -168,8 +168,10 @@ nfs4_blk_decode_device(struct nfs_server *server, dprintk("%s CALLING USERSPACE DAEMON\n", __func__); add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { + rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); + if (rc < 0) { remove_wait_queue(&bl_wq, &wq); + rv = ERR_PTR(rc); goto out; } @@ -187,8 +189,9 @@ nfs4_blk_decode_device(struct nfs_server *server, bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); if (IS_ERR(bd)) { - dprintk("%s failed to open device : %ld\n", - __func__, PTR_ERR(bd)); + rc = PTR_ERR(bd); + dprintk("%s failed to open device : %d\n", __func__, rc); + rv = ERR_PTR(rc); goto out; } -- cgit v0.10.2 From fdc17abbc4b6094b34ee8ff5d91eaba8637594a2 Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Thu, 22 Sep 2011 21:50:09 -0400 Subject: pnfsblock: fix size of upcall message Make the status field explicitly 32 bits. "...it's unlikely that the kernel and userspace would differ on the size of an int here, but it might be a good idea to go ahead and make that explicitly 32 bits in case we end up dealing with more exotic arches at some point in the future." Suggested-by: Jeff Layton Signed-off-by: Jim Rees Signed-off-by: Benny Halevy Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f27d827..58dc256 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) } struct bl_dev_msg { - int status; + int32_t status; uint32_t major, minor; }; -- cgit v0.10.2 From c1225158a8dad9e9d5eee8a17dbbd9c7cda05ab9 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:10 -0400 Subject: SUNRPC/NFS: make rpc pipe upcall generic The same function is used by idmap, gss and blocklayout code. Make it generic. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index d2432f0..dc23833 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -964,7 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { }; static const struct rpc_pipe_ops bl_upcall_ops = { - .upcall = bl_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = bl_pipe_downcall, .destroy_msg = bl_pipe_destroy_msg, }; diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 58dc256..42acf7e 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq; #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ /* blocklayoutdev.c */ -ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *, - char __user *, size_t); ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); void bl_pipe_destroy_msg(struct rpc_pipe_msg *); struct block_device *nfs4_blkdev_get(dev_t dev); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 0b1fb0e..d08ba91 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev) return blkdev_put(bdev, FMODE_READ); } -/* - * Shouldn't there be a rpc_generic_upcall() to do this for us? - */ -ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) -{ - char *data = (char *)msg->data + msg->copied; - size_t mlen = min(msg->len - msg->copied, buflen); - unsigned long left; - - left = copy_to_user(dst, data, mlen); - if (left == mlen) { - msg->errno = -EFAULT; - return -EFAULT; - } - - mlen -= left; - msg->copied += mlen; - msg->errno = 0; - return mlen; -} - static struct bl_dev_msg bl_mount_reply; ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index f20801a..47d1c6f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -336,8 +336,6 @@ struct idmap { struct idmap_hashtable idmap_group_hash; }; -static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, - char __user *, size_t); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); @@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); static const struct rpc_pipe_ops idmap_upcall_ops = { - .upcall = idmap_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, }; @@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, return ret; } -/* RPC pipefs upcall/downcall routines */ -static ssize_t -idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) -{ - char *data = (char *)msg->data + msg->copied; - size_t mlen = min(msg->len, buflen); - unsigned long left; - - left = copy_to_user(dst, data, mlen); - if (left == mlen) { - msg->errno = -EFAULT; - return -EFAULT; - } - - mlen -= left; - msg->copied += mlen; - msg->errno = 0; - return mlen; -} - static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cf14db9..e4ea430 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -44,6 +44,8 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } +extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 364eb45..e9b7693 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -603,26 +603,6 @@ out: return err; } -static ssize_t -gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) -{ - char *data = (char *)msg->data + msg->copied; - size_t mlen = min(msg->len, buflen); - unsigned long left; - - left = copy_to_user(dst, data, mlen); - if (left == mlen) { - msg->errno = -EFAULT; - return -EFAULT; - } - - mlen -= left; - msg->copied += mlen; - msg->errno = 0; - return mlen; -} - #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -1590,7 +1570,7 @@ static const struct rpc_credops gss_nullops = { }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, @@ -1598,7 +1578,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = { }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b181e34..67dbc18 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -77,6 +77,26 @@ rpc_timeout_upcall_queue(struct work_struct *work) rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } +ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len - msg->copied, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} +EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); + /** * rpc_queue_upcall - queue an upcall message to userspace * @inode: inode of upcall pipe on which to queue given message -- cgit v0.10.2 From 760383f1ee4d14b0e0bdf0cddee648d9b8633429 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:11 -0400 Subject: pnfsblock: add missing rpc_put_mount and path_put Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index dc23833..dee6cae 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -993,17 +993,20 @@ static int __init nfs4blocklayout_init(void) mnt, NFS_PIPE_DIRNAME, 0, &path); if (ret) - goto out_remove; + goto out_putrpc; bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, &bl_upcall_ops, 0); + path_put(&path); if (IS_ERR(bl_device_pipe)) { ret = PTR_ERR(bl_device_pipe); - goto out_remove; + goto out_putrpc; } out: return ret; +out_putrpc: + rpc_put_mount(); out_remove: pnfs_unregister_layoutdriver(&blocklayout_type); return ret; @@ -1016,6 +1019,7 @@ static void __exit nfs4blocklayout_exit(void) pnfs_unregister_layoutdriver(&blocklayout_type); rpc_unlink(bl_device_pipe); + rpc_put_mount(); } MODULE_ALIAS("nfs-layouttype4-3"); -- cgit v0.10.2 From 1b0ae068779874f54b55aac3a2a992bcf3f2c3c4 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:12 -0400 Subject: pnfs: make _set_lo_fail generic file layout and block layout both use it to set mark layout io failure bit. So make it generic. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index dee6cae..2167ba2a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -176,17 +176,6 @@ retry: return bio; } -static void bl_set_lo_fail(struct pnfs_layout_segment *lseg) -{ - if (lseg->pls_range.iomode == IOMODE_RW) { - dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - } else { - dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); - } -} - /* This is basically copied from mpage_end_io_read */ static void bl_end_io_read(struct bio *bio, int err) { @@ -206,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err) if (!uptodate) { if (!rdata->pnfs_error) rdata->pnfs_error = -EIO; - bl_set_lo_fail(rdata->lseg); + pnfs_set_lo_fail(rdata->lseg); } bio_put(bio); put_parallel(par); @@ -370,7 +359,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) if (!uptodate) { if (!wdata->pnfs_error) wdata->pnfs_error = -EIO; - bl_set_lo_fail(wdata->lseg); + pnfs_set_lo_fail(wdata->lseg); } bio_put(bio); put_parallel(par); @@ -386,7 +375,7 @@ static void bl_end_io_write(struct bio *bio, int err) if (!uptodate) { if (!wdata->pnfs_error) wdata->pnfs_error = -EIO; - bl_set_lo_fail(wdata->lseg); + pnfs_set_lo_fail(wdata->lseg); } bio_put(bio); put_parallel(par); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e8915d4..4c78c62 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -/* For data server errors we don't recover from */ -static void -filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) -{ - if (lseg->pls_range.iomode == IOMODE_RW) { - dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - } else { - dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); - } -} - static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, @@ -145,7 +132,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { - filelayout_set_lo_fail(data->lseg); + pnfs_set_lo_fail(data->lseg); nfs4_reset_read(task, data); clp = NFS_SERVER(data->inode)->nfs_client; } @@ -221,7 +208,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { - filelayout_set_lo_fail(data->lseg); + pnfs_set_lo_fail(data->lseg); nfs4_reset_write(task, data); clp = NFS_SERVER(data->inode)->nfs_client; } else @@ -256,7 +243,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { prepare_to_resend_writes(data); - filelayout_set_lo_fail(data->lseg); + pnfs_set_lo_fail(data->lseg); } else nfs_restart_rpc(task, data->ds_clp); return -EAGAIN; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e550e88..6b19fff 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1381,6 +1381,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) } } +void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) +{ + if (lseg->pls_range.iomode == IOMODE_RW) { + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + } else { + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + } +} +EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); + void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 01cbfd5..94e760e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); -- cgit v0.10.2 From 8ce160c5ef06cc89c2b6b26bfa5ef7a5ce2c93e0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:14 -0400 Subject: pnfs: recoalesce when ld write pagelist fails For pnfs pagelist write failure, we need to pg_recoalesce and resend IO to mds. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6b19fff..a205c8e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); /* * Called by non rpc-based layout drivers */ -int -pnfs_ld_write_done(struct nfs_write_data *data) +void pnfs_ld_write_done(struct nfs_write_data *data) { - int status; - - if (!data->pnfs_error) { + if (likely(!data->pnfs_error)) { pnfs_set_layoutcommit(data); data->mds_ops->rpc_call_done(&data->task, data); - data->mds_ops->rpc_release(data); - return 0; + } else { + put_lseg(data->lseg); + data->lseg = NULL; + dprintk("pnfs write error = %d\n", data->pnfs_error); } - - dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, - data->pnfs_error); - status = nfs_initiate_write(data, NFS_CLIENT(data->inode), - data->mds_ops, NFS_FILE_SYNC); - return status ? : -EAGAIN; + data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94e760e..71c23d4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -201,7 +201,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); -int pnfs_ld_write_done(struct nfs_write_data *); +void pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 72813ed..106fd06 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1166,7 +1166,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) static void nfs_writeback_release_full(void *calldata) { struct nfs_write_data *data = calldata; - int status = data->task.tk_status; + int ret, status = data->task.tk_status; + struct nfs_pageio_descriptor pgio; + + if (data->pnfs_error) { + nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE); + pgio.pg_recoalesce = 1; + } /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { @@ -1182,6 +1188,11 @@ static void nfs_writeback_release_full(void *calldata) req->wb_bytes, (long long)req_offset(req)); + if (data->pnfs_error) { + dprintk(", pnfs error = %d\n", data->pnfs_error); + goto next; + } + if (status < 0) { nfs_set_pageerror(page); nfs_context_set_write_error(req->wb_context, status); @@ -1201,7 +1212,19 @@ remove_request: next: nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); + if (data->pnfs_error) { + lock_page(page); + nfs_pageio_cond_complete(&pgio, page->index); + ret = nfs_page_async_flush(&pgio, page, 0); + if (ret) { + nfs_set_pageerror(page); + dprintk("rewrite to MDS error = %d\n", ret); + } + unlock_page(page); + } } + if (data->pnfs_error) + nfs_pageio_complete(&pgio); nfs_writedata_release(calldata); } -- cgit v0.10.2 From 9b7eecdcfeb943f130d86bbc249fde4994b6fe30 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:15 -0400 Subject: pnfs: recoalesce when ld read pagelist fails For pnfs pagelist read failure, we need to pg_recoalesce and resend IO to mds. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a205c8e..ee73d9a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1262,23 +1262,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); /* * Called by non rpc-based layout drivers */ -int -pnfs_ld_read_done(struct nfs_read_data *data) +void pnfs_ld_read_done(struct nfs_read_data *data) { - int status; - - if (!data->pnfs_error) { + if (likely(!data->pnfs_error)) { __nfs4_read_done_cb(data); data->mds_ops->rpc_call_done(&data->task, data); - data->mds_ops->rpc_release(data); - return 0; + } else { + put_lseg(data->lseg); + data->lseg = NULL; + dprintk("pnfs write error = %d\n", data->pnfs_error); } - - dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, - data->pnfs_error); - status = nfs_initiate_read(data, NFS_CLIENT(data->inode), - data->mds_ops); - return status ? : -EAGAIN; + data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 71c23d4..1509530 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -202,7 +202,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_write_data *); -int pnfs_ld_read_done(struct nfs_read_data *); +void pnfs_ld_read_done(struct nfs_read_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2171c04..bfc20b1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -541,13 +541,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) static void nfs_readpage_release_full(void *calldata) { struct nfs_read_data *data = calldata; + struct nfs_pageio_descriptor pgio; + if (data->pnfs_error) { + nfs_pageio_init_read_mds(&pgio, data->inode); + pgio.pg_recoalesce = 1; + } while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - nfs_readpage_release(req); + if (!data->pnfs_error) + nfs_readpage_release(req); + else + nfs_pageio_add_request(&pgio, req); } + if (data->pnfs_error) + nfs_pageio_complete(&pgio); nfs_readdata_release(calldata); } -- cgit v0.10.2 From e6d05a757c314ad88d0649d3835a8a1daa964236 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:16 -0400 Subject: pnfsblock: fix NULL pointer dereference bl_add_page_to_bio returns error pointer. bio should be reset to NULL in failure cases as the out path always calls bl_submit_bio. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 2167ba2a..4ddbfbf 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -292,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) bl_end_io_read, par); if (IS_ERR(bio)) { rdata->pnfs_error = PTR_ERR(bio); + bio = NULL; goto out; } } @@ -581,6 +582,7 @@ fill_invalid_ext: bl_end_io_write_zero, par); if (IS_ERR(bio)) { wdata->pnfs_error = PTR_ERR(bio); + bio = NULL; goto out; } /* FIXME: This should be done in bi_end_io */ @@ -629,6 +631,7 @@ next_page: bl_end_io_write, par); if (IS_ERR(bio)) { wdata->pnfs_error = PTR_ERR(bio); + bio = NULL; goto out; } isect += PAGE_CACHE_SECTORS; -- cgit v0.10.2 From 7542274519b3ba87555410c66e8356ac1e3bc9b3 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 22 Sep 2011 21:50:17 -0400 Subject: pnfsblock: fix writeback deadlock We should check if the sector is already initialized before trying to grab the page from page cache. Otherwise when two pages of the same block are written back by two threads each calling from writepage_locked, it can cause deadlock like bellow. [ 1080.972099] INFO: task kswapd0:25 blocked for more than 120 seconds. [ 1080.972377] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1080.972812] kswapd0 D ffff88000c4926c0 0 25 2 0x00000000 [ 1080.972816] ffff88000df276b0 0000000000000046 ffff88000df27640 ffffffff81013ba7 [ 1080.972821] ffff88000c492310 ffff88000df27fd8 ffff88000df27fd8 00000000001d3440 [ 1080.972824] ffff88000c378000 ffff88000c492310 ffff8800175d3d40 ffff880017fc75a8 [ 1080.972828] Call Trace: [ 1080.972860] [] ? read_tsc+0x9/0x19 [ 1080.972877] [] ? lock_page+0x2b/0x2b [ 1080.972899] [] io_schedule+0x63/0x7e [ 1080.972902] [] sleep_on_page+0xe/0x12 [ 1080.972905] [] __wait_on_bit_lock+0x46/0x8f [ 1080.972916] [] ? lock_release_holdtime.part.7+0x6b/0x72 [ 1080.972919] [] __lock_page+0x66/0x68 [ 1080.972928] [] ? autoremove_wake_function+0x3d/0x3d [ 1080.972932] [] lock_page+0x27/0x2b [ 1080.972934] [] find_lock_page+0x34/0x57 [ 1080.972937] [] find_or_create_page+0x34/0x8a [ 1080.972947] [] bl_write_pagelist+0x205/0x6da [blocklayoutdriver] [ 1080.972951] [] ? bl_free_lseg+0x38/0x38 [blocklayoutdriver] [ 1080.972995] [] ? nfs_write_rpcsetup+0x118/0x123 [nfs] [ 1080.973033] [] pnfs_generic_pg_writepages+0x10b/0x1f4 [nfs] [ 1080.973089] [] nfs_pageio_doio+0x1a/0x43 [nfs] [ 1080.973098] [] nfs_pageio_complete+0x16/0x2d [nfs] [ 1080.973108] [] nfs_writepage_locked+0xa0/0xbf [nfs] [ 1080.973119] [] nfs_writepage+0x16/0x2b [nfs] [ 1080.973122] [] ? clear_page_dirty_for_io+0x87/0x9a [ 1080.973133] [] shrink_page_list+0x39b/0x6c8 [ 1080.973139] [] shrink_inactive_list+0x22c/0x39e [ 1080.973144] [] ? lock_release_holdtime.part.7+0x6b/0x72 [ 1080.973148] [] shrink_zone+0x445/0x588 [ 1080.973152] [] balance_pgdat+0x2c2/0x56b [ 1080.973170] [] ? __bitmap_weight+0x34/0x80 [ 1080.973175] [] kswapd+0x2be/0x2fa [ 1080.973179] [] ? __init_waitqueue_head+0x4b/0x4b [ 1080.973183] [] ? balance_pgdat+0x56b/0x56b [ 1080.973187] [] kthread+0xa8/0xb0 [ 1080.973200] [] kernel_thread_helper+0x4/0x10 [ 1080.973205] [] ? __init_kthread_worker+0x5a/0x5a [ 1080.973210] [] ? gs_change+0x13/0x13 [ 1080.973213] no locks held by kswapd0/25. Signed-off-by: Peng Tao Signed-off-by: Jim Rees Cc: stable@kernel.org [3.0] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 4ddbfbf..281ae95 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -533,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) fill_invalid_ext: dprintk("%s need to zero %d pages\n", __func__, npg_zero); for (;npg_zero > 0; npg_zero--) { + if (bl_is_sector_init(be->be_inval, isect)) { + dprintk("isect %llu already init\n", + (unsigned long long)isect); + goto next_page; + } /* page ref released in bl_end_io_write_zero */ index = isect >> PAGE_CACHE_SECTOR_SHIFT; dprintk("%s zero %dth page: index %lu isect %llu\n", @@ -552,8 +557,7 @@ fill_invalid_ext: * PageUptodate: It was read before * sector_initialized: already written out */ - if (PageDirty(page) || PageWriteback(page) || - bl_is_sector_init(be->be_inval, isect)) { + if (PageDirty(page) || PageWriteback(page)) { print_page(page); unlock_page(page); page_cache_release(page); -- cgit v0.10.2 From 45402c38eec740f52422aafc92937c6a4a8c8c0e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 2 Sep 2011 14:39:12 -0700 Subject: nfs/super.c: local functions should be static commit ae50c0b5 "pnfs: client stats" added additional information to the output of /proc/self/mountstats. The new functions introduced are only used in this file and should be marked static. If CONFIG_NFS_V4_1 is not defined, empty stub functions are used. If CONFIG_NFS_V4 is not defined these stub functions are not used at all. Adding static for the functions results in compile warnings: fs/nfs/super.c:743: warning: 'show_sessions' defined but not used fs/nfs/super.c:756: warning: 'show_pnfs' defined but not used Fix this by adding a #ifdef CONFIG_NFS_V4 guard around the two show_ functions. Signed-off-by: H Hartley Sweeten Cc: Trond Myklebust Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5b19b6a..480b3b6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -733,18 +733,22 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } + +#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4_1 -void show_sessions(struct seq_file *m, struct nfs_server *server) +static void show_sessions(struct seq_file *m, struct nfs_server *server) { if (nfs4_has_session(server->nfs_client)) seq_printf(m, ",sessions"); } #else -void show_sessions(struct seq_file *m, struct nfs_server *server) {} +static void show_sessions(struct seq_file *m, struct nfs_server *server) {} +#endif #endif +#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4_1 -void show_pnfs(struct seq_file *m, struct nfs_server *server) +static void show_pnfs(struct seq_file *m, struct nfs_server *server) { seq_printf(m, ",pnfs="); if (server->pnfs_curr_ld) @@ -752,9 +756,10 @@ void show_pnfs(struct seq_file *m, struct nfs_server *server) else seq_printf(m, "not configured"); } -#else /* CONFIG_NFS_V4_1 */ -void show_pnfs(struct seq_file *m, struct nfs_server *server) {} -#endif /* CONFIG_NFS_V4_1 */ +#else +static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} +#endif +#endif static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) { -- cgit v0.10.2 From d77385f23830ee6c400569bac8b37e6eb3b7d360 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2011 16:08:10 -0700 Subject: SUNRPC: Fix rpc_sockaddr2uaddr rpc_sockaddr2uaddr is only used by net/sunrpc/rpcb_clnt.c, where it is used in a non-blockable context in at least one case. Add non-blocking capability by adding a gfp_t argument Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index db7bcaf..d926fd1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -9,6 +9,7 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H +#include #include #include #include @@ -161,7 +162,7 @@ const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); size_t rpc_ntop(const struct sockaddr *, char *, const size_t); size_t rpc_pton(const char *, const size_t, struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); size_t rpc_uaddr2sockaddr(const char *, const size_t, struct sockaddr *, const size_t); diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 4195233..23cd19d 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -255,12 +255,13 @@ EXPORT_SYMBOL_GPL(rpc_pton); /** * rpc_sockaddr2uaddr - Construct a universal address string from @sap. * @sap: socket address + * @gfp_flags: allocation mode * * Returns a %NUL-terminated string in dynamically allocated memory; * otherwise NULL is returned if an error occurred. Caller must * free the returned string. */ -char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) { char portbuf[RPCBIND_MAXUADDRPLEN]; char addrbuf[RPCBIND_MAXUADDRLEN]; @@ -288,7 +289,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap) if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) return NULL; - return kstrdup(addrbuf, GFP_KERNEL); + return kstrdup(addrbuf, gfp_flags); } EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e45d2fb..f588b85 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -410,7 +410,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, unsigned short port = ntohs(sin->sin_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -437,7 +437,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, unsigned short port = ntohs(sin6->sin6_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -686,7 +686,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; case RPCBVERS_2: -- cgit v0.10.2 From 919066d690541f4bd727b0e0fc2f7a20a7e3b3a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2011 16:08:10 -0700 Subject: SUNRPC: Remove unnecessary export of rpc_sockaddr2uaddr It is only used internally by the RPC code. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 23cd19d..4548757 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -291,7 +291,6 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) return kstrdup(addrbuf, gfp_flags); } -EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. -- cgit v0.10.2 From a9a4a87a5942e9271523197a90aaa82349c818fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2011 16:08:46 -0700 Subject: NFS: Use the inode->i_version to cache NFSv4 change attribute information Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 321a66b..7f26540 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -240,7 +240,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; - delegation->change_attr = nfsi->change_attr; + delegation->change_attr = inode->i_version; delegation->cred = get_rpccred(cred); delegation->inode = inode; delegation->flags = 1<vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->change_attr; + auxdata.change_attr = nfsi->vfs_inode.i_version; if (bufmax > sizeof(auxdata)) bufmax = sizeof(auxdata); @@ -244,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, auxdata.ctime = nfsi->vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->change_attr; + auxdata.change_attr = nfsi->vfs_inode.i_version; if (memcmp(data, &auxdata, datalen) != 0) return FSCACHE_CHECKAUX_OBSOLETE; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fe12037..4dc6d07 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -318,7 +318,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) memset(&inode->i_atime, 0, sizeof(inode->i_atime)); memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); - nfsi->change_attr = 0; + inode->i_version = 0; inode->i_size = 0; inode->i_nlink = 0; inode->i_uid = -2; @@ -344,7 +344,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) - nfsi->change_attr = fattr->change_attr; + inode->i_version = fattr->change_attr; else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA; @@ -897,8 +897,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) - && nfsi->change_attr == fattr->pre_change_attr) { - nfsi->change_attr = fattr->change_attr; + && inode->i_version == fattr->pre_change_attr) { + inode->i_version = fattr->change_attr; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; ret |= NFS_INO_INVALID_ATTR; @@ -952,7 +952,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return -EIO; if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && - nfsi->change_attr != fattr->change_attr) + inode->i_version != fattr->change_attr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ @@ -1163,7 +1163,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa } if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { - fattr->pre_change_attr = NFS_I(inode)->change_attr; + fattr->pre_change_attr = inode->i_version; fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && @@ -1244,13 +1244,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { - if (nfsi->change_attr != fattr->change_attr) { + if (inode->i_version != fattr->change_attr) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); - nfsi->change_attr = fattr->change_attr; + inode->i_version = fattr->change_attr; } } else if (server->caps & NFS_CAP_CHANGE_ATTR) invalid |= save_cache_validity; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4700fae..0f0b607 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -753,9 +753,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != nfsi->change_attr) + if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); - nfsi->change_attr = cinfo->after; + dir->i_version = cinfo->after; spin_unlock(&dir->i_lock); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 106fd06..2084a64 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -390,7 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) - nfsi->change_attr++; + inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index eaac770..60a137b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -149,7 +149,6 @@ struct nfs_inode { unsigned long read_cache_jiffies; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; - __u64 change_attr; /* v4 only */ unsigned long attr_gencount; /* "Generation counter" for the attribute cache. This is -- cgit v0.10.2 From 0c2e53f11a6dae9e3af5f50f5ad0382e7c3e0cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2011 16:11:22 -0700 Subject: NFS: Remove the unused "lookupfh()" version of nfs4_proc_lookup() ...and also remove the associated nfs_v4_clientops entry. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0f0b607..b0c01b2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -73,9 +73,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); -static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, @@ -2408,14 +2405,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, return status; } -static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, - const struct nfs_fh *dirfh, const struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, + const struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { + struct nfs_server *server = NFS_SERVER(dir); int status; struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, - .dir_fh = dirfh, + .dir_fh = NFS_FH(dir), .name = name, }; struct nfs4_lookup_res res = { @@ -2431,40 +2429,8 @@ static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, nfs_fattr_init(fattr); - dprintk("NFS call lookupfh %s\n", name->name); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); - dprintk("NFS reply lookupfh: %d\n", status); - return status; -} - -static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, - struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) -{ - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr); - /* FIXME: !!!! */ - if (err == -NFS4ERR_MOVED) { - err = -EREMOTE; - break; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; -} - -static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) -{ - int status; - dprintk("NFS call lookup %s\n", name->name); - status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); - if (status == -NFS4ERR_MOVED) - status = nfs4_get_referral(dir, name, fattr, fhandle); + status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -2485,11 +2451,18 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr), - &exception); - if (err == -EPERM) + int status; + + status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr); + switch (status) { + case -NFS4ERR_MOVED: + err = nfs4_get_referral(dir, name, fattr, fhandle); + break; + case -NFS4ERR_WRONGSEC: nfs_fixup_secinfo_attributes(fattr, fhandle); + } + err = nfs4_handle_exception(NFS_SERVER(dir), + status, &exception); } while (exception.retry); return err; } @@ -6270,7 +6243,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, - .lookupfh = nfs4_proc_lookupfh, .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index abd615d..1e31e1c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1197,9 +1197,6 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - int (*lookupfh)(struct nfs_server *, struct nfs_fh *, - struct qstr *, struct nfs_fh *, - struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v0.10.2 From 08ef7bd3bc04261d14d570ac7eaac3eac947b1ba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2011 16:11:49 -0700 Subject: NFSv4: Translate NFS4ERR_BADNAME into ENOENT when applied to a lookup Both LOOKUP and OPEN operations may return NFS4ERR_BADNAME if we send a an invalid name as a filename argument. As far as the application is concerned, it just has to know that the file doesn't exist, and so ENOENT would be the appropriate reply. We should only return EINVAL if the filename is being used to _create_ a new object on the remote filesystem. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b0c01b2..ba0da50 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1593,8 +1593,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) int status; status = nfs4_run_open_task(data, 0); - if (status != 0 || !data->rpc_done) + if (!data->rpc_done) + return status; + if (status != 0) { + if (status == -NFS4ERR_BADNAME && + !(o_arg->open_flags & O_CREAT)) + return -ENOENT; return status; + } if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); @@ -2455,6 +2461,8 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr); switch (status) { + case -NFS4ERR_BADNAME: + return -ENOENT; case -NFS4ERR_MOVED: err = nfs4_get_referral(dir, name, fattr, fhandle); break; -- cgit v0.10.2 From a1940805d0636c6cdf37636f55b43b9681d53e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Get rid of the unused nfs_read_data->flags field Signed-off-by: Trond Myklebust diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1e31e1c..c1cceb8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1133,7 +1133,6 @@ struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) struct nfs_read_data { - int flags; struct rpc_task task; struct inode *inode; struct rpc_cred *cred; -- cgit v0.10.2 From b8ef70639b609c5d12c618f1d9ffae6ac13aebe3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Get rid of the unused nfs_write_data->flags field Signed-off-by: Trond Myklebust diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c1cceb8..c74595b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1155,7 +1155,6 @@ struct nfs_read_data { }; struct nfs_write_data { - int flags; struct rpc_task task; struct inode *inode; struct rpc_cred *cred; -- cgit v0.10.2 From d00c5d43866720963a265fa3129f3203cac35b8e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Get rid of nfs_restart_rpc() It can trivially be replaced with rpc_restart_call_prepare. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ab12913..c1a1bd8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -457,13 +457,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } -/* - * Helper for restarting RPC calls in the possible presence of NFSv4.1 - * sessions. - */ -static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) -{ - if (nfs4_has_session(clp)) - return rpc_restart_call_prepare(task); - return rpc_restart_call(task); -} diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4c78c62..0911941 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -122,7 +122,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_client *clp = data->ds_clp; int reset = 0; dprintk("%s DS read\n", __func__); @@ -134,9 +133,8 @@ static int filelayout_read_done_cb(struct rpc_task *task, if (reset) { pnfs_set_lo_fail(data->lseg); nfs4_reset_read(task, data); - clp = NFS_SERVER(data->inode)->nfs_client; } - nfs_restart_rpc(task, clp); + rpc_restart_call_prepare(task); return -EAGAIN; } @@ -203,17 +201,13 @@ static int filelayout_write_done_cb(struct rpc_task *task, if (filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, &reset) == -EAGAIN) { - struct nfs_client *clp; - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { pnfs_set_lo_fail(data->lseg); nfs4_reset_write(task, data); - clp = NFS_SERVER(data->inode)->nfs_client; - } else - clp = data->ds_clp; - nfs_restart_rpc(task, clp); + } + rpc_restart_call_prepare(task); return -EAGAIN; } @@ -245,7 +239,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, prepare_to_resend_writes(data); pnfs_set_lo_fail(data->lseg); } else - nfs_restart_rpc(task, data->ds_clp); + rpc_restart_call_prepare(task); return -EAGAIN; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ba0da50..d2ae413 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3191,7 +3191,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) struct nfs_server *server = NFS_SERVER(data->inode); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - nfs_restart_rpc(task, server->nfs_client); + rpc_restart_call_prepare(task); return -EAGAIN; } @@ -3241,7 +3241,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + rpc_restart_call_prepare(task); return -EAGAIN; } if (task->tk_status >= 0) { @@ -3298,7 +3298,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + rpc_restart_call_prepare(task); return -EAGAIN; } nfs_refresh_inode(inode, data->res.fattr); @@ -3838,7 +3838,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { - nfs_restart_rpc(task, data->res.server->nfs_client); + rpc_restart_call_prepare(task); return; } } @@ -4092,8 +4092,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) - nfs_restart_rpc(task, - calldata->server->nfs_client); + rpc_restart_call_prepare(task); } } @@ -4926,7 +4925,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) task->tk_status = 0; /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: - nfs_restart_rpc(task, data->clp); + rpc_restart_call_prepare(task); return; } dprintk("<-- %s\n", __func__); @@ -5767,7 +5766,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) server = NFS_SERVER(lrp->args.inode); if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - nfs_restart_rpc(task, lrp->clp); + rpc_restart_call_prepare(task); return; } spin_lock(&lo->plh_inode->i_lock); @@ -5938,7 +5937,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) } if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - nfs_restart_rpc(task, server->nfs_client); + rpc_restart_call_prepare(task); return; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index bfc20b1..e866a7e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -435,7 +435,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); + rpc_restart_call_prepare(task); } /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index b2fbbde..4f9319a 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -87,7 +87,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct inode *dir = data->dir; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client); + rpc_restart_call_prepare(task); } /** @@ -369,7 +369,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { - nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); + rpc_restart_call_prepare(task); return; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2084a64..ad90b0c9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1305,7 +1305,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - nfs_restart_rpc(task, server->nfs_client); + rpc_restart_call_prepare(task); return; } if (time_before(complain, jiffies)) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c5347d2..f0268ea 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -850,7 +850,9 @@ rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) return 0; - task->tk_action = rpc_prepare_task; + task->tk_action = call_start; + if (task->tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); -- cgit v0.10.2 From fbb5a9abf0d589e9471dc93b18025b7b921d22c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Get rid of unnecessary calls to ClearPageError() in read code The generic file read code does that for us anyway. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e866a7e..09829d96 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -322,7 +322,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head offset += len; } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - ClearPageError(page); desc->pg_rpc_callops = &nfs_read_partial_ops; return ret; out_bad: @@ -357,7 +356,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - ClearPageError(req->wb_page); *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ad90b0c9..2219c88 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1011,7 +1011,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - ClearPageError(req->wb_page); *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); -- cgit v0.10.2 From fba730050d1246d0e6ef44e026e0b584732fec2b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Don't rely on PageError in nfs_readpage_release_partial Don't rely on the PageError flag to tell us if one of the partial reads of the page failed. Instead, replace that with a dedicated flag in the struct nfs_page. Then clean out redundant uses of the PageError flag: the VM no longer checks it for reads. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 09829d96..fd58e90 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -276,7 +276,6 @@ nfs_async_read_error(struct list_head *head) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - SetPageError(req->wb_page); nfs_readpage_release(req); } } @@ -330,7 +329,6 @@ out_bad: list_del(&data->list); nfs_readdata_free(data); } - SetPageError(page); nfs_readpage_release(req); return -ENOMEM; } @@ -460,10 +458,10 @@ static void nfs_readpage_release_partial(void *calldata) int status = data->task.tk_status; if (status < 0) - SetPageError(page); + set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); if (atomic_dec_and_test(&req->wb_complete)) { - if (!PageError(page)) + if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) SetPageUptodate(page); nfs_readpage_release(req); } @@ -656,7 +654,6 @@ readpage_async_filler(void *data, struct page *page) return 0; out_error: error = PTR_ERR(new); - SetPageError(page); out_unlock: unlock_page(page); return error; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index e2791a2..ab465fe 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -34,6 +34,7 @@ enum { PG_NEED_COMMIT, PG_NEED_RESCHED, PG_PNFS_COMMIT, + PG_PARTIAL_READ_FAILED, }; struct nfs_inode; -- cgit v0.10.2 From b6ee8cd2642f6d822dd1a4ba62298b65ff99b72e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2011 12:17:29 -0700 Subject: NFS: Get rid of the nfs_rdata_mempool We don't need a mempool in order to guarantee reliable NFS read performance. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index fd58e90..8b48ec6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,16 +35,13 @@ static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; static struct kmem_cache *nfs_rdata_cachep; -static mempool_t *nfs_rdata_mempool; - -#define MIN_POOL_READ (32) struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) { - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); + struct nfs_read_data *p; + p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); if (p) { - memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) @@ -52,7 +49,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) else { p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); if (!p->pagevec) { - mempool_free(p, nfs_rdata_mempool); + kmem_cache_free(nfs_rdata_cachep, p); p = NULL; } } @@ -64,7 +61,7 @@ void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); - mempool_free(p, nfs_rdata_mempool); + kmem_cache_free(nfs_rdata_cachep, p); } void nfs_readdata_release(struct nfs_read_data *rdata) @@ -716,16 +713,10 @@ int __init nfs_init_readpagecache(void) if (nfs_rdata_cachep == NULL) return -ENOMEM; - nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, - nfs_rdata_cachep); - if (nfs_rdata_mempool == NULL) - return -ENOMEM; - return 0; } void nfs_destroy_readpagecache(void) { - mempool_destroy(nfs_rdata_mempool); kmem_cache_destroy(nfs_rdata_cachep); } -- cgit v0.10.2 From 940aab490215424a269f93d2eba2794fc8b3e269 Mon Sep 17 00:00:00 2001 From: Malahal Naineni Date: Tue, 20 Sep 2011 17:27:14 -0700 Subject: Check validity of cl_rpcclient in nfs_server_list_show As soon as the nfs_client gets created, its cl_rpcclient is set to ERR_PTR(-EINVAL). The rpc client structure is allocated later. Check if the client is ready before using the cl_rpcclient pointer. Signed-off-by: Malahal Naineni Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b4e41dd..873bf00 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1868,6 +1868,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v) /* display one transport per line on subsequent lines */ clp = list_entry(v, struct nfs_client, cl_share_link); + /* Check if the client is initialized */ + if (clp->cl_cons_state != NFS_CS_READY) + return 0; + seq_printf(m, "v%u %s %s %3d %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), -- cgit v0.10.2