summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c90
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c2
-rw-r--r--fs/nfs/client.c268
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c39
-rw-r--r--fs/nfs/direct.c746
-rw-r--r--fs/nfs/file.c8
-rw-r--r--fs/nfs/fscache.c15
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c85
-rw-r--r--fs/nfs/idmap.c30
-rw-r--r--fs/nfs/inode.c118
-rw-r--r--fs/nfs/internal.h135
-rw-r--r--fs/nfs/namespace.c103
-rw-r--r--fs/nfs/netns.h5
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3proc.c27
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs4_fs.h23
-rw-r--r--fs/nfs/nfs4filelayout.c688
-rw-r--r--fs/nfs/nfs4filelayout.h63
-rw-r--r--fs/nfs/nfs4filelayoutdev.c102
-rw-r--r--fs/nfs/nfs4namespace.c55
-rw-r--r--fs/nfs/nfs4proc.c537
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4state.c225
-rw-r--r--fs/nfs/nfs4xdr.c399
-rw-r--r--fs/nfs/objlayout/objio_osd.c18
-rw-r--r--fs/nfs/objlayout/objlayout.c19
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.c352
-rw-r--r--fs/nfs/pnfs.h127
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c437
-rw-r--r--fs/nfs/super.c760
-rw-r--r--fs/nfs/write.c809
39 files changed, 3743 insertions, 2786 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a0e6c5..f90f4f5 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
If unsure, say N.
+config NFS_V2
+ bool "NFS client support for NFS version 2"
+ depends on NFS_FS
+ default y
+ help
+ This option enables support for version 2 of the NFS protocol
+ (RFC 1094) in the kernel's NFS client.
+
+ If unsure, say Y.
+
config NFS_V3
bool "NFS client support for NFS version 3"
depends on NFS_FS
+ default y
help
This option enables support for version 3 of the NFS protocol
(RFC 1813) in the kernel's NFS client.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d..7ddd45d 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
obj-$(CONFIG_NFS_FS) += nfs.o
-nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
- direct.o pagelist.o proc.o read.o symlink.o unlink.o \
+nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
+ direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o \
dns_resolve.o cache_lib.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
+nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f..7ae8a60 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
do {
struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(page);
} while (bvec >= bio->bi_io_vec);
if (!uptodate) {
- if (!rdata->pnfs_error)
- rdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(rdata->lseg);
+ struct nfs_read_data *rdata = par->data;
+ struct nfs_pgio_header *header = rdata->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;
- rdata->task.tk_status = rdata->pnfs_error;
+ rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *header = rdata->header;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
- rdata->npages, f_offset, (unsigned int)rdata->args.count);
+ rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
par = alloc_parallel(rdata);
if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->npages; i++) {
+ for (i = pg_index; i < rdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
bl_put_extent(cow_read);
bio = bl_submit_bio(READ, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, &cow_read);
if (!be) {
- rdata->pnfs_error = -EIO;
+ header->pnfs_error = -EIO;
goto out;
}
extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+ bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ READ,
isect, pages[i], be_read,
bl_end_io_read, par);
if (IS_ERR(bio)) {
- rdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
}
- if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+ if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = rdata->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - f_offset;
} else {
rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
}
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
do {
struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
} while (bvec >= bio->bi_io_vec);
if (unlikely(!uptodate)) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;
if (!uptodate) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (likely(!wdata->pnfs_error)) {
+ if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
wdata->args.offset, wdata->args.count);
}
pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;
- if (unlikely(wdata->pnfs_error)) {
- bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+ if (unlikely(wdata->header->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
num_se);
}
- wdata->task.tk_status = wdata->pnfs_error;
+ wdata->task.tk_status = wdata->header->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
+ struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
pgoff_t index;
u64 temp;
int npg_per_block =
- NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+ NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
/* At this point, have to be more careful with error handling */
isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
- page = bl_find_get_zeroing_page(wdata->inode, index,
+ page = bl_find_get_zeroing_page(header->inode, index,
cow_read);
if (unlikely(IS_ERR(page))) {
- wdata->pnfs_error = PTR_ERR(page);
+ header->pnfs_error = PTR_ERR(page);
goto out;
} else if (page == NULL)
goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
__func__, ret);
end_page_writeback(page);
page_cache_release(page);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
else {
end_page_writeback(page);
page_cache_release(page);
- wdata->pnfs_error = -ENOMEM;
+ header->pnfs_error = -ENOMEM;
goto out;
}
/* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ mark_extents_written(BLK_LSEG2EXT(header->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);
@@ -632,7 +640,7 @@ fill_invalid_ext:
isect, page, be,
bl_end_io_write_zero, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -647,16 +655,16 @@ next_page:
/* Middle pages */
pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->npages; i++) {
+ for (i = pg_index; i < wdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, NULL);
if (!be || !is_writable(be, isect)) {
- wdata->pnfs_error = -EINVAL;
+ header->pnfs_error = -EINVAL;
goto out;
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
be->be_inval)))
par->bse_count++;
else {
- wdata->pnfs_error = -ENOMEM;
+ header->pnfs_error = -ENOMEM;
goto out;
}
}
@@ -677,15 +685,15 @@ next_page:
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
}
- bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+ bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
bl_end_io_write, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a5c88a5..c965542 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
uint8_t *dataptr;
DECLARE_WAITQUEUE(wq, current);
int offset, len, i, rc;
- struct net *net = server->nfs_client->net;
+ struct net *net = server->nfs_client->cl_net;
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct bl_dev_msg *reply = &nn->bl_mount_reply;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4e..7d10875 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
{
int ret = 0;
- struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
if (clp->rpc_ops->version != 4 || minorversion != 0)
return ret;
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
* RPC cruft for NFS
*/
static const struct rpc_version *nfs_version[5] = {
+#ifdef CONFIG_NFS_V2
[2] = &nfs_version2,
+#endif
#ifdef CONFIG_NFS_V3
[3] = &nfs_version3,
#endif
@@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {
#endif /* CONFIG_NFS_V3_ACL */
struct nfs_client_initdata {
+ unsigned long init_flags;
const char *hostname;
const struct sockaddr *addr;
size_t addrlen;
@@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_rpcclient = ERR_PTR(-EINVAL);
clp->cl_proto = cl_init->proto;
- clp->net = get_net(cl_init->net);
+ clp->cl_net = get_net(cl_init->net);
#ifdef CONFIG_NFS_V4
err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
- clp->cl_boot_time = CURRENT_TIME;
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
if (nfs4_has_session(clp)) {
nfs4_deviceid_purge_client(clp);
nfs4_destroy_session(clp->cl_session);
+ nfs4_destroy_clientid(clp);
}
}
@@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
nfs_idmap_delete(clp);
rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+ kfree(clp->cl_serverowner);
+ kfree(clp->cl_serverscope);
+ kfree(clp->cl_implid);
}
/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
/* nfs_client_lock held */
static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
{
- struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
if (clp->cl_cb_ident)
idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)
if (clp->cl_machine_cred != NULL)
put_rpccred(clp->cl_machine_cred);
- put_net(clp->net);
+ put_net(clp->cl_net);
kfree(clp->cl_hostname);
- kfree(clp->server_scope);
- kfree(clp->impl_id);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)
return;
dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
- nn = net_generic(clp->net, nfs_net_id);
+ nn = net_generic(clp->cl_net, nfs_net_id);
if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
list_del(&clp->cl_share_link);
@@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
clp->cl_cons_state == NFS_CS_SESSION_INITING))
return false;
+ smp_rmb();
+
/* Match the version and minorversion */
if (clp->rpc_ops->version != 4 ||
clp->cl_minorversion != minorversion)
@@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
return NULL;
}
+static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+{
+ return clp->cl_cons_state != NFS_CS_INITING;
+}
+
+int nfs_wait_client_init_complete(const struct nfs_client *clp)
+{
+ return wait_event_killable(nfs_client_active_wq,
+ nfs_client_init_is_complete(clp));
+}
+
+/*
+ * Found an existing client. Make sure it's ready before returning.
+ */
+static struct nfs_client *
+nfs_found_client(const struct nfs_client_initdata *cl_init,
+ struct nfs_client *clp)
+{
+ int error;
+
+ error = nfs_wait_client_init_complete(clp);
+ if (error < 0) {
+ nfs_put_client(clp);
+ return ERR_PTR(-ERESTARTSYS);
+ }
+
+ if (clp->cl_cons_state < NFS_CS_READY) {
+ error = clp->cl_cons_state;
+ nfs_put_client(clp);
+ return ERR_PTR(error);
+ }
+
+ smp_rmb();
+
+ BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
+ dprintk("<-- %s found nfs_client %p for %s\n",
+ __func__, clp, cl_init->hostname ?: "");
+ return clp;
+}
+
/*
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
@@ -512,11 +559,9 @@ static struct nfs_client *
nfs_get_client(const struct nfs_client_initdata *cl_init,
const struct rpc_timeout *timeparms,
const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport)
+ rpc_authflavor_t authflavour)
{
struct nfs_client *clp, *new = NULL;
- int error;
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
spin_lock(&nn->nfs_client_lock);
clp = nfs_match_client(cl_init);
- if (clp)
- goto found_client;
- if (new)
- goto install_client;
+ if (clp) {
+ spin_unlock(&nn->nfs_client_lock);
+ if (new)
+ nfs_free_client(new);
+ return nfs_found_client(cl_init, clp);
+ }
+ if (new) {
+ list_add(&new->cl_share_link, &nn->nfs_client_list);
+ spin_unlock(&nn->nfs_client_lock);
+ new->cl_flags = cl_init->init_flags;
+ return cl_init->rpc_ops->init_client(new,
+ timeparms, ip_addr,
+ authflavour);
+ }
spin_unlock(&nn->nfs_client_lock);
new = nfs_alloc_client(cl_init);
} while (!IS_ERR(new));
- dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new));
+ dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
+ cl_init->hostname ?: "", PTR_ERR(new));
return new;
-
- /* install a new client and return with it unready */
-install_client:
- clp = new;
- list_add(&clp->cl_share_link, &nn->nfs_client_list);
- spin_unlock(&nn->nfs_client_lock);
-
- error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
- authflavour, noresvport);
- if (error < 0) {
- nfs_put_client(clp);
- return ERR_PTR(error);
- }
- dprintk("--> nfs_get_client() = %p [new]\n", clp);
- return clp;
-
- /* found an existing client
- * - make sure it's ready before returning
- */
-found_client:
- spin_unlock(&nn->nfs_client_lock);
-
- if (new)
- nfs_free_client(new);
-
- error = wait_event_killable(nfs_client_active_wq,
- clp->cl_cons_state < NFS_CS_INITING);
- if (error < 0) {
- nfs_put_client(clp);
- return ERR_PTR(-ERESTARTSYS);
- }
-
- if (clp->cl_cons_state < NFS_CS_READY) {
- error = clp->cl_cons_state;
- nfs_put_client(clp);
- return ERR_PTR(error);
- }
-
- BUG_ON(clp->cl_cons_state != NFS_CS_READY);
-
- dprintk("--> nfs_get_client() = %p [share]\n", clp);
- return clp;
}
/*
@@ -588,27 +602,12 @@ found_client:
*/
void nfs_mark_client_ready(struct nfs_client *clp, int state)
{
+ smp_wmb();
clp->cl_cons_state = state;
wake_up_all(&nfs_client_active_wq);
}
/*
- * With sessions, the client is not marked ready until after a
- * successful EXCHANGE_ID and CREATE_SESSION.
- *
- * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
- * other versions of NFS can be tried.
- */
-int nfs4_check_client_ready(struct nfs_client *clp)
-{
- if (!nfs4_has_session(clp))
- return 0;
- if (clp->cl_cons_state < NFS_CS_READY)
- return -EPROTONOSUPPORT;
- return 0;
-}
-
-/*
* Initialise the timeout values for a connection
*/
static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
*/
static int nfs_create_rpc_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- rpc_authflavor_t flavor,
- int discrtry, int noresvport)
+ rpc_authflavor_t flavor)
{
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
- .net = clp->net,
+ .net = clp->cl_net,
.protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
.addrsize = clp->cl_addrlen,
@@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
.authflavor = flavor,
};
- if (discrtry)
+ if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
- if (noresvport)
+ if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (!IS_ERR(clp->cl_rpcclient))
@@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)
.nfs_version = clp->rpc_ops->version,
.noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
1 : 0,
- .net = clp->net,
+ .net = clp->cl_net,
};
if (nlm_init.nfs_version > 3)
@@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
return 0;
}
-/*
- * Initialise an NFS2 or NFS3 client
+/**
+ * nfs_init_client - Initialise an NFS2 or NFS3 client
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: IP presentation address (not used)
+ * @authflavor: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
*/
-int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour,
- int noresvport)
+struct nfs_client *nfs_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr, rpc_authflavor_t authflavour)
{
int error;
if (clp->cl_cons_state == NFS_CS_READY) {
/* the client is already initialised */
dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
- return 0;
+ return clp;
}
/*
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
- 0, noresvport);
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
- return 0;
+ return clp;
error:
nfs_mark_client_ready(clp, error);
+ nfs_put_client(clp);
dprintk("<-- nfs_init_client() = xerror %d\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
@@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,
.hostname = data->nfs_server.hostname,
.addr = (const struct sockaddr *)&data->nfs_server.address,
.addrlen = data->nfs_server.addrlen,
- .rpc_ops = &nfs_v2_clientops,
+ .rpc_ops = NULL,
.proto = data->nfs_server.protocol,
.net = data->net,
};
@@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,
dprintk("--> nfs_init_server()\n");
+ switch (data->version) {
+#ifdef CONFIG_NFS_V2
+ case 2:
+ cl_init.rpc_ops = &nfs_v2_clientops;
+ break;
+#endif
#ifdef CONFIG_NFS_V3
- if (data->version == 3)
+ case 3:
cl_init.rpc_ops = &nfs_v3_clientops;
+ break;
#endif
+ default:
+ return -EPROTONOSUPPORT;
+ }
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
+ if (data->flags & NFS_MOUNT_NORESVPORT)
+ set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
- data->flags & NFS_MOUNT_NORESVPORT);
+ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
@@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,
server->options = data->options;
server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+ NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
static void nfs_server_insert_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
- struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
spin_lock(&nn->nfs_client_lock);
list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
if (clp == NULL)
return;
- nn = net_generic(clp->net, nfs_net_id);
+ nn = net_generic(clp->cl_net, nfs_net_id);
spin_lock(&nn->nfs_client_lock);
list_del_rcu(&server->client_link);
if (list_empty(&clp->cl_superblocks))
@@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
* so that the client back channel can find the
* nfs_client struct
*/
- clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
}
#endif /* CONFIG_NFS_V4_1 */
return nfs4_init_callback(clp);
}
-/*
- * Initialise an NFS4 client record
+/**
+ * nfs4_init_client - Initialise an NFS4 client record
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: callback IP address in presentation format
+ * @authflavor: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
*/
-int nfs4_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport)
+struct nfs_client *nfs4_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour)
{
char buf[INET6_ADDRSTRLEN + 1];
int error;
@@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,
if (clp->cl_cons_state == NFS_CS_READY) {
/* the client is initialised already */
dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
- return 0;
+ return clp;
}
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
- error = nfs_create_rpc_client(clp, timeparms, authflavour,
- 1, noresvport);
+ __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+ error = nfs_create_rpc_client(clp, timeparms, authflavour);
if (error < 0)
goto error;
@@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,
if (!nfs4_has_session(clp))
nfs_mark_client_ready(clp, NFS_CS_READY);
- return 0;
+ return clp;
error:
nfs_mark_client_ready(clp, error);
+ nfs_put_client(clp);
dprintk("<-- nfs4_init_client() = xerror %d\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
@@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,
dprintk("--> nfs4_set_client()\n");
+ if (server->flags & NFS_MOUNT_NORESVPORT)
+ set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
- server->flags & NFS_MOUNT_NORESVPORT);
+ clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
@@ -1465,8 +1490,8 @@ error:
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
- const struct sockaddr *ds_addr,
- int ds_addrlen, int ds_proto)
+ const struct sockaddr *ds_addr, int ds_addrlen,
+ int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
@@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
.rpc_ops = &nfs_v4_clientops,
.proto = ds_proto,
.minorversion = mds_clp->cl_minorversion,
- .net = mds_clp->net,
- };
- struct rpc_timeout ds_timeout = {
- .to_initval = 15 * HZ,
- .to_maxval = 15 * HZ,
- .to_retries = 1,
- .to_exponential = 1,
+ .net = mds_clp->cl_net,
};
+ struct rpc_timeout ds_timeout;
struct nfs_client *clp;
/*
@@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
* (section 13.1 RFC 5661).
*/
+ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
- mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
+ mds_clp->cl_rpcclient->cl_auth->au_flavor);
dprintk("<-- %s %p\n", __func__, clp);
return clp;
@@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
rpc_protocol(parent_server->client),
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
- parent_client->net);
+ parent_client->cl_net);
if (error < 0)
goto error;
@@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)
idr_init(&nn->cb_ident_idr);
#endif
spin_lock_init(&nn->nfs_client_lock);
+ nn->boot_time = CURRENT_TIME;
}
#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 89af1d2..bd3a960 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -316,6 +316,10 @@ out:
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
*
+ * Note that this function is designed to be called by the state
+ * manager thread. For this reason, it cannot flush the dirty data,
+ * since that could deadlock in case of a state recovery error.
+ *
* Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
server);
rcu_read_unlock();
- if (delegation != NULL) {
- filemap_flush(inode->i_mapping);
+ if (delegation != NULL)
err = __nfs_inode_return_delegation(inode,
delegation, 0);
- }
iput(inode);
if (!err)
goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
* nfs_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
+ * This routine will always flush any dirty data to disk on the
+ * assumption that if we need to return the delegation, then
+ * we should stop caching.
+ *
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
struct nfs_delegation *delegation;
int err = 0;
+ nfs_wb_all(inode);
if (rcu_access_pointer(nfsi->delegation) != NULL) {
delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL) {
- nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
}
}
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
+ filemap_flush(inode->i_mapping);
+
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index cd6a7a8..72709c4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
static inline int nfs_inode_return_delegation(struct inode *inode)
{
+ nfs_wb_all(inode);
return 0;
}
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eedd24d..0989a20 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -475,6 +475,29 @@ different:
}
static
+bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
+{
+ if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
+ return false;
+ if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+ return true;
+ if (filp->f_pos == 0)
+ return true;
+ return false;
+}
+
+/*
+ * This function is called by the lookup code to request the use of
+ * readdirplus to accelerate any future lookups in the same
+ * directory.
+ */
+static
+void nfs_advise_use_readdirplus(struct inode *dir)
+{
+ set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
+}
+
+static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
@@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
desc->file = filp;
desc->dir_cookie = &dir_ctx->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
- desc->plus = NFS_USE_READDIRPLUS(inode);
+ desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
nfs_block_sillyrename(dentry);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
if (!inode) {
if (nfs_neg_need_reval(dir, dentry, nd))
goto out_bad;
- goto out_valid;
+ goto out_valid_noent;
}
if (is_bad_inode(inode)) {
@@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
if (fhandle == NULL || fattr == NULL)
goto out_error;
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
goto out_bad;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
out_set_verifier:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
+ /* Success: notify readdir to use READDIRPLUS */
+ nfs_advise_use_readdirplus(dir);
+ out_valid_noent:
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
__func__, dentry->d_parent->d_name.name,
@@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
nfs_block_sillyrename(parent);
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
@@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (IS_ERR(res))
goto out_unblock_sillyrename;
+ /* Success: notify readdir to use READDIRPLUS */
+ nfs_advise_use_readdirplus(dir);
+
no_entry:
res = d_materialise_unique(dentry, inode);
if (res != NULL) {
@@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (dentry->d_inode)
goto out;
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
goto out_error;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f..23d170b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
#include "internal.h"
#include "iostat.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -81,16 +82,19 @@ struct nfs_direct_req {
struct completion completion; /* wait for i/o completion */
/* commit state */
- struct list_head rewrite_list; /* saved nfs_write_data structs */
- struct nfs_write_data * commit_data; /* special write_data for commits */
+ struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
+ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
+ struct work_struct work;
int flags;
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
struct nfs_writeverf verf; /* unstable write verifier */
};
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
+static void nfs_direct_write_schedule_work(struct work_struct *work);
static inline void get_dreq(struct nfs_direct_req *dreq)
{
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
return -EINVAL;
}
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
- unsigned int npages;
- unsigned int i;
-
- if (count == 0)
- return;
- pages += (pgbase >> PAGE_SHIFT);
- npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (!PageCompound(page))
- set_page_dirty(page);
- }
-}
-
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
page_cache_release(pages[i]);
}
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq)
+{
+ cinfo->lock = &dreq->lock;
+ cinfo->mds = &dreq->mds_cinfo;
+ cinfo->ds = &dreq->ds_cinfo;
+ cinfo->dreq = dreq;
+ cinfo->completion_ops = &nfs_direct_commit_completion_ops;
+}
+
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
- dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
+ dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
kref_get(&dreq->kref);
init_completion(&dreq->completion);
- INIT_LIST_HEAD(&dreq->rewrite_list);
- dreq->iocb = NULL;
- dreq->ctx = NULL;
- dreq->l_ctx = NULL;
+ INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
- atomic_set(&dreq->io_count, 0);
- dreq->count = 0;
- dreq->error = 0;
- dreq->flags = 0;
return dreq;
}
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
nfs_direct_req_release(dreq);
}
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+static void nfs_direct_readpage_release(struct nfs_page *req)
{
- struct nfs_read_data *data = calldata;
-
- nfs_readpage_result(task, data);
+ dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ nfs_release_request(req);
}
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
+ unsigned long bytes = 0;
+ struct nfs_direct_req *dreq = hdr->dreq;
- struct nfs_read_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- int status = data->task.tk_status;
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;
spin_lock(&dreq->lock);
- if (unlikely(status < 0)) {
- dreq->error = status;
- spin_unlock(&dreq->lock);
- } else {
- dreq->count += data->res.count;
- spin_unlock(&dreq->lock);
- nfs_direct_dirty_pages(data->pagevec,
- data->args.pgbase,
- data->res.count);
- }
- nfs_direct_release_pages(data->pagevec, data->npages);
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+ dreq->error = hdr->error;
+ else
+ dreq->count += hdr->good_bytes;
+ spin_unlock(&dreq->lock);
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+ if (bytes > hdr->good_bytes)
+ zero_user(page, 0, PAGE_SIZE);
+ else if (hdr->good_bytes - bytes < PAGE_SIZE)
+ zero_user_segment(page,
+ hdr->good_bytes & ~PAGE_MASK,
+ PAGE_SIZE);
+ }
+ if (!PageCompound(page)) {
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ if (bytes < hdr->good_bytes)
+ set_page_dirty(page);
+ } else
+ set_page_dirty(page);
+ }
+ bytes += req->wb_bytes;
+ nfs_list_remove_request(req);
+ nfs_direct_readpage_release(req);
+ }
+out_put:
if (put_dreq(dreq))
nfs_direct_complete(dreq);
- nfs_readdata_free(data);
+ hdr->release(hdr);
+}
+
+static void nfs_read_sync_pgio_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_release_request(req);
+ }
}
-static const struct rpc_call_ops nfs_read_direct_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_direct_read_result,
- .rpc_release = nfs_direct_read_release,
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+ get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+ .error_cleanup = nfs_read_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_read_completion,
};
/*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_read_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;
do {
- struct nfs_read_data *data;
size_t bytes;
+ int i;
pgbase = user_addr & ~PAGE_MASK;
- bytes = min(rsize,count);
+ bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
result = -ENOMEM;
- data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pagevec)
break;
-
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 1, 0, data->pagevec, NULL);
+ npages, 1, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_readdata_free(data);
+ if (result < 0)
break;
- }
- if ((unsigned)result < data->npages) {
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_readdata_free(data);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- data->npages = result;
+ npages = result;
}
- get_dreq(dreq);
-
- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
- data->args.count = bytes;
- data->res.fattr = &data->fattr;
- data->res.eof = 0;
- data->res.count = bytes;
- nfs_fattr_init(&data->fattr);
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
- rpc_put_task(task);
-
- dprintk("NFS: %5u initiated direct read call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
-
- started += bytes;
- user_addr += bytes;
- pos += bytes;
- /* FIXME: Remove this unnecessary math from final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
- count -= bytes;
- } while (count != 0);
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+ /* XXX do we need to do the eof zeroing found in async_filler? */
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
+ /* The nfs_page now hold references to these pages */
+ nfs_direct_release_pages(pagevec, npages);
+ } while (count != 0 && result >= 0);
+
+ kfree(pagevec);
if (started)
return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
unsigned long nr_segs,
loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;
+ nfs_pageio_init_read(&desc, dreq->inode,
+ &nfs_direct_read_completion_ops);
get_dreq(dreq);
+ desc.pg_dreq = dreq;
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+ result = nfs_direct_read_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
+ nfs_pageio_complete(&desc);
+
/*
* If no bytes were started, return the error, and let the
* generic layer handle the completion.
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
+ NFS_I(inode)->read_io += result;
out_release:
nfs_direct_req_release(dreq);
out:
return result;
}
-static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
-{
- while (!list_empty(&dreq->rewrite_list)) {
- struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
- nfs_direct_release_pages(data->pagevec, data->npages);
- nfs_writedata_free(data);
- }
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
- struct inode *inode = dreq->inode;
- struct list_head *p;
- struct nfs_write_data *data;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
+ struct nfs_pageio_descriptor desc;
+ struct nfs_page *req, *tmp;
+ LIST_HEAD(reqs);
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(failed);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
+ spin_lock(cinfo.lock);
+ nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
+ spin_unlock(cinfo.lock);
dreq->count = 0;
get_dreq(dreq);
- list_for_each(p, &dreq->rewrite_list) {
- data = list_entry(p, struct nfs_write_data, pages);
-
- get_dreq(dreq);
-
- /* Use stable writes */
- data->args.stable = NFS_FILE_SYNC;
-
- /*
- * Reset data->res.
- */
- nfs_fattr_init(&data->fattr);
- data->res.count = data->args.count;
- memset(&data->verf, 0, sizeof(data->verf));
-
- /*
- * Reuse data->task; data->args should not have changed
- * since the original request was sent.
- */
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- /*
- * We're called via an RPC callback, so BKL is already held.
- */
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
-
- dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
+
+ list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ if (!nfs_pageio_add_request(&desc, req)) {
+ nfs_list_add_request(req, &failed);
+ spin_lock(cinfo.lock);
+ dreq->flags = 0;
+ dreq->error = -EIO;
+ spin_unlock(cinfo.lock);
+ }
}
+ nfs_pageio_complete(&desc);
- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, inode);
-}
-
-static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
+ while (!list_empty(&failed))
+ nfs_unlock_and_release_request(req);
- /* Call the NFS version-specific code */
- NFS_PROTO(data->inode)->commit_done(task, data);
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, dreq->inode);
}
-static void nfs_direct_commit_release(void *calldata)
+static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ struct nfs_direct_req *dreq = data->dreq;
+ struct nfs_commit_info cinfo;
+ struct nfs_page *req;
int status = data->task.tk_status;
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
if (status < 0) {
dprintk("NFS: %5u commit failed with error %d.\n",
- data->task.tk_pid, status);
+ data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)
}
dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
- nfs_direct_write_complete(dreq, data->inode);
- nfs_commit_free(data);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+ /* Note the rewrite will go through mds */
+ kref_get(&req->wb_kref);
+ nfs_mark_request_commit(req, NULL, &cinfo);
+ }
+ nfs_unlock_and_release_request(req);
+ }
+
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ nfs_direct_write_complete(dreq, data->inode);
+}
+
+static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+{
+ /* There is no lock to clear */
}
-static const struct rpc_call_ops nfs_commit_direct_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_direct_commit_result,
- .rpc_release = nfs_direct_commit_release,
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
+ .completion = nfs_direct_commit_complete,
+ .error_cleanup = nfs_direct_error_cleanup,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- struct nfs_write_data *data = dreq->commit_data;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = NFS_CLIENT(dreq->inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_commit_direct_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
-
- data->inode = dreq->inode;
- data->cred = msg.rpc_cred;
-
- data->args.fh = NFS_FH(data->inode);
- data->args.offset = 0;
- data->args.count = 0;
- data->args.context = dreq->ctx;
- data->args.lock_context = dreq->l_ctx;
- data->res.count = 0;
- data->res.fattr = &data->fattr;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- NFS_PROTO(data->inode)->commit_setup(data, &msg);
-
- /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
- dreq->commit_data = NULL;
-
- dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
-
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ int res;
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(mds_list);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
+ res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
+ if (res < 0) /* res == -ENOMEM */
+ nfs_direct_write_reschedule(dreq);
}
-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
{
+ struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
int flags = dreq->flags;
dreq->flags = 0;
@@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
nfs_direct_write_reschedule(dreq);
break;
default:
- if (dreq->commit_data != NULL)
- nfs_commit_free(dreq->commit_data);
- nfs_direct_free_writedata(dreq);
- nfs_zap_mapping(inode, inode->i_mapping);
+ nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
nfs_direct_complete(dreq);
}
}
-static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- dreq->commit_data = nfs_commitdata_alloc();
- if (dreq->commit_data != NULL)
- dreq->commit_data->req = (struct nfs_page *) dreq;
+ schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
}
+
#else
-static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
{
- dreq->commit_data = NULL;
}
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_direct_free_writedata(dreq);
nfs_zap_mapping(inode, inode->i_mapping);
nfs_direct_complete(dreq);
}
#endif
-static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
/*
* NB: Return the value of the first error return code. Subsequent
* errors after the first one are ignored.
*/
-static void nfs_direct_write_release(void *calldata)
-{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- int status = data->task.tk_status;
-
- spin_lock(&dreq->lock);
-
- if (unlikely(status < 0)) {
- /* An error has occurred, so we should not commit */
- dreq->flags = 0;
- dreq->error = status;
- }
- if (unlikely(dreq->error != 0))
- goto out_unlock;
-
- dreq->count += data->res.count;
-
- if (data->res.verf->committed != NFS_FILE_SYNC) {
- switch (dreq->flags) {
- case 0:
- memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
- dreq->flags = NFS_ODIRECT_DO_COMMIT;
- break;
- case NFS_ODIRECT_DO_COMMIT:
- if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
- dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- }
- }
- }
-out_unlock:
- spin_unlock(&dreq->lock);
-
- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, data->inode);
-}
-
-static const struct rpc_call_ops nfs_write_direct_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_direct_write_result,
- .rpc_release = nfs_direct_write_release,
-};
-
/*
* For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
* operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
* handled automatically by nfs_direct_write_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;
do {
- struct nfs_write_data *data;
size_t bytes;
+ int i;
pgbase = user_addr & ~PAGE_MASK;
- bytes = min(wsize,count);
+ bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
result = -ENOMEM;
- data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+ if (!pagevec)
break;
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 0, 0, data->pagevec, NULL);
+ npages, 0, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_writedata_free(data);
+ if (result < 0)
break;
- }
- if ((unsigned)result < data->npages) {
+
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_writedata_free(data);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- data->npages = result;
+ npages = result;
}
- get_dreq(dreq);
-
- list_move_tail(&data->pages, &dreq->rewrite_list);
-
- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
- data->args.count = bytes;
- data->args.stable = sync;
- data->res.fattr = &data->fattr;
- data->res.count = bytes;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
- rpc_put_task(task);
-
- dprintk("NFS: %5u initiated direct write call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- started += bytes;
- user_addr += bytes;
- pos += bytes;
-
- /* FIXME: Remove this useless math from the final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
+ /* The nfs_page now hold references to these pages */
+ nfs_direct_release_pages(pagevec, npages);
+ } while (count != 0 && result >= 0);
- count -= bytes;
- } while (count != 0);
+ kfree(pagevec);
if (started)
return started;
return result < 0 ? (ssize_t) result : -EFAULT;
}
+static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+ struct nfs_commit_info cinfo;
+ int bit = -1;
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+ spin_lock(&dreq->lock);
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ dreq->flags = 0;
+ dreq->error = hdr->error;
+ }
+ if (dreq->error != 0)
+ bit = NFS_IOHDR_ERROR;
+ else {
+ dreq->count += hdr->good_bytes;
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+ bit = NFS_IOHDR_NEED_RESCHED;
+ else if (dreq->flags == 0) {
+ memcpy(&dreq->verf, &req->wb_verf,
+ sizeof(dreq->verf));
+ bit = NFS_IOHDR_NEED_COMMIT;
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
+ } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
+ if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else
+ bit = NFS_IOHDR_NEED_COMMIT;
+ }
+ }
+ }
+ spin_unlock(&dreq->lock);
+
+ while (!list_empty(&hdr->pages)) {
+ req = nfs_list_entry(hdr->pages.next);
+ nfs_list_remove_request(req);
+ switch (bit) {
+ case NFS_IOHDR_NEED_RESCHED:
+ case NFS_IOHDR_NEED_COMMIT:
+ kref_get(&req->wb_kref);
+ nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+ }
+ nfs_unlock_and_release_request(req);
+ }
+
+out_put:
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, hdr->inode);
+ hdr->release(hdr);
+}
+
+static void nfs_write_sync_pgio_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_unlock_and_release_request(req);
+ }
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+ .error_cleanup = nfs_write_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_write_completion,
+};
+
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
ssize_t result = 0;
size_t requested_bytes = 0;
unsigned long seg;
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
get_dreq(dreq);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(dreq, vec,
- pos, sync);
+ result = nfs_direct_write_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
pos += vec->iov_len;
}
+ nfs_pageio_complete(&desc);
+ NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
/*
* If no bytes were started, return the error, and let the
@@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
- size_t wsize = NFS_SERVER(inode)->wsize;
- int sync = NFS_UNSTABLE;
dreq = nfs_direct_req_alloc();
if (!dreq)
goto out;
- nfs_alloc_commit_data(dreq);
-
- if (dreq->commit_data == NULL || count <= wsize)
- sync = NFS_FILE_SYNC;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
task_io_account_write(count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+ if (retval > 0) {
+ struct inode *inode = mapping->host;
- if (retval > 0)
iocb->ki_pos = pos + retval;
-
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
out:
return retval;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709..56311ca 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ /*
+ * If we're holding a write delegation, then just start the i/o
+ * but don't wait for completion (or send a commit).
+ */
+ if (nfs_have_delegation(inode, FMODE_WRITE))
+ return filemap_fdatawrite(file->f_mapping);
+
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
}
@@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
if (status < 0)
return status;
+ NFS_I(mapping->host)->write_io += copied;
return copied;
}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ae65c16..c817787 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
* either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
* superblock across an automount point of some nature.
*/
-void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
- struct nfs_clone_mount *mntdata)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
- int diff, ulen;
-
- if (uniq) {
- ulen = strlen(uniq);
- } else if (mntdata) {
- struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
- if (mnt_s->fscache_key) {
- uniq = mnt_s->fscache_key->key.uniquifier;
- ulen = mnt_s->fscache_key->key.uniq_len;
- }
- }
+ int diff;
if (!uniq) {
uniq = "";
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d..c5b11b5 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
extern void nfs_fscache_get_client_cookie(struct nfs_client *);
extern void nfs_fscache_release_client_cookie(struct nfs_client *);
-extern void nfs_fscache_get_super_cookie(struct super_block *,
- const char *,
- struct nfs_clone_mount *);
+extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
-static inline void nfs_fscache_get_super_cookie(
- struct super_block *sb,
- const char *uniq,
- struct nfs_clone_mount *mntdata)
-{
-}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4ca6f5c..8abfb19 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
/* Start by getting the root filehandle from the server */
- ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
if (ret < 0) {
dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
goto out;
@@ -178,87 +178,4 @@ out:
return ret;
}
-/*
- * get an NFS4 root dentry from the root filehandle
- */
-struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
- const char *devname)
-{
- struct nfs_server *server = NFS_SB(sb);
- struct nfs_fattr *fattr = NULL;
- struct dentry *ret;
- struct inode *inode;
- void *name = kstrdup(devname, GFP_KERNEL);
- int error;
-
- dprintk("--> nfs4_get_root()\n");
-
- if (!name)
- return ERR_PTR(-ENOMEM);
-
- /* get the info about the server and filesystem */
- error = nfs4_server_capabilities(server, mntfh);
- if (error < 0) {
- dprintk("nfs_get_root: getcaps error = %d\n",
- -error);
- kfree(name);
- return ERR_PTR(error);
- }
-
- fattr = nfs_alloc_fattr();
- if (fattr == NULL) {
- kfree(name);
- return ERR_PTR(-ENOMEM);
- }
-
- /* get the actual root for this mount */
- error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
- if (error < 0) {
- dprintk("nfs_get_root: getattr error = %d\n", -error);
- ret = ERR_PTR(error);
- goto out;
- }
-
- if (fattr->valid & NFS_ATTR_FATTR_FSID &&
- !nfs_fsid_equal(&server->fsid, &fattr->fsid))
- memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
-
- inode = nfs_fhget(sb, mntfh, fattr);
- if (IS_ERR(inode)) {
- dprintk("nfs_get_root: get root inode failed\n");
- ret = ERR_CAST(inode);
- goto out;
- }
-
- error = nfs_superblock_set_dummy_root(sb, inode);
- if (error != 0) {
- ret = ERR_PTR(error);
- goto out;
- }
-
- /* root dentries normally start off anonymous and get spliced in later
- * if the dentry tree reaches them; however if the dentry already
- * exists, we'll pick it up at this point and use it as the root
- */
- ret = d_obtain_alias(inode);
- if (IS_ERR(ret)) {
- dprintk("nfs_get_root: get root dentry failed\n");
- goto out;
- }
-
- security_d_instantiate(ret, inode);
- spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
- ret->d_fsdata = name;
- name = NULL;
- }
- spin_unlock(&ret->d_lock);
-out:
- if (name)
- kfree(name);
- nfs_free_fattr(fattr);
- dprintk("<-- nfs4_get_root()\n");
- return ret;
-}
-
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ba3019f..b5b86a0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
static void nfs_idmap_unregister(struct nfs_client *clp,
struct rpc_pipe *pipe)
{
- struct net *net = clp->net;
+ struct net *net = clp->cl_net;
struct super_block *pipefs_sb;
pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
struct idmap *idmap,
struct rpc_pipe *pipe)
{
- struct net *net = clp->net;
+ struct net *net = clp->cl_net;
struct super_block *pipefs_sb;
int err = 0;
@@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct dentry *cl_dentry;
struct nfs_client *clp;
+ int err;
+restart:
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
+ /* Wait for initialisation to finish */
+ if (clp->cl_cons_state == NFS_CS_INITING) {
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+ err = nfs_wait_client_init_complete(clp);
+ nfs_put_client(clp);
+ if (err)
+ return NULL;
+ goto restart;
+ }
+ /* Skip nfs_clients that failed to initialise */
+ if (clp->cl_cons_state < 0)
+ continue;
+ smp_rmb();
if (clp->rpc_ops != &nfs_v4_clientops)
continue;
cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
@@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
struct key *key = cons->key;
- int ret;
+ int ret = -ENOMEM;
/* msg and im are freed in idmap_pipe_destroy_msg */
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
- if (IS_ERR(msg)) {
- ret = PTR_ERR(msg);
+ if (!msg)
goto out0;
- }
im = kmalloc(sizeof(*im), GFP_KERNEL);
- if (IS_ERR(im)) {
- ret = PTR_ERR(im);
+ if (!im)
goto out1;
- }
ret = nfs_idmap_prepare_message(key->description, im, msg);
if (ret < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c607313..2f6f78c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_mode = fattr->mode;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
inode->i_data.a_ops = &nfs_dir_aops;
- if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
- set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_gid = -2;
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
@@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA
| NFS_INO_REVAL_PAGECACHE;
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
@@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
nfs_init_lock_context(&ctx->lock_context);
ctx->lock_context.open_context = ctx;
INIT_LIST_HEAD(&ctx->list);
+ ctx->mdsthreshold = NULL;
return ctx;
}
@@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
put_rpccred(ctx->cred);
dput(ctx->dentry);
nfs_sb_deactive(sb);
+ kfree(ctx->mdsthreshold);
kfree(ctx);
}
@@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
return 0;
}
+static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+ if (nfs_have_delegated_attributes(inode))
+ return false;
+ return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ || nfs_attribute_timeout(inode)
+ || NFS_STALE(inode);
+}
+
/**
* nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
@@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
- if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
- || nfs_attribute_cache_expired(inode)
- || NFS_STALE(inode)) {
+ if (nfs_mapping_need_revalidate_inode(inode)) {
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (ret < 0)
goto out;
@@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
+ if (nfs_have_delegated_attributes(inode))
+ return 0;
/* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
return -EIO;
@@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode);
@@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
- if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
- goto out_fileid;
+ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+ printk(KERN_ERR "NFS: server %s error: fileid changed\n"
+ "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
+ inode->i_sb->s_id, (long long)nfsi->fileid,
+ (long long)fattr->fileid);
+ goto out_err;
+ }
/*
* Make sure the inode's type hasn't changed.
*/
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
- goto out_changed;
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ /*
+ * Big trouble! The inode has become a different object.
+ */
+ printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
+ __func__, inode->i_ino, inode->i_mode, fattr->mode);
+ goto out_err;
+ }
server = NFS_SERVER(inode);
/* Update the fsid? */
@@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (inode->i_version != fattr->change_attr) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ invalid |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_PAGECACHE;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
inode->i_version = fattr->change_attr;
@@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
- /* NFSv2/v3: Check if the mtime agrees */
- if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
- dprintk("NFS: mtime change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode))
- nfs_force_lookup_revalidate(inode);
- memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- }
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA
- | NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
- /* If ctime has changed we should definitely clear access+acl caches */
- if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- /* and probably clear data for a directory too as utimes can cause
- * havoc with our cache.
- */
- if (S_ISDIR(inode->i_mode)) {
- invalid |= NFS_INO_INVALID_DATA;
- nfs_force_lookup_revalidate(inode);
- }
- memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- }
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_validity |= invalid;
return 0;
- out_changed:
- /*
- * Big trouble! The inode has become a different object.
- */
- printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
- __func__, inode->i_ino, inode->i_mode, fattr->mode);
out_err:
/*
* No need to worry about unhashing the dentry, as the
@@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfs_invalidate_inode(inode);
return -ESTALE;
-
- out_fileid:
- printk(KERN_ERR "NFS: server %s error: fileid changed\n"
- "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
- NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
- (long long)nfsi->fileid, (long long)fattr->fileid);
- goto out_err;
}
@@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
nfsi->layout = NULL;
- atomic_set(&nfsi->commits_outstanding, 0);
+ atomic_set(&nfsi->commit_info.rpcs_out, 0);
#endif
}
@@ -1559,9 +1539,9 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->open_files);
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
- INIT_LIST_HEAD(&nfsi->commit_list);
+ INIT_LIST_HEAD(&nfsi->commit_info.list);
nfsi->npages = 0;
- nfsi->ncommit = 0;
+ nfsi->commit_info.ncommit = 0;
atomic_set(&nfsi->silly_count, 1);
INIT_HLIST_HEAD(&nfsi->silly_list);
init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bda..1848a72 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
+ bool need_mount;
struct {
struct sockaddr_storage address;
@@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
struct nfs_fattr *,
rpc_authflavor_t);
+extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
-extern int nfs4_check_client_ready(struct nfs_client *clp);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
const struct sockaddr *ds_addr,
- int ds_addrlen, int ds_proto);
+ int ds_addrlen, int ds_proto,
+ unsigned int ds_timeo,
+ unsigned int ds_retrans);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
}
#endif
-/* nfs4namespace.c */
-#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
-#else
-static inline
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
-{
- return ERR_PTR(-ENOENT);
-}
-#endif
-
/* callback_xdr.c */
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
+struct nfs_pageio_descriptor;
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr,
+ void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
/* nfs2xdr.c */
-extern int nfs_stat_to_errno(enum nfs_stat);
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
@@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
-extern int nfs4_init_ds_session(struct nfs_client *clp);
+extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
-extern int nfs_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour,
- int noresvport);
+ const char *ip_addr, rpc_authflavor_t authflavour);
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
-#endif
+struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
+ struct nfs_fh *, struct nfs_fattr *);
+struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
+ struct nfs_fattr *, rpc_authflavor_t);
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
-struct nfs_pageio_descriptor;
+struct nfs_pgio_completion_ops;
/* read.c */
-extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
- const struct rpc_call_ops *call_ops);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
+extern int nfs_initiate_read(struct rpc_clnt *clnt,
+ struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops, int flags);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
-
+ struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode);
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
+ struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags);
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
-extern int nfs_initiate_write(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern void nfs_commit_free(struct nfs_commit_data *p);
+extern int nfs_initiate_write(struct rpc_clnt *clnt,
+ struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
- int how);
+ int how, int flags);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+ struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
- int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+ int how, int flags);
+extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
- struct pnfs_layout_segment *lseg);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max);
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
- struct pnfs_layout_segment *lseg);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
-void nfs_request_remove_commit_list(struct nfs_page *req);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,
#define nfs_migrate_page NULL
#endif
+/* direct.c */
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq);
+
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_read_data *);
-extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
-extern int nfs4_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport);
-extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
+ rpc_authflavor_t authflavour);
extern int _nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs_server *server,
struct rpc_message *msg,
@@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
PAGE_SIZE - 1) >> PAGE_SHIFT;
}
+/*
+ * Convert a struct timespec into a 64-bit change attribute
+ *
+ * This does approximately the same thing as timespec_to_ns(),
+ * but for calculation efficiency, we multiply the seconds by
+ * 1024*1024*1024.
+ */
+static inline
+u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+{
+ return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
+}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e..08b9c93 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t authflavor);
-
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
return ERR_PTR(-ENAMETOOLONG);
}
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
-{
- struct gss_api_mech *mech;
- struct xdr_netobj oid;
- int i;
- rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
-
- for (i = 0; i < flavors->num_flavors; i++) {
- struct nfs4_secinfo_flavor *flavor;
- flavor = &flavors->flavors[i];
-
- if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
- pseudoflavor = flavor->flavor;
- break;
- } else if (flavor->flavor == RPC_AUTH_GSS) {
- oid.len = flavor->gss.sec_oid4.len;
- oid.data = flavor->gss.sec_oid4.data;
- mech = gss_mech_get_by_OID(&oid);
- if (!mech)
- continue;
- pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
- gss_mech_put(mech);
- break;
- }
- }
-
- return pseudoflavor;
-}
-
-static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
- struct qstr *name,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr)
-{
- int err;
-
- if (NFS_PROTO(dir)->version == 4)
- return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
-
- err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
- if (err)
- return ERR_PTR(err);
- return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#else /* CONFIG_NFS_V4 */
-static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
- struct qstr *name,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr)
-{
- int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
- if (err)
- return ERR_PTR(err);
- return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#endif /* CONFIG_NFS_V4 */
-
/*
* nfs_d_automount - Handle crossing a mountpoint on the server
* @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
- struct dentry *parent;
+ struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
- struct rpc_clnt *client;
dprintk("--> nfs_d_automount()\n");
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
dprintk("%s: enter\n", __func__);
- /* Look it up again to get its attributes */
- parent = dget_parent(path->dentry);
- client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
- dput(parent);
- if (IS_ERR(client)) {
- mnt = ERR_CAST(client);
- goto out;
- }
-
- if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(client, path->dentry);
- else
- mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
- rpc_shutdown_client(client);
-
+ mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
if (IS_ERR(mnt))
goto out;
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
* @authflavor - security flavor to use when performing the mount
*
*/
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t authflavor)
+struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
+ struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
{
struct nfs_clone_mount mountdata = {
.sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
dprintk("<-- nfs_do_submount() = %p\n", mnt);
return mnt;
}
+
+struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
+ struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+ int err;
+ struct dentry *parent = dget_parent(dentry);
+
+ /* Look it up again to get its attributes */
+ err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
+ dput(parent);
+ if (err != 0)
+ return ERR_PTR(err);
+
+ return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+}
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index aa14ec3..8a6394e 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -1,3 +1,7 @@
+/*
+ * NFS-private data for each "struct net". Accessed with net_generic().
+ */
+
#ifndef __NFS_NETNS_H__
#define __NFS_NETNS_H__
@@ -20,6 +24,7 @@ struct nfs_net {
struct idr cb_ident_idr; /* Protected by nfs_client_lock */
#endif
spinlock_t nfs_client_lock;
+ struct timespec boot_time;
};
extern int nfs_net_id;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000..baf759b 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
#define NFS_readdirres_sz (1)
#define NFS_statfsres_sz (1+NFS_info_sz)
+static int nfs_stat_to_errno(enum nfs_stat);
/*
* While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
p = xdr_decode_time(p, &fattr->atime);
p = xdr_decode_time(p, &fattr->mtime);
xdr_decode_time(p, &fattr->ctime);
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
* Returns a local errno value, or -EIO if the NFS status code is
* not recognized. This function is used jointly by NFSv2 and NFSv3.
*/
-int nfs_stat_to_errno(enum nfs_stat status)
+static int nfs_stat_to_errno(enum nfs_stat status)
{
int i;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 75c6829..2292a0f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs3_diropargs arg = {
@@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- nfs_invalidate_atime(data->inode);
- nfs_refresh_inode(data->inode, &data->fattr);
+ nfs_invalidate_atime(inode);
+ nfs_refresh_inode(inode, &data->fattr);
return 0;
}
@@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
return 0;
}
@@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
rpc_call_start(task);
}
-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+ rpc_call_start(task);
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
@@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
@@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.file_inode_ops = &nfs3_file_inode_operations,
.file_ops = &nfs_file_operations,
.getroot = nfs3_proc_get_root,
+ .submount = nfs_submount,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
@@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a..902de48 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
+static int nfs3_stat_to_errno(enum nfs_stat);
+
/*
* Map file type to S_IFMT bits
*/
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
p = xdr_decode_nfstime3(p, &fattr->atime);
p = xdr_decode_nfstime3(p, &fattr->mtime);
xdr_decode_nfstime3(p, &fattr->ctime);
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
fattr->valid |= NFS_ATTR_FATTR_V3;
return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
goto out_overflow;
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
+ | NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME;
p = xdr_decode_size3(p, &fattr->pre_size);
p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
xdr_decode_nfstime3(p, &fattr->pre_ctime);
+ fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
return 0;
out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
* };
*/
static void encode_commit3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
__be32 *p;
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
encode_commit3args(xdr, args);
}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1472,7 +1477,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1770,7 +1775,7 @@ out_default:
error = decode_wcc_data(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/**
@@ -2088,7 +2093,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2319,7 +2324,7 @@ out_status:
*/
static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_commitres *result)
{
enum nfs_stat status;
int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
#endif /* CONFIG_NFS_V3_ACL */
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS_OK, 0 },
+ { NFSERR_PERM, -EPERM },
+ { NFSERR_NOENT, -ENOENT },
+ { NFSERR_IO, -errno_NFSERR_IO},
+ { NFSERR_NXIO, -ENXIO },
+/* { NFSERR_EAGAIN, -EAGAIN }, */
+ { NFSERR_ACCES, -EACCES },
+ { NFSERR_EXIST, -EEXIST },
+ { NFSERR_XDEV, -EXDEV },
+ { NFSERR_NODEV, -ENODEV },
+ { NFSERR_NOTDIR, -ENOTDIR },
+ { NFSERR_ISDIR, -EISDIR },
+ { NFSERR_INVAL, -EINVAL },
+ { NFSERR_FBIG, -EFBIG },
+ { NFSERR_NOSPC, -ENOSPC },
+ { NFSERR_ROFS, -EROFS },
+ { NFSERR_MLINK, -EMLINK },
+ { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, -ENOTEMPTY },
+ { NFSERR_DQUOT, -EDQUOT },
+ { NFSERR_STALE, -ESTALE },
+ { NFSERR_REMOTE, -EREMOTE },
+#ifdef EWFLUSH
+ { NFSERR_WFLUSH, -EWFLUSH },
+#endif
+ { NFSERR_BADHANDLE, -EBADHANDLE },
+ { NFSERR_NOT_SYNC, -ENOTSYNC },
+ { NFSERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFSERR_NOTSUPP, -ENOTSUPP },
+ { NFSERR_TOOSMALL, -ETOOSMALL },
+ { NFSERR_SERVERFAULT, -EREMOTEIO },
+ { NFSERR_BADTYPE, -EBADTYPE },
+ { NFSERR_JUKEBOX, -EJUKEBOX },
+ { -1, -EIO }
+};
+
+/**
+ * nfs3_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized. This function is used jointly by NFSv2 and NFSv3.
+ */
+static int nfs3_stat_to_errno(enum nfs_stat status)
+{
+ int i;
+
+ for (i = 0; nfs_errtbl[i].stat != -1; i++) {
+ if (nfs_errtbl[i].stat == (int)status)
+ return nfs_errtbl[i].errno;
+ }
+ dprintk("NFS: Unrecognized nfs status value: %u\n", status);
+ return nfs_errtbl[i].errno;
+}
+
+
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021..c6827f93 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -24,6 +24,8 @@ enum nfs4_client_state {
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
NFS4CLNT_SERVER_SCOPE_MISMATCH,
+ NFS4CLNT_PURGE_STATE,
+ NFS4CLNT_BIND_CONN_TO_SESSION,
};
enum nfs4_session_state {
@@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {
const struct nfs4_state_maintenance_ops *state_renewal_ops;
};
-struct nfs_unique_id {
- struct rb_node rb_node;
- __u64 id;
-};
-
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
ktime_t create_time;
@@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;
extern const struct inode_operations nfs4_dir_inode_operations;
/* nfs4namespace.c */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
+ struct nfs_fh *, struct nfs_fattr *);
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+extern int nfs4_destroy_clientid(struct nfs_client *clp);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,
struct rpc_task *task);
extern void nfs4_destroy_session(struct nfs4_session *session);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
-extern int nfs4_proc_create_session(struct nfs_client *);
-extern int nfs4_proc_destroy_session(struct nfs4_session *);
+extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_init_session(struct nfs_server *server);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
@@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
-extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
#else
-static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
}
#endif /* CONFIG_NFS_V4_1 */
@@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs41_handle_server_scope(struct nfs_client *,
- struct server_scope **);
+ struct nfs41_server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9e..e134029 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
+static void filelayout_reset_write(struct nfs_write_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct rpc_task *task = &data->task;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ dprintk("%s Reset task %5u for i/o through MDS "
+ "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ data->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (long long)NFS_FILEID(hdr->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+ }
+}
+
+static void filelayout_reset_read(struct nfs_read_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct rpc_task *task = &data->task;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ dprintk("%s Reset task %5u for i/o through MDS "
+ "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ data->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (long long)NFS_FILEID(hdr->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+ }
+}
+
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
- int *reset)
+ struct pnfs_layout_segment *lseg)
{
- struct nfs_server *mds_server = NFS_SERVER(state->inode);
+ struct inode *inode = lseg->pls_layout->plh_inode;
+ struct nfs_server *mds_server = NFS_SERVER(inode);
+ struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs_client *mds_client = mds_server->nfs_client;
+ struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
if (task->tk_status >= 0)
return 0;
- *reset = 0;
switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ if (state == NULL)
+ break;
nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
+ if (state == NULL)
+ break;
nfs4_schedule_stateid_recovery(mds_server, state);
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
- nfs4_schedule_stateid_recovery(mds_server, state);
+ if (state != NULL)
+ nfs4_schedule_stateid_recovery(mds_server, state);
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
@@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
@@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
+ /* Invalidate Layout errors */
+ case -NFS4ERR_PNFS_NO_LAYOUT:
+ case -ESTALE: /* mapped NFS4ERR_STALE */
+ case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
+ case -EISDIR: /* mapped NFS4ERR_ISDIR */
+ case -NFS4ERR_FHEXPIRED:
+ case -NFS4ERR_WRONG_TYPE:
+ dprintk("%s Invalid layout error %d\n", __func__,
+ task->tk_status);
+ /*
+ * Destroy layout so new i/o will get a new layout.
+ * Layout will not be destroyed until all current lseg
+ * references are put. Mark layout as invalid to resend failed
+ * i/o and all i/o waiting on the slot table to the MDS until
+ * layout is destroyed and a new valid layout is obtained.
+ */
+ set_bit(NFS_LAYOUT_INVALID,
+ &NFS_I(inode)->layout->plh_flags);
+ pnfs_destroy_layout(NFS_I(inode));
+ rpc_wake_up(&tbl->slot_tbl_waitq);
+ goto reset;
+ /* RPC connection errors */
+ case -ECONNREFUSED:
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ case -EIO:
+ case -ETIMEDOUT:
+ case -EPIPE:
+ dprintk("%s DS connection error %d\n", __func__,
+ task->tk_status);
+ if (!filelayout_test_devid_invalid(devid))
+ _pnfs_return_layout(inode);
+ filelayout_mark_devid_invalid(devid);
+ rpc_wake_up(&tbl->slot_tbl_waitq);
+ nfs4_ds_disconnect(clp);
+ /* fall through */
default:
- dprintk("%s DS error. Retry through MDS %d\n", __func__,
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
- *reset = 1;
- break;
+ return -NFS4ERR_RESET_TO_MDS;
}
out:
task->tk_status = 0;
@@ -148,18 +232,17 @@ wait_on_recovery:
static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_read_data *data)
{
- int reset = 0;
+ struct nfs_pgio_header *hdr = data->header;
+ int err;
- dprintk("%s DS read\n", __func__);
+ err = filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, hdr->lseg);
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- pnfs_set_lo_fail(data->lseg);
- nfs4_reset_read(task, data);
- }
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ filelayout_reset_read(data);
+ return task->tk_status;
+ case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
static void
filelayout_set_layoutcommit(struct nfs_write_data *wdata)
{
- if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+ struct nfs_pgio_header *hdr = wdata->header;
+
+ if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
wdata->res.verf->committed == NFS_FILE_SYNC)
return;
pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
- (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+ dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
/*
@@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
+ if (filelayout_reset_to_mds(rdata->header->lseg)) {
+ dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+ filelayout_reset_read(rdata);
+ rpc_exit(task, 0);
+ return;
+ }
rdata->read_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+ if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+ task->tk_status == 0)
+ return;
+
/* Note this may cause RPC to be resent */
- rdata->mds_ops->rpc_call_done(task, data);
+ rdata->header->mds_ops->rpc_call_done(task, data);
}
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
- rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
- put_lseg(rdata->lseg);
- rdata->mds_ops->rpc_release(data);
+ nfs_put_client(rdata->ds_clp);
+ rdata->header->mds_ops->rpc_release(data);
}
static int filelayout_write_done_cb(struct rpc_task *task,
struct nfs_write_data *data)
{
- int reset = 0;
-
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- pnfs_set_lo_fail(data->lseg);
- nfs4_reset_write(task, data);
- }
+ struct nfs_pgio_header *hdr = data->header;
+ int err;
+
+ err = filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, hdr->lseg);
+
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ filelayout_reset_write(data);
+ return task->tk_status;
+ case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
}
/* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
{
struct nfs_page *first = nfs_list_entry(data->pages.next);
@@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
}
static int filelayout_commit_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_commit_data *data)
{
- int reset = 0;
-
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- prepare_to_resend_writes(data);
- pnfs_set_lo_fail(data->lseg);
- } else
- rpc_restart_call_prepare(task);
+ int err;
+
+ err = filelayout_async_handle_error(task, NULL, data->ds_clp,
+ data->lseg);
+
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ prepare_to_resend_writes(data);
+ return -EAGAIN;
+ case -EAGAIN:
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+ if (filelayout_reset_to_mds(wdata->header->lseg)) {
+ dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+ filelayout_reset_write(wdata);
+ rpc_exit(task, 0);
+ return;
+ }
if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args, &wdata->res.seq_res,
task))
@@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+
+ if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+ task->tk_status == 0)
+ return;
/* Note this may cause RPC to be resent */
- wdata->mds_ops->rpc_call_done(task, data);
+ wdata->header->mds_ops->rpc_call_done(task, data);
}
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
- rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+
+ nfs_put_client(wdata->ds_clp);
+ wdata->header->mds_ops->rpc_release(data);
+}
+
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *wdata = data;
- put_lseg(wdata->lseg);
- wdata->mds_ops->rpc_release(data);
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ &wdata->args.seq_args, &wdata->res.seq_res,
+ task))
+ return;
+
+ rpc_call_start(task);
+}
+
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *wdata = data;
+
+ /* Note this may cause RPC to be resent */
+ wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *cdata = data;
+
+ rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
}
-static void filelayout_commit_release(void *data)
+static void filelayout_commit_release(void *calldata)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_commit_data *data = calldata;
- nfs_commit_release_pages(wdata);
- if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
- nfs_commit_clear_lock(NFS_I(wdata->inode));
- put_lseg(wdata->lseg);
- nfs_commitdata_release(wdata);
+ data->completion_ops->completion(data);
+ put_lseg(data->lseg);
+ nfs_put_client(data->ds_clp);
+ nfs_commitdata_release(data);
}
static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
};
static const struct rpc_call_ops filelayout_commit_call_ops = {
- .rpc_call_prepare = filelayout_write_prepare,
- .rpc_call_done = filelayout_write_call_done,
- .rpc_count_stats = filelayout_write_count_stats,
+ .rpc_call_prepare = filelayout_commit_prepare,
+ .rpc_call_done = filelayout_write_commit_done,
+ .rpc_count_stats = filelayout_commit_count_stats,
.rpc_release = filelayout_commit_release,
};
static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
@@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
int status;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
- __func__, data->inode->i_ino,
+ __func__, hdr->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
- if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
- return PNFS_NOT_ATTEMPTED;
-
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
- if (!ds) {
- /* Either layout fh index faulty, or ds connect failed */
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ if (!ds)
return PNFS_NOT_ATTEMPTED;
- }
- dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
+ dprintk("%s USE DS: %s cl_count %d\n", __func__,
+ ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
/* No multipath support. Use first DS */
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
@@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
- status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
- &filelayout_read_call_ops);
+ status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
@@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
int status;
- if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
- return PNFS_NOT_ATTEMPTED;
-
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
- if (!ds) {
- printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
- __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ if (!ds)
return PNFS_NOT_ATTEMPTED;
- }
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
- data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ds->ds_remotestr);
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+ __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+ offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
data->write_done_cb = filelayout_write_done_cb;
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
@@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
- &filelayout_write_call_ops, sync);
+ status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_write_call_ops, sync,
+ RPC_TASK_SOFTCONN);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
@@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
dprintk("--> %s\n", __func__);
nfs4_fl_put_deviceid(fl->dsaddr);
- kfree(fl->commit_buckets);
+ /* This assumes a single RW lseg */
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ struct nfs4_filelayout *flo;
+
+ flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+ flo->commit_info.nbuckets = 0;
+ kfree(flo->commit_info.buckets);
+ flo->commit_info.buckets = NULL;
+ }
_filelayout_free_lseg(fl);
}
+static int
+filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo,
+ gfp_t gfp_flags)
+{
+ struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+ struct pnfs_commit_bucket *buckets;
+ int size;
+
+ if (fl->commit_through_mds)
+ return 0;
+ if (cinfo->ds->nbuckets != 0) {
+ /* This assumes there is only one IOMODE_RW lseg. What
+ * we really want to do is have a layout_hdr level
+ * dictionary of <multipath_list4, fh> keys, each
+ * associated with a struct list_head, populated by calls
+ * to filelayout_write_pagelist().
+ * */
+ return 0;
+ }
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
+ gfp_flags);
+ if (!buckets)
+ return -ENOMEM;
+ else {
+ int i;
+
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets != 0)
+ kfree(buckets);
+ else {
+ cinfo->ds->buckets = buckets;
+ cinfo->ds->nbuckets = size;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ }
+ }
+ spin_unlock(cinfo->lock);
+ return 0;
+ }
+}
+
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
struct nfs4_layoutget_res *lgr,
@@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
_filelayout_free_lseg(fl);
return NULL;
}
-
- /* This assumes there is only one IOMODE_RW lseg. What
- * we really want to do is have a layout_hdr level
- * dictionary of <multipath_list4, fh> keys, each
- * associated with a struct list_head, populated by calls
- * to filelayout_write_pagelist().
- * */
- if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
- int i;
- int size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
- fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
- if (!fl->commit_buckets) {
- filelayout_free_lseg(&fl->generic_hdr);
- return NULL;
- }
- fl->number_of_buckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&fl->commit_buckets[i].written);
- INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
- }
- }
return &fl->generic_hdr;
}
@@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;
- p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
- r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+ p_stripe = (u64)req_offset(prev);
+ r_stripe = (u64)req_offset(req);
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
do_div(p_stripe, stripe_unit);
@@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ /*
+ * Handling unaligned pages is difficult, because have to
+ * somehow split a req in two in certain cases in the
+ * pg.test code. Avoid this by just not using pnfs
+ * in this case.
+ */
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
@@ -747,8 +912,13 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
+ struct nfs_commit_info cinfo;
+ int status;
+
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase)
+ goto out_mds;
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
@@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_reset_write_mds(pgio);
+ goto out_mds;
+ nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+ status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
+ if (status < 0) {
+ put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out_mds;
+ }
+ return;
+out_mds:
+ nfs_pageio_reset_write_mds(pgio);
}
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
* If this will make the bucket empty, it will need to put the lseg reference.
*/
static void
-filelayout_clear_request_commit(struct nfs_page *req)
+filelayout_clear_request_commit(struct nfs_page *req,
+ struct nfs_commit_info *cinfo)
{
struct pnfs_layout_segment *freeme = NULL;
- struct inode *inode = req->wb_context->dentry->d_inode;
- spin_lock(&inode->i_lock);
+ spin_lock(cinfo->lock);
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
+ cinfo->ds->nwritten--;
if (list_is_singular(&req->wb_list)) {
- struct pnfs_layout_segment *lseg;
+ struct pnfs_commit_bucket *bucket;
- /* From here we can find the bucket, but for the moment,
- * since there is only one relevant lseg...
- */
- list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
- if (lseg->pls_range.iomode == IOMODE_RW) {
- freeme = lseg;
- break;
- }
- }
+ bucket = list_first_entry(&req->wb_list,
+ struct pnfs_commit_bucket,
+ written);
+ freeme = bucket->wlseg;
+ bucket->wlseg = NULL;
}
out:
- nfs_request_remove_commit_list(req);
- spin_unlock(&inode->i_lock);
+ nfs_request_remove_commit_list(req, cinfo);
+ spin_unlock(cinfo->lock);
put_lseg(freeme);
}
static struct list_head *
filelayout_choose_commit_list(struct nfs_page *req,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
+ struct pnfs_commit_bucket *buckets;
if (fl->commit_through_mds)
- return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+ return &cinfo->mds->list;
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
@@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
* to store the value calculated in filelayout_write_pagelist
* and just use that here.
*/
- j = nfs4_fl_calc_j_index(lseg,
- (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+ j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
- list = &fl->commit_buckets[i].written;
+ buckets = cinfo->ds->buckets;
+ list = &buckets[i].written;
if (list_empty(list)) {
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled
* off due to a rewrite, in which case it will be done in
- * filelayout_remove_commit_req
+ * filelayout_clear_request_commit
*/
- get_lseg(lseg);
+ buckets[i].wlseg = get_lseg(lseg);
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ cinfo->ds->nwritten++;
return list;
}
static void
filelayout_mark_request_commit(struct nfs_page *req,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct list_head *list;
- list = filelayout_choose_commit_list(req, lseg);
- nfs_request_add_commit_list(req, list);
+ list = filelayout_choose_commit_list(req, lseg, cinfo);
+ nfs_request_add_commit_list(req, list, cinfo);
}
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
return flseg->fh_array[i];
}
-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
@@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
- printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
- __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
prepare_to_resend_writes(data);
filelayout_commit_release(data);
return -EAGAIN;
}
- dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
- data->write_done_cb = filelayout_commit_done_cb;
+ dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
+ data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+ data->commit_done_cb = filelayout_commit_done_cb;
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
- return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
- &filelayout_commit_call_ops, how);
-}
-
-/*
- * This is only useful while we are using whole file layouts.
- */
-static struct pnfs_layout_segment *
-find_only_write_lseg_locked(struct inode *inode)
-{
- struct pnfs_layout_segment *lseg;
-
- list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
- if (lseg->pls_range.iomode == IOMODE_RW)
- return lseg;
- return NULL;
-}
-
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
-{
- struct pnfs_layout_segment *rv;
-
- spin_lock(&inode->i_lock);
- rv = find_only_write_lseg_locked(inode);
- if (rv)
- get_lseg(rv);
- spin_unlock(&inode->i_lock);
- return rv;
+ return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_commit_call_ops, how,
+ RPC_TASK_SOFTCONN);
}
static int
-filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
- spinlock_t *lock)
+transfer_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max)
{
- struct list_head *src = &bucket->written;
- struct list_head *dst = &bucket->committing;
struct nfs_page *req, *tmp;
int ret = 0;
list_for_each_entry_safe(req, tmp, src, wb_list) {
if (!nfs_lock_request(req))
continue;
- if (cond_resched_lock(lock))
+ kref_get(&req->wb_kref);
+ if (cond_resched_lock(cinfo->lock))
list_safe_reset_next(req, tmp, wb_list);
- nfs_request_remove_commit_list(req);
+ nfs_request_remove_commit_list(req, cinfo);
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
nfs_list_add_request(req, dst);
ret++;
- if (ret == max)
+ if ((ret == max) && !cinfo->dreq)
break;
}
return ret;
}
+static int
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo,
+ int max)
+{
+ struct list_head *src = &bucket->written;
+ struct list_head *dst = &bucket->committing;
+ int ret;
+
+ ret = transfer_commit_list(src, dst, cinfo, max);
+ if (ret) {
+ cinfo->ds->nwritten -= ret;
+ cinfo->ds->ncommitting += ret;
+ bucket->clseg = bucket->wlseg;
+ if (list_empty(src))
+ bucket->wlseg = NULL;
+ else
+ get_lseg(bucket->clseg);
+ }
+ return ret;
+}
+
/* Move reqs from written to committing lists, returning count of number moved.
- * Note called with i_lock held.
+ * Note called with cinfo->lock held.
*/
-static int filelayout_scan_commit_lists(struct inode *inode, int max,
- spinlock_t *lock)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+ int max)
{
- struct pnfs_layout_segment *lseg;
- struct nfs4_filelayout_segment *fl;
int i, rv = 0, cnt;
- lseg = find_only_write_lseg_locked(inode);
- if (!lseg)
- goto out_done;
- fl = FILELAYOUT_LSEG(lseg);
- if (fl->commit_through_mds)
- goto out_done;
- for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
- cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
- max, lock);
+ for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+ cinfo, max);
max -= cnt;
rv += cnt;
}
-out_done:
return rv;
}
+/* Pull everything off the committing lists and dump into @dst */
+static void filelayout_recover_commit_reqs(struct list_head *dst,
+ struct nfs_commit_info *cinfo)
+{
+ struct pnfs_commit_bucket *b;
+ int i;
+
+ /* NOTE cinfo->lock is NOT held, relying on fact that this is
+ * only called on single thread per dreq.
+ * Can't take the lock because need to do put_lseg
+ */
+ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+ if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+ BUG_ON(!list_empty(&b->written));
+ put_lseg(b->wlseg);
+ b->wlseg = NULL;
+ }
+ }
+ cinfo->ds->nwritten = 0;
+}
+
static unsigned int
-alloc_ds_commits(struct inode *inode, struct list_head *list)
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{
- struct pnfs_layout_segment *lseg;
- struct nfs4_filelayout_segment *fl;
- struct nfs_write_data *data;
+ struct pnfs_ds_commit_info *fl_cinfo;
+ struct pnfs_commit_bucket *bucket;
+ struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;
- /* Won't need this when non-whole file layout segments are supported
- * instead we will use a pnfs_layout_hdr structure */
- lseg = find_only_write_lseg(inode);
- if (!lseg)
- return 0;
- fl = FILELAYOUT_LSEG(lseg);
- for (i = 0; i < fl->number_of_buckets; i++) {
- if (list_empty(&fl->commit_buckets[i].committing))
+ fl_cinfo = cinfo->ds;
+ bucket = fl_cinfo->buckets;
+ for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
data = nfs_commitdata_alloc();
if (!data)
break;
data->ds_commit_index = i;
- data->lseg = lseg;
+ data->lseg = bucket->clseg;
+ bucket->clseg = NULL;
list_add(&data->pages, list);
nreq++;
}
/* Clean up on error */
- for (j = i; j < fl->number_of_buckets; j++) {
- if (list_empty(&fl->commit_buckets[i].committing))
+ for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
- nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
- put_lseg(lseg); /* associated with emptying bucket */
+ nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
+ put_lseg(bucket->clseg);
+ bucket->clseg = NULL;
}
- put_lseg(lseg);
/* Caller will clean up entries put on list */
return nreq;
}
@@ -1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
/* This follows nfs_commit_list pretty closely */
static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
- struct nfs_write_data *data, *tmp;
+ struct nfs_commit_data *data, *tmp;
LIST_HEAD(list);
unsigned int nreq = 0;
@@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_add(&data->pages, &list);
nreq++;
} else
- nfs_retry_commit(mds_pages, NULL);
+ nfs_retry_commit(mds_pages, NULL, cinfo);
}
- nreq += alloc_ds_commits(inode, &list);
+ nreq += alloc_ds_commits(cinfo, &list);
if (nreq == 0) {
- nfs_commit_clear_lock(NFS_I(inode));
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
goto out;
}
- atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
+ atomic_add(nreq, &cinfo->mds->rpcs_out);
list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
if (!data->lseg) {
- nfs_init_commit(data, mds_pages, NULL);
- nfs_initiate_commit(data, NFS_CLIENT(inode),
- data->mds_ops, how);
+ nfs_init_commit(data, mds_pages, NULL, cinfo);
+ nfs_initiate_commit(NFS_CLIENT(inode), data,
+ data->mds_ops, how, 0);
} else {
- nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
+ struct pnfs_commit_bucket *buckets;
+
+ buckets = cinfo->ds->buckets;
+ nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
filelayout_initiate_commit(data, how);
}
}
out:
+ cinfo->ds->ncommitting = 0;
return PNFS_ATTEMPTED;
}
@@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}
+static struct pnfs_layout_hdr *
+filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+ struct nfs4_filelayout *flo;
+
+ flo = kzalloc(sizeof(*flo), gfp_flags);
+ return &flo->generic_hdr;
+}
+
+static void
+filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ kfree(FILELAYOUT_FROM_HDR(lo));
+}
+
+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+ struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
+
+ if (layout == NULL)
+ return NULL;
+ else
+ return &FILELAYOUT_FROM_HDR(layout)->commit_info;
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
+ .alloc_layout_hdr = filelayout_alloc_layout_hdr,
+ .free_layout_hdr = filelayout_free_layout_hdr,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
+ .get_ds_info = &filelayout_get_ds_info,
.mark_request_commit = filelayout_mark_request_commit,
.clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
+ .recover_commit_reqs = filelayout_recover_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb..43fe802 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
#include "pnfs.h"
/*
+ * Default data server connection timeout and retrans vaules.
+ * Set by module paramters dataserver_timeo and dataserver_retrans.
+ */
+#define NFS4_DEF_DS_TIMEO 60
+#define NFS4_DEF_DS_RETRANS 5
+
+/*
* Field testing shows we need to support up to 4096 stripe indices.
* We store each index as a u8 (u32 on the wire) to keep the memory footprint
* reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
#define NFS4_PNFS_MAX_STRIPE_CNT 4096
#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS 12001
+
enum stripetype4 {
STRIPE_SPARSE = 1,
STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
atomic_t ds_count;
};
-/* nfs4_file_layout_dsaddr flags */
-#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
-
struct nfs4_file_layout_dsaddr {
struct nfs4_deviceid_node id_node;
- unsigned long flags;
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
struct nfs4_pnfs_ds *ds_list[1];
};
-struct nfs4_fl_commit_bucket {
- struct list_head written;
- struct list_head committing;
-};
-
struct nfs4_filelayout_segment {
struct pnfs_layout_segment generic_hdr;
u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
unsigned int num_fh;
struct nfs_fh **fh_array;
- struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
- int number_of_buckets;
};
+struct nfs4_filelayout {
+ struct pnfs_layout_hdr generic_hdr;
+ struct pnfs_ds_commit_info commit_info;
+};
+
+static inline struct nfs4_filelayout *
+FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
+{
+ return container_of(lo, struct nfs4_filelayout, generic_hdr);
+}
+
static inline struct nfs4_filelayout_segment *
FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
}
+static inline void
+filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
+{
+ u32 *p = (u32 *)&node->deviceid;
+
+ printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
+ p[0], p[1], p[2], p[3]);
+
+ set_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
+{
+ return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
+}
+
+static inline bool
+filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
+{
+ return test_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
+{
+ return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
+ filelayout_test_layout_invalid(lseg->pls_layout);
+}
+
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+void nfs4_ds_disconnect(struct nfs_client *clp);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c9cff9a..a1fab8d 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
+#include <linux/module.h>
#include "internal.h"
#include "nfs4filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
+static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+
/*
* Data server cache
*
@@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
}
/*
+ * Lookup DS by nfs_client pointer. Zero data server client pointer
+ */
+void nfs4_ds_disconnect(struct nfs_client *clp)
+{
+ struct nfs4_pnfs_ds *ds;
+ struct nfs_client *found = NULL;
+
+ dprintk("%s clp %p\n", __func__, clp);
+ spin_lock(&nfs4_ds_cache_lock);
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ if (ds->ds_clp && ds->ds_clp == clp) {
+ found = ds->ds_clp;
+ ds->ds_clp = NULL;
+ }
+ spin_unlock(&nfs4_ds_cache_lock);
+ if (found) {
+ set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
+ nfs_put_client(clp);
+ }
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only supports IPv4 and IPv6 addresses
*/
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
__func__, ds->ds_remotestr, da->da_remotestr);
clp = nfs4_set_ds_client(mds_srv->nfs_client,
- (struct sockaddr *)&da->da_addr,
- da->da_addrlen, IPPROTO_TCP);
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP,
+ dataserver_timeo, dataserver_retrans);
if (!IS_ERR(clp))
break;
}
@@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out;
}
- if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
- if (!is_ds_client(clp)) {
- status = -ENODEV;
- goto out_put;
- }
- ds->ds_clp = clp;
- dprintk("%s [existing] server=%s\n", __func__,
- ds->ds_remotestr);
- goto out;
- }
-
- /*
- * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
- * be equal to the MDS lease. Renewal is scheduled in create_session.
- */
- spin_lock(&mds_srv->nfs_client->cl_lock);
- clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
- spin_unlock(&mds_srv->nfs_client->cl_lock);
- clp->cl_last_renewal = jiffies;
-
- /* New nfs_client */
- status = nfs4_init_ds_session(clp);
+ status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
if (status)
goto out_put;
@@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
+ da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
&stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
@@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
return flseg->fh_array[i];
}
-static void
-filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
- int err, const char *ds_remotestr)
-{
- u32 *p = (u32 *)&dsaddr->id_node.deviceid;
-
- printk(KERN_ERR "NFS: data server %s connection error %d."
- " Deviceid [%x%x%x%x] marked out of use.\n",
- ds_remotestr, err, p[0], p[1], p[2], p[3]);
-
- spin_lock(&nfs4_ds_cache_lock);
- dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
- spin_unlock(&nfs4_ds_cache_lock);
-}
-
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
+ struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
+
+ if (filelayout_test_devid_invalid(devid))
+ return NULL;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
- return NULL;
+ goto mark_dev_invalid;
}
if (!ds->ds_clp) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
- if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
- /* Already tried to connect, don't try again */
- dprintk("%s Deviceid marked out of use\n", __func__);
- return NULL;
- }
err = nfs4_ds_connect(s, ds);
- if (err) {
- filelayout_mark_devid_negative(dsaddr, err,
- ds->ds_remotestr);
- return NULL;
- }
+ if (err)
+ goto mark_dev_invalid;
}
return ds;
+
+mark_dev_invalid:
+ filelayout_mark_devid_invalid(devid);
+ return NULL;
}
+
+module_param(dataserver_retrans, uint, 0644);
+MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
+ "retries a request before it attempts further "
+ " recovery action.");
+module_param(dataserver_timeo, uint, 0644);
+MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
+ "NFSv4.1 client waits for a response from a "
+ " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3ded..017b4b0 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
return ret;
}
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+{
+ struct gss_api_mech *mech;
+ struct xdr_netobj oid;
+ int i;
+ rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
+
+ for (i = 0; i < flavors->num_flavors; i++) {
+ struct nfs4_secinfo_flavor *flavor;
+ flavor = &flavors->flavors[i];
+
+ if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
+ pseudoflavor = flavor->flavor;
+ break;
+ } else if (flavor->flavor == RPC_AUTH_GSS) {
+ oid.len = flavor->gss.sec_oid4.len;
+ oid.data = flavor->gss.sec_oid4.data;
+ mech = gss_mech_get_by_OID(&oid);
+ if (!mech)
+ continue;
+ pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
+ gss_mech_put(mech);
+ break;
+ }
+ }
+
+ return pseudoflavor;
+}
+
static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
{
struct page *page;
@@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino
rpc_authflavor_t flavor;
flavor = nfs4_negotiate_security(inode, name);
- if (flavor < 0)
+ if ((int)flavor < 0)
return ERR_PTR(flavor);
clone = rpc_clone_client(clnt);
@@ -300,7 +329,7 @@ out:
* @dentry - dentry of referral
*
*/
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
{
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
@@ -341,3 +370,25 @@ out:
dprintk("%s: done\n", __func__);
return mnt;
}
+
+struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
+ struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+ struct dentry *parent = dget_parent(dentry);
+ struct rpc_clnt *client;
+ struct vfsmount *mnt;
+
+ /* Look it up again to get its attributes and sec flavor */
+ client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
+ dput(parent);
+ if (IS_ERR(client))
+ return ERR_CAST(client);
+
+ if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+ mnt = nfs_do_refmount(client, dentry);
+ else
+ mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
+
+ rpc_shutdown_client(client);
+ return mnt;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ab985f6..d48dbef 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
#include "iostat.h"
#include "callback.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
+static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
@@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)
case -NFS4ERR_BADOWNER:
case -NFS4ERR_BADNAME:
return -EINVAL;
+ case -NFS4ERR_SHARE_DENIED:
+ return -EACCES;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, errorcode);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
struct nfs_inode *nfsi = NFS_I(dir);
spin_lock(&dir->i_lock);
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
if (!cinfo->atomic || cinfo->before != dir->i_version)
nfs_force_lookup_revalidate(dir);
dir->i_version = cinfo->after;
@@ -788,7 +792,6 @@ struct nfs4_opendata {
struct nfs4_string owner_name;
struct nfs4_string group_name;
struct nfs_fattr f_attr;
- struct nfs_fattr dir_attr;
struct dentry *dir;
struct dentry *dentry;
struct nfs4_state_owner *owner;
@@ -804,12 +807,10 @@ struct nfs4_opendata {
static void nfs4_init_opendata_res(struct nfs4_opendata *p)
{
p->o_res.f_attr = &p->f_attr;
- p->o_res.dir_attr = &p->dir_attr;
p->o_res.seqid = p->o_arg.seqid;
p->c_res.seqid = p->c_arg.seqid;
p->o_res.server = p->o_arg.server;
nfs_fattr_init(&p->f_attr);
- nfs_fattr_init(&p->dir_attr);
nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
}
@@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
p->o_arg.bitmask = server->attr_bitmask;
- p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
if (attrs != NULL && attrs->ia_valid != 0) {
__be32 verf[2];
@@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
@@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
- nfs_refresh_inode(dir, o_res->dir_attr);
-
if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
status = _nfs4_proc_open_confirm(data);
if (status != 0)
@@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(server, &data->f_attr);
- if (o_arg->open_flags & O_CREAT) {
+ if (o_arg->open_flags & O_CREAT)
update_changeattr(dir, &o_res->cinfo);
- nfs_post_op_update_inode(dir, o_res->dir_attr);
- } else
- nfs_refresh_inode(dir, o_res->dir_attr);
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
/*
* Returns a referenced nfs4_state
*/
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_state **res,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
@@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
if (opendata == NULL)
goto err_put_state_owner;
+ if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+ if (!opendata->f_attr.mdsthreshold)
+ goto err_opendata_put;
+ }
if (dentry->d_inode != NULL)
opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
@@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
nfs_setattr_update_inode(state->inode, sattr);
nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
}
+
+ if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+ *ctx_th = opendata->f_attr.mdsthreshold;
+ else
+ kfree(opendata->f_attr.mdsthreshold);
+ opendata->f_attr.mdsthreshold = NULL;
+
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
*res = state;
return 0;
err_opendata_put:
+ kfree(opendata->f_attr.mdsthreshold);
nfs4_opendata_put(opendata);
err_put_state_owner:
nfs4_put_state_owner(sp);
@@ -1853,14 +1868,21 @@ out_err:
}
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
- status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+ status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+ &res, ctx_th);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
struct nfs4_state *state;
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+ state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+ ctx->cred, &ctx->mdsthreshold);
if (IS_ERR(state))
return ERR_CAST(state);
ctx->state = state;
@@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
/*
* get the file handle for the "/" directory on the server
*/
-static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
{
int minor_version = server->nfs_client->cl_minorversion;
int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_map_errors(status);
}
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
+ struct nfs_fsinfo *info)
+{
+ int error;
+ struct nfs_fattr *fattr = info->fattr;
+
+ error = nfs4_server_capabilities(server, mntfh);
+ if (error < 0) {
+ dprintk("nfs4_get_root: getcaps error = %d\n", -error);
+ return error;
+ }
+
+ error = nfs4_proc_getattr(server, mntfh, fattr);
+ if (error < 0) {
+ dprintk("nfs4_get_root: getattr error = %d\n", -error);
+ return error;
+ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_FSID &&
+ !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+ memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+
+ return error;
+}
+
/*
* Get locations and (maybe) other attributes of a referral.
* Note that we'll actually follow the referral later when
@@ -2578,7 +2626,7 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int status;
@@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
fmode = ctx->mode;
}
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+ state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs_removeargs args = {
.fh = NFS_FH(dir),
.name = *name,
- .bitmask = server->attr_bitmask,
};
struct nfs_removeres res = {
.server = server,
@@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
.rpc_argp = &args,
.rpc_resp = &res,
};
- int status = -ENOMEM;
-
- res.dir_attr = nfs_alloc_fattr();
- if (res.dir_attr == NULL)
- goto out;
+ int status;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
- if (status == 0) {
+ if (status == 0)
update_changeattr(dir, &res.cinfo);
- nfs_post_op_update_inode(dir, res.dir_attr);
- }
- nfs_free_fattr(res.dir_attr);
-out:
return status;
}
@@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
struct nfs_removeargs *args = msg->rpc_argp;
struct nfs_removeres *res = msg->rpc_resp;
- args->bitmask = server->cache_consistency_bitmask;
res->server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
return 0;
update_changeattr(dir, &res->cinfo);
- nfs_post_op_update_inode(dir, res->dir_attr);
return 1;
}
@@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
struct nfs_renameres *res = msg->rpc_resp;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
- arg->bitmask = server->attr_bitmask;
res->server = server;
nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
}
@@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
update_changeattr(old_dir, &res->old_cinfo);
- nfs_post_op_update_inode(old_dir, res->old_fattr);
update_changeattr(new_dir, &res->new_cinfo);
- nfs_post_op_update_inode(new_dir, res->new_fattr);
return 1;
}
@@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
.new_dir = NFS_FH(new_dir),
.old_name = old_name,
.new_name = new_name,
- .bitmask = server->attr_bitmask,
};
struct nfs_renameres res = {
.server = server,
@@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
};
int status = -ENOMEM;
- res.old_fattr = nfs_alloc_fattr();
- res.new_fattr = nfs_alloc_fattr();
- if (res.old_fattr == NULL || res.new_fattr == NULL)
- goto out;
-
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
update_changeattr(old_dir, &res.old_cinfo);
- nfs_post_op_update_inode(old_dir, res.old_fattr);
update_changeattr(new_dir, &res.new_cinfo);
- nfs_post_op_update_inode(new_dir, res.new_fattr);
}
-out:
- nfs_free_fattr(res.new_fattr);
- nfs_free_fattr(res.old_fattr);
return status;
}
@@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
int status = -ENOMEM;
res.fattr = nfs_alloc_fattr();
- res.dir_attr = nfs_alloc_fattr();
- if (res.fattr == NULL || res.dir_attr == NULL)
+ if (res.fattr == NULL)
goto out;
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
update_changeattr(dir, &res.cinfo);
- nfs_post_op_update_inode(dir, res.dir_attr);
nfs_post_op_update_inode(inode, res.fattr);
}
out:
- nfs_free_fattr(res.dir_attr);
nfs_free_fattr(res.fattr);
return status;
}
@@ -3002,7 +3022,6 @@ struct nfs4_createdata {
struct nfs4_create_res res;
struct nfs_fh fh;
struct nfs_fattr fattr;
- struct nfs_fattr dir_fattr;
};
static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
data->res.server = server;
data->res.fh = &data->fh;
data->res.fattr = &data->fattr;
- data->res.dir_fattr = &data->dir_fattr;
nfs_fattr_init(data->res.fattr);
- nfs_fattr_init(data->res.dir_fattr);
}
return data;
}
@@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
update_changeattr(dir, &data->res.dir_cinfo);
- nfs_post_op_update_inode(dir, data->res.dir_fattr);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
}
return status;
@@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
void __nfs4_read_done_cb(struct nfs_read_data *data)
{
- nfs_invalidate_atime(data->inode);
+ nfs_invalidate_atime(data->header->inode);
}
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
{
- if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
&data->res.seq_res,
task))
@@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
rpc_call_start(task);
}
-/* Reset the the nfs_read_data to send the read to the MDS. */
-void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
-{
- dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
- /* offsets will differ in the dense stripe case */
- data->args.offset = data->mds_offset;
- data->ds_clp = NULL;
- data->args.fh = NFS_FH(data->inode);
- data->read_done_cb = nfs4_read_done_cb;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
-}
-EXPORT_SYMBOL_GPL(nfs4_reset_read);
-
static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
}
return 0;
}
@@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
nfs4_write_done_cb(task, data);
}
-/* Reset the the nfs_write_data to send the write to the MDS. */
-void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+static
+bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
{
- dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
- data->ds_clp = NULL;
- data->write_done_cb = nfs4_write_done_cb;
- data->args.fh = NFS_FH(data->inode);
- data->args.bitmask = data->res.server->cache_consistency_bitmask;
- data->args.offset = data->mds_offset;
- data->res.fattr = &data->fattr;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+ const struct nfs_pgio_header *hdr = data->header;
+
+ /* Don't request attributes for pNFS or O_DIRECT writes */
+ if (data->ds_clp != NULL || hdr->dreq != NULL)
+ return false;
+ /* Otherwise, request attributes if and only if we don't hold
+ * a delegation
+ */
+ return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-EXPORT_SYMBOL_GPL(nfs4_reset_write);
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);
- if (data->lseg) {
+ if (!nfs4_write_need_cache_consistency_data(data)) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
} else
data->args.bitmask = server->cache_consistency_bitmask;
+
if (!data->write_done_cb)
data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
@@ -3459,6 +3457,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
{
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task))
+ return;
+ rpc_call_start(task);
+}
+
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
if (nfs4_setup_sequence(NFS_SERVER(data->inode),
&data->args.seq_args,
&data->res.seq_res,
@@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
rpc_call_start(task);
}
-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
{
struct inode *inode = data->inode;
@@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- nfs_refresh_inode(inode, data->res.fattr);
return 0;
}
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
- return data->write_done_cb(task, data);
+ return data->commit_done_cb(task, data);
}
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);
- if (data->lseg) {
- data->args.bitmask = NULL;
- data->res.fattr = NULL;
- } else
- data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_commit_done_cb;
+ if (data->commit_done_cb == NULL)
+ data->commit_done_cb = nfs4_commit_done_cb;
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3931,13 +3933,21 @@ wait_on_recovery:
return -EAGAIN;
}
-static void nfs4_construct_boot_verifier(struct nfs_client *clp,
- nfs4_verifier *bootverf)
+static void nfs4_init_boot_verifier(const struct nfs_client *clp,
+ nfs4_verifier *bootverf)
{
__be32 verf[2];
- verf[0] = htonl((u32)clp->cl_boot_time.tv_sec);
- verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec);
+ if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+ /* An impossible timestamp guarantees this value
+ * will never match a generated boot time. */
+ verf[0] = 0;
+ verf[1] = (__be32)(NSEC_PER_SEC + 1);
+ } else {
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+ verf[0] = (__be32)nn->boot_time.tv_sec;
+ verf[1] = (__be32)nn->boot_time.tv_nsec;
+ }
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
@@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
int loop = 0;
int status;
- nfs4_construct_boot_verifier(clp, &sc_verifier);
+ nfs4_init_boot_verifier(clp, &sc_verifier);
for(;;) {
rcu_read_lock();
@@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->attr_bitmask;
+ data->args.bitmask = server->cache_consistency_bitmask;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
@@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
if (status != 0)
goto out;
status = data->rpc_status;
- if (status != 0)
- goto out;
- nfs_refresh_inode(inode, &data->fattr);
+ if (status == 0)
+ nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ else
+ nfs_refresh_inode(inode, &data->fattr);
out:
rpc_put_task(task);
return status;
@@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
goto out;
case -ERESTARTSYS:
/*
@@ -5079,7 +5090,8 @@ out_inval:
}
static bool
-nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+nfs41_same_server_scope(struct nfs41_server_scope *a,
+ struct nfs41_server_scope *b)
{
if (a->server_scope_sz == b->server_scope_sz &&
memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5089,6 +5101,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
}
/*
+ * nfs4_proc_bind_conn_to_session()
+ *
+ * The 4.1 client currently uses the same TCP connection for the
+ * fore and backchannel.
+ */
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+ int status;
+ struct nfs41_bind_conn_to_session_res res;
+ struct rpc_message msg = {
+ .rpc_proc =
+ &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
+ .rpc_argp = clp,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+
+ dprintk("--> %s\n", __func__);
+ BUG_ON(clp == NULL);
+
+ res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+ if (unlikely(res.session == NULL)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ if (status == 0) {
+ if (memcmp(res.session->sess_id.data,
+ clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
+ dprintk("NFS: %s: Session ID mismatch\n", __func__);
+ status = -EIO;
+ goto out_session;
+ }
+ if (res.dir != NFS4_CDFS4_BOTH) {
+ dprintk("NFS: %s: Unexpected direction from server\n",
+ __func__);
+ status = -EIO;
+ goto out_session;
+ }
+ if (res.use_conn_in_rdma_mode) {
+ dprintk("NFS: %s: Server returned RDMA mode = true\n",
+ __func__);
+ status = -EIO;
+ goto out_session;
+ }
+ }
+out_session:
+ kfree(res.session);
+out:
+ dprintk("<-- %s status= %d\n", __func__, status);
+ return status;
+}
+
+/*
* nfs4_proc_exchange_id()
*
* Since the clientid has expired, all compounds using sessions
@@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
};
struct nfs41_exchange_id_res res = {
- .client = clp,
+ 0
};
int status;
struct rpc_message msg = {
@@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
dprintk("--> %s\n", __func__);
BUG_ON(clp == NULL);
- nfs4_construct_boot_verifier(clp, &verifier);
+ nfs4_init_boot_verifier(clp, &verifier);
args.id_len = scnprintf(args.id, sizeof(args.id),
"%s/%s/%u",
@@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
clp->cl_rpcclient->cl_nodename,
clp->cl_rpcclient->cl_auth->au_flavor);
- res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
- if (unlikely(!res.server_scope)) {
+ res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
+ GFP_NOFS);
+ if (unlikely(res.server_owner == NULL)) {
status = -ENOMEM;
goto out;
}
- res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
- if (unlikely(!res.impl_id)) {
+ res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
+ GFP_NOFS);
+ if (unlikely(res.server_scope == NULL)) {
+ status = -ENOMEM;
+ goto out_server_owner;
+ }
+
+ res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
+ if (unlikely(res.impl_id == NULL)) {
status = -ENOMEM;
goto out_server_scope;
}
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
- if (!status)
- status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+ if (status == 0)
+ status = nfs4_check_cl_exchange_flags(res.flags);
+
+ if (status == 0) {
+ clp->cl_clientid = res.clientid;
+ clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
+ if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+ clp->cl_seqid = res.seqid;
+
+ kfree(clp->cl_serverowner);
+ clp->cl_serverowner = res.server_owner;
+ res.server_owner = NULL;
- if (!status) {
/* use the most recent implementation id */
- kfree(clp->impl_id);
- clp->impl_id = res.impl_id;
- } else
- kfree(res.impl_id);
+ kfree(clp->cl_implid);
+ clp->cl_implid = res.impl_id;
- if (!status) {
- if (clp->server_scope &&
- !nfs41_same_server_scope(clp->server_scope,
+ if (clp->cl_serverscope != NULL &&
+ !nfs41_same_server_scope(clp->cl_serverscope,
res.server_scope)) {
dprintk("%s: server_scope mismatch detected\n",
__func__);
set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
- kfree(clp->server_scope);
- clp->server_scope = NULL;
+ kfree(clp->cl_serverscope);
+ clp->cl_serverscope = NULL;
}
- if (!clp->server_scope) {
- clp->server_scope = res.server_scope;
+ if (clp->cl_serverscope == NULL) {
+ clp->cl_serverscope = res.server_scope;
goto out;
}
- }
+ } else
+ kfree(res.impl_id);
+out_server_owner:
+ kfree(res.server_owner);
out_server_scope:
kfree(res.server_scope);
out:
- if (clp->impl_id)
+ if (clp->cl_implid != NULL)
dprintk("%s: Server Implementation ID: "
"domain: %s, name: %s, date: %llu,%u\n",
- __func__, clp->impl_id->domain, clp->impl_id->name,
- clp->impl_id->date.seconds,
- clp->impl_id->date.nseconds);
+ __func__, clp->cl_implid->domain, clp->cl_implid->name,
+ clp->cl_implid->date.seconds,
+ clp->cl_implid->date.nseconds);
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
+static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
+ struct rpc_cred *cred)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
+ .rpc_argp = clp,
+ .rpc_cred = cred,
+ };
+ int status;
+
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ if (status)
+ pr_warn("NFS: Got error %d from the server %s on "
+ "DESTROY_CLIENTID.", status, clp->cl_hostname);
+ return status;
+}
+
+static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
+ struct rpc_cred *cred)
+{
+ unsigned int loop;
+ int ret;
+
+ for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
+ ret = _nfs4_proc_destroy_clientid(clp, cred);
+ switch (ret) {
+ case -NFS4ERR_DELAY:
+ case -NFS4ERR_CLIENTID_BUSY:
+ ssleep(1);
+ break;
+ default:
+ return ret;
+ }
+ }
+ return 0;
+}
+
+int nfs4_destroy_clientid(struct nfs_client *clp)
+{
+ struct rpc_cred *cred;
+ int ret = 0;
+
+ if (clp->cl_mvops->minor_version < 1)
+ goto out;
+ if (clp->cl_exchange_flags == 0)
+ goto out;
+ cred = nfs4_get_exchange_id_cred(clp);
+ ret = nfs4_proc_destroy_clientid(clp, cred);
+ if (cred)
+ put_rpccred(cred);
+ switch (ret) {
+ case 0:
+ case -NFS4ERR_STALE_CLIENTID:
+ clp->cl_exchange_flags = 0;
+ }
+out:
+ return ret;
+}
+
struct nfs4_get_lease_time_data {
struct nfs4_get_lease_time_args *args;
struct nfs4_get_lease_time_res *res;
@@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
+ struct rpc_cred *cred;
- nfs4_proc_destroy_session(session);
+ cred = nfs4_get_exchange_id_cred(session->clp);
+ nfs4_proc_destroy_session(session, cred);
+ if (cred)
+ put_rpccred(cred);
rcu_read_lock();
xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
@@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
return nfs4_verify_back_channel_attrs(args, session);
}
-static int _nfs4_proc_create_session(struct nfs_client *clp)
+static int _nfs4_proc_create_session(struct nfs_client *clp,
+ struct rpc_cred *cred)
{
struct nfs4_session *session = clp->cl_session;
struct nfs41_create_session_args args = {
@@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
.rpc_argp = &args,
.rpc_resp = &res,
+ .rpc_cred = cred,
};
int status;
@@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
* It is the responsibility of the caller to verify the session is
* expired before calling this routine.
*/
-int nfs4_proc_create_session(struct nfs_client *clp)
+int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
unsigned *ptr;
@@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ status = _nfs4_proc_create_session(clp, cred);
if (status)
goto out;
@@ -5578,10 +5727,15 @@ out:
* Issue the over-the-wire RPC DESTROY_SESSION.
* The caller must serialize access to this routine.
*/
-int nfs4_proc_destroy_session(struct nfs4_session *session)
+int nfs4_proc_destroy_session(struct nfs4_session *session,
+ struct rpc_cred *cred)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
+ .rpc_argp = session,
+ .rpc_cred = cred,
+ };
int status = 0;
- struct rpc_message msg;
dprintk("--> nfs4_proc_destroy_session\n");
@@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
if (session->clp->cl_cons_state != NFS_CS_READY)
return status;
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
- msg.rpc_argp = session;
- msg.rpc_resp = NULL;
- msg.rpc_cred = NULL;
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (status)
@@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
return status;
}
+/*
+ * With sessions, the client is not marked ready until after a
+ * successful EXCHANGE_ID and CREATE_SESSION.
+ *
+ * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
+ * other versions of NFS can be tried.
+ */
+static int nfs41_check_session_ready(struct nfs_client *clp)
+{
+ int ret;
+
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+ ret = nfs4_client_recover_expired_lease(clp);
+ if (ret)
+ return ret;
+ }
+ if (clp->cl_cons_state < NFS_CS_READY)
+ return -EPROTONOSUPPORT;
+ smp_rmb();
+ return 0;
+}
+
int nfs4_init_session(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_session *session;
unsigned int rsize, wsize;
- int ret;
if (!nfs4_has_session(clp))
return 0;
session = clp->cl_session;
- if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
- return 0;
+ spin_lock(&clp->cl_lock);
+ if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
- rsize = server->rsize;
- if (rsize == 0)
- rsize = NFS_MAX_FILE_IO_SIZE;
- wsize = server->wsize;
- if (wsize == 0)
- wsize = NFS_MAX_FILE_IO_SIZE;
+ rsize = server->rsize;
+ if (rsize == 0)
+ rsize = NFS_MAX_FILE_IO_SIZE;
+ wsize = server->wsize;
+ if (wsize == 0)
+ wsize = NFS_MAX_FILE_IO_SIZE;
- session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
- session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ }
+ spin_unlock(&clp->cl_lock);
- ret = nfs4_recover_expired_lease(server);
- if (!ret)
- ret = nfs4_check_client_ready(clp);
- return ret;
+ return nfs41_check_session_ready(clp);
}
-int nfs4_init_ds_session(struct nfs_client *clp)
+int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
{
struct nfs4_session *session = clp->cl_session;
int ret;
- if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
- return 0;
-
- ret = nfs4_client_recover_expired_lease(clp);
- if (!ret)
- /* Test for the DS role */
- if (!is_ds_client(clp))
- ret = -ENODEV;
- if (!ret)
- ret = nfs4_check_client_ready(clp);
- return ret;
+ spin_lock(&clp->cl_lock);
+ if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
+ /*
+ * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
+ * DS lease to be equal to the MDS lease.
+ */
+ clp->cl_lease_time = lease_time;
+ clp->cl_last_renewal = jiffies;
+ }
+ spin_unlock(&clp->cl_lock);
+ ret = nfs41_check_session_ready(clp);
+ if (ret)
+ return ret;
+ /* Test for the DS role */
+ if (!is_ds_client(clp))
+ return -ENODEV;
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
@@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.file_inode_ops = &nfs4_file_inode_operations,
.file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
+ .submount = nfs4_submount,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
@@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
.commit_done = nfs4_commit_done,
.lock = nfs4_proc_lock,
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.init_client = nfs4_init_client,
- .secinfo = nfs4_proc_secinfo,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0..6930bec 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
-#define NFSDBG_FACILITY NFSDBG_PROC
+#define NFSDBG_FACILITY NFSDBG_STATE
void
nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7f0fcfc..c679b9e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,8 @@
#include "internal.h"
#include "pnfs.h"
+#define NFSDBG_FACILITY NFSDBG_STATE
+
#define OPENOWNER_POOL_SIZE 8
const nfs4_stateid zero_stateid;
@@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
do_confirm:
- status = nfs4_proc_create_session(clp);
+ status = nfs4_proc_create_session(clp, cred);
if (status != 0)
goto out;
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
return;
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ dprintk("%s: scheduling lease recovery for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
@@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
{
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
nfs_expire_all_delegations(clp);
+ dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
+ clp->cl_hostname);
}
void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
@@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
struct nfs_client *clp = server->nfs_client;
nfs4_state_mark_reclaim_nograce(clp, state);
+ dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
/* Zero session reset errors */
break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ break;
case -EKEYEXPIRED:
/* Nothing we can do */
nfs4_warn_keyexpired(clp->cl_hostname);
break;
default:
+ dprintk("%s: failed to handle error %d for server %s\n",
+ __func__, error, clp->cl_hostname);
return error;
}
+ dprintk("%s: handled error %d for server %s\n", __func__, error,
+ clp->cl_hostname);
return 0;
}
@@ -1572,34 +1586,82 @@ out:
return nfs4_recovery_handle_error(clp, status);
}
+/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
+ * on EXCHANGE_ID for v4.1
+ */
+static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
+{
+ switch (status) {
+ case -NFS4ERR_SEQ_MISORDERED:
+ if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
+ return -ESERVERFAULT;
+ /* Lease confirmation error: retry after purging the lease */
+ ssleep(1);
+ case -NFS4ERR_CLID_INUSE:
+ case -NFS4ERR_STALE_CLIENTID:
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ break;
+ case -EACCES:
+ if (clp->cl_machine_cred == NULL)
+ return -EACCES;
+ /* Handle case where the user hasn't set up machine creds */
+ nfs4_clear_machine_cred(clp);
+ case -NFS4ERR_DELAY:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ ssleep(1);
+ break;
+
+ case -NFS4ERR_MINOR_VERS_MISMATCH:
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+ nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
+ dprintk("%s: exit with error %d for server %s\n",
+ __func__, -EPROTONOSUPPORT, clp->cl_hostname);
+ return -EPROTONOSUPPORT;
+ case -EKEYEXPIRED:
+ nfs4_warn_keyexpired(clp->cl_hostname);
+ case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+ * in nfs4_exchange_id */
+ default:
+ dprintk("%s: exit with error %d for server %s\n", __func__,
+ status, clp->cl_hostname);
+ return status;
+ }
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ dprintk("%s: handled error %d for server %s\n", __func__, status,
+ clp->cl_hostname);
+ return 0;
+}
+
static int nfs4_reclaim_lease(struct nfs_client *clp)
{
struct rpc_cred *cred;
const struct nfs4_state_recovery_ops *ops =
clp->cl_mvops->reboot_recovery_ops;
- int status = -ENOENT;
+ int status;
cred = ops->get_clid_cred(clp);
- if (cred != NULL) {
- status = ops->establish_clid(clp, cred);
- put_rpccred(cred);
- /* Handle case where the user hasn't set up machine creds */
- if (status == -EACCES && cred == clp->cl_machine_cred) {
- nfs4_clear_machine_cred(clp);
- status = -EAGAIN;
- }
- if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
- status = -EPROTONOSUPPORT;
- }
- return status;
+ if (cred == NULL)
+ return -ENOENT;
+ status = ops->establish_clid(clp, cred);
+ put_rpccred(cred);
+ if (status != 0)
+ return nfs4_handle_reclaim_lease_error(clp, status);
+ return 0;
}
#ifdef CONFIG_NFS_V4_1
-void nfs4_schedule_session_recovery(struct nfs4_session *session)
+void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
struct nfs_client *clp = session->clp;
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ switch (err) {
+ default:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ }
nfs4_schedule_lease_recovery(clp);
}
EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+ dprintk("%s: scheduling slot recall for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
- clp->cl_boot_time = CURRENT_TIME;
+ set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
nfs4_state_start_reclaim_nograce(clp);
+ dprintk("%s: scheduling reset of all state for server %s!\n",
+ __func__, clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
}
@@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
+ dprintk("%s: server %s rebooted!\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
}
static void nfs41_handle_state_revoked(struct nfs_client *clp)
{
- /* Temporary */
nfs4_reset_all_state(clp);
+ dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
}
static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
{
/* This will need to handle layouts too */
nfs_expire_all_delegations(clp);
+ dprintk("%s: Recallable state revoked on server %s!\n", __func__,
+ clp->cl_hostname);
}
-static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
nfs4_schedule_state_manager(clp);
+ dprintk("%s: server %s declared a backchannel fault\n", __func__,
+ clp->cl_hostname);
+}
+
+static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+{
+ if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+ &clp->cl_state) == 0)
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
{
if (!flags)
return;
+
+ dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
+ __func__, clp->cl_hostname, clp->cl_clientid, flags);
+
if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
nfs41_handle_server_reboot(clp);
if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
nfs41_handle_state_revoked(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
- if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
- SEQ4_STATUS_BACKCHANNEL_FAULT |
- SEQ4_STATUS_CB_PATH_DOWN_SESSION))
+ if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
+ nfs41_handle_backchannel_fault(clp);
+ else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+ SEQ4_STATUS_CB_PATH_DOWN_SESSION))
nfs41_handle_cb_path_down(clp);
}
static int nfs4_reset_session(struct nfs_client *clp)
{
+ struct rpc_cred *cred;
int status;
nfs4_begin_drain_session(clp);
- status = nfs4_proc_destroy_session(clp->cl_session);
+ cred = nfs4_get_exchange_id_cred(clp);
+ status = nfs4_proc_destroy_session(clp->cl_session, cred);
if (status && status != -NFS4ERR_BADSESSION &&
status != -NFS4ERR_DEADSESSION) {
status = nfs4_recovery_handle_error(clp, status);
@@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)
}
memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
- status = nfs4_proc_create_session(clp);
+ status = nfs4_proc_create_session(clp, cred);
if (status) {
- status = nfs4_recovery_handle_error(clp, status);
+ dprintk("%s: session reset failed with status %d for server %s!\n",
+ __func__, status, clp->cl_hostname);
+ status = nfs4_handle_reclaim_lease_error(clp, status);
goto out;
}
clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
/* create_session negotiated new slot table */
clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ dprintk("%s: session reset was successful for server %s!\n",
+ __func__, clp->cl_hostname);
/* Let the state manager reestablish state */
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
nfs41_setup_state_renewal(clp);
out:
+ if (cred)
+ put_rpccred(cred);
return status;
}
@@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
return 0;
}
-#else /* CONFIG_NFS_V4_1 */
-static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
-static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
-static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
-#endif /* CONFIG_NFS_V4_1 */
-
-/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
- * on EXCHANGE_ID for v4.1
- */
-static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
- switch (status) {
- case -NFS4ERR_CLID_INUSE:
- case -NFS4ERR_STALE_CLIENTID:
- clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ struct rpc_cred *cred;
+ int ret;
+
+ nfs4_begin_drain_session(clp);
+ cred = nfs4_get_exchange_id_cred(clp);
+ ret = nfs4_proc_bind_conn_to_session(clp, cred);
+ if (cred)
+ put_rpccred(cred);
+ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ switch (ret) {
+ case 0:
+ dprintk("%s: bind_conn_to_session was successful for server %s!\n",
+ __func__, clp->cl_hostname);
break;
case -NFS4ERR_DELAY:
- case -ETIMEDOUT:
- case -EAGAIN:
ssleep(1);
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
break;
-
- case -EKEYEXPIRED:
- nfs4_warn_keyexpired(clp->cl_hostname);
- case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
- * in nfs4_exchange_id */
default:
- return;
+ return nfs4_recovery_handle_error(clp, ret);
}
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ return 0;
}
+#else /* CONFIG_NFS_V4_1 */
+static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
+static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
+static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
+
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
+{
+ return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
static void nfs4_state_manager(struct nfs_client *clp)
{
@@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Ensure exclusive access to NFSv4 state */
do {
+ if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+ status = nfs4_reclaim_lease(clp);
+ if (status < 0)
+ goto out_error;
+ clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ }
+
if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
/* We're going to have to re-establish a clientid */
status = nfs4_reclaim_lease(clp);
- if (status) {
- nfs4_set_lease_expired(clp, status);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED,
- &clp->cl_state))
- continue;
- if (clp->cl_cons_state ==
- NFS_CS_SESSION_INITING)
- nfs_mark_client_ready(clp, status);
+ if (status < 0)
goto out_error;
- }
+ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+ continue;
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
goto out_error;
}
+ /* Send BIND_CONN_TO_SESSION */
+ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+ &clp->cl_state) && nfs4_has_session(clp)) {
+ status = nfs4_bind_conn_to_session(clp);
+ if (status < 0)
+ goto out_error;
+ continue;
+ }
+
/* First recover reboot state... */
if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
status = nfs4_do_reclaim(clp,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae3..ee4a74d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -53,9 +53,11 @@
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
+
#include "nfs4_fs.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_XDR
@@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);
#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
/* This is based on getfattr, which uses the most attributes: */
#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
- 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+ 3 + 3 + 3 + nfs4_owner_maxsz + \
+ nfs4_group_maxsz + decode_mdsthreshold_maxsz))
#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
nfs4_fattr_value_maxsz)
#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);
1 /* csr_flags */ + \
decode_channel_attrs_maxsz + \
decode_channel_attrs_maxsz)
+#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \
+ /* bctsa_sessid */ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 /* bctsa_dir */ + \
+ 1 /* bctsa_use_conn_in_rdma_mode */)
+#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \
+ /* bctsr_sessid */ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 /* bctsr_dir */ + \
+ 1 /* bctsr_use_conn_in_rdma_mode */)
#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
+#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2)
+#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz)
#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
@@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_commit_maxsz + \
- encode_getattr_maxsz)
+ encode_commit_maxsz)
#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_commit_maxsz + \
- decode_getattr_maxsz)
+ decode_commit_maxsz)
#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_savefh_maxsz + \
encode_open_maxsz + \
encode_getfh_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_savefh_maxsz + \
decode_open_maxsz + \
decode_getfh_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_open_confirm_sz \
(compound_encode_hdr_maxsz + \
@@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_remove_maxsz + \
- encode_getattr_maxsz)
+ encode_remove_maxsz)
#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_remove_maxsz + \
- decode_getattr_maxsz)
+ decode_remove_maxsz)
#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
- encode_rename_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
- encode_getattr_maxsz)
+ encode_rename_maxsz)
#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
- decode_rename_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
- decode_getattr_maxsz)
+ decode_rename_maxsz)
#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
encode_link_maxsz + \
- decode_getattr_maxsz + \
encode_restorefh_maxsz + \
- decode_getattr_maxsz)
+ encode_getattr_maxsz)
#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
decode_link_maxsz + \
- decode_getattr_maxsz + \
decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_savefh_maxsz + \
encode_create_maxsz + \
encode_getfh_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_savefh_maxsz + \
decode_create_maxsz + \
decode_getfh_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
@@ -738,6 +731,12 @@ static int nfs4_stat_to_errno(int);
decode_putfh_maxsz + \
decode_secinfo_maxsz)
#if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_bind_conn_to_session_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_bind_conn_to_session_maxsz)
+#define NFS4_dec_bind_conn_to_session_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_bind_conn_to_session_maxsz)
#define NFS4_enc_exchange_id_sz \
(compound_encode_hdr_maxsz + \
encode_exchange_id_maxsz)
@@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);
encode_destroy_session_maxsz)
#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
decode_destroy_session_maxsz)
+#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \
+ encode_destroy_clientid_maxsz)
+#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \
+ decode_destroy_clientid_maxsz)
#define NFS4_enc_sequence_sz \
(compound_decode_hdr_maxsz + \
encode_sequence_maxsz)
@@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
encode_nfs4_stateid(xdr, arg->stateid);
}
-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
{
__be32 *p;
@@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
bitmask[1] & nfs4_fattr_bitmap[1], hdr);
}
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+ struct compound_hdr *hdr)
+{
+ encode_getattr_three(xdr,
+ bitmask[0] & nfs4_fattr_bitmap[0],
+ bitmask[1] & nfs4_fattr_bitmap[1],
+ bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+ hdr);
+}
+
static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
{
encode_getattr_three(xdr,
@@ -1678,6 +1691,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
#if defined(CONFIG_NFS_V4_1)
/* NFSv4.1 operations */
+static void encode_bind_conn_to_session(struct xdr_stream *xdr,
+ struct nfs4_session *session,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
+ decode_bind_conn_to_session_maxsz, hdr);
+ encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ p = xdr_reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
+ *p = 0; /* use_conn_in_rdma_mode = False */
+}
+
static void encode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_args *args,
struct compound_hdr *hdr)
@@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,
char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
uint32_t len;
struct nfs_client *clp = args->client;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
u32 max_resp_sz_cached;
/*
@@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,
*p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
/* authsys_parms rfc1831 */
- *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
+ *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
p = xdr_encode_opaque(p, machine_name, len);
*p++ = cpu_to_be32(0); /* UID */
*p++ = cpu_to_be32(0); /* GID */
@@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
}
+static void encode_destroy_clientid(struct xdr_stream *xdr,
+ uint64_t clientid,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
+ encode_uint64(xdr, clientid);
+}
+
static void encode_reclaim_complete(struct xdr_stream *xdr,
struct nfs41_reclaim_complete_args *args,
struct compound_hdr *hdr)
@@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_remove(xdr, &args->name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->new_dir, &hdr);
encode_rename(xdr, args->old_name, args->new_name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->dir_fh, &hdr);
encode_link(xdr, args->name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_restorefh(xdr, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
@@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->dir_fh, &hdr);
- encode_savefh(xdr, &hdr);
encode_create(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- encode_savefh(xdr, &hdr);
encode_open(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->dir_bitmask, &hdr);
+ encode_getfattr_open(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
* a COMMIT request
*/
static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_commitargs *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_commit(xdr, args, &hdr);
- if (args->bitmask)
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fhandle, &hdr);
- encode_delegreturn(xdr, args->stateid, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_delegreturn(xdr, args->stateid, &hdr);
encode_nops(&hdr);
}
@@ -2651,6 +2674,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
#if defined(CONFIG_NFS_V4_1)
/*
+ * BIND_CONN_TO_SESSION request
+ */
+static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_client *clp)
+{
+ struct compound_hdr hdr = {
+ .minorversion = clp->cl_mvops->minor_version,
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* EXCHANGE_ID request
*/
static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
@@ -2699,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
}
/*
+ * a DESTROY_CLIENTID request
+ */
+static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_client *clp)
+{
+ struct compound_hdr hdr = {
+ .minorversion = clp->cl_mvops->minor_version,
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* a SEQUENCE request
*/
static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
}
-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
{
int status;
@@ -4220,6 +4275,110 @@ xdr_error:
return status;
}
+static int decode_threshold_hint(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ uint64_t *res,
+ uint32_t hint_bit)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (likely(bitmap[0] & hint_bit)) {
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
+ }
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+ struct nfs4_threshold *res)
+{
+ __be32 *p, *savep;
+ uint32_t bitmap[3] = {0,}, attrlen;
+ int status;
+
+ /* layout type */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p)) {
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+ }
+ res->l_type = be32_to_cpup(p);
+
+ /* thi_hintset bitmap */
+ status = decode_attr_bitmap(xdr, bitmap);
+ if (status < 0)
+ goto xdr_error;
+
+ /* thi_hintlist length */
+ status = decode_attr_length(xdr, &attrlen, &savep);
+ if (status < 0)
+ goto xdr_error;
+ /* thi_hintlist */
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+ THRESHOLD_RD_IO);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+ THRESHOLD_WR_IO);
+ if (status < 0)
+ goto xdr_error;
+
+ status = verify_attr_len(xdr, savep, attrlen);
+ res->bm = bitmap[0];
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+ res->wr_io_sz);
+xdr_error:
+ dprintk("%s ret=%d!\n", __func__, status);
+ return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vrs MDS I/O
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ struct nfs4_threshold *res)
+{
+ __be32 *p;
+ int status = 0;
+ uint32_t num;
+
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ num = be32_to_cpup(p);
+ if (num == 0)
+ return 0;
+ if (num > 1)
+ printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+ "drivers per filesystem not supported\n",
+ __func__);
+
+ status = decode_first_threshold_item4(xdr, res);
+ }
+ return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_fattr *fattr, struct nfs_fh *fh,
struct nfs4_fs_locations *fs_loc,
@@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
+ status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+ if (status < 0)
+ goto xdr_error;
+
xdr_error:
dprintk("%s: xdr returned %d\n", __func__, -status);
return status;
@@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
uint32_t dummy;
char *dummy_str;
int status;
- struct nfs_client *clp = res->client;
uint32_t impl_id_count;
status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_clientid);
+ xdr_decode_hyper(p, &res->clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
- clp->cl_seqid = be32_to_cpup(p++);
- clp->cl_exchange_flags = be32_to_cpup(p++);
+ res->seqid = be32_to_cpup(p++);
+ res->flags = be32_to_cpup(p++);
/* We ask for SP4_NONE */
dummy = be32_to_cpup(p);
if (dummy != SP4_NONE)
return -EIO;
- /* Throw away minor_id */
+ /* server_owner4.so_minor_id */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
+ p = xdr_decode_hyper(p, &res->server_owner->minor_id);
- /* Throw away Major id */
+ /* server_owner4.so_major_id */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(res->server_owner->major_id, dummy_str, dummy);
+ res->server_owner->major_id_sz = dummy;
- /* Save server_scope */
+ /* server_scope4 */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
-
if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
return -EIO;
-
memcpy(res->server_scope->server_scope, dummy_str, dummy);
res->server_scope->server_scope_sz = dummy;
@@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
}
+static int decode_bind_conn_to_session(struct xdr_stream *xdr,
+ struct nfs41_bind_conn_to_session_res *res)
+{
+ __be32 *p;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
+ if (!status)
+ status = decode_sessionid(xdr, &res->session->sess_id);
+ if (unlikely(status))
+ return status;
+
+ /* dir flags, rdma mode bool */
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ res->dir = be32_to_cpup(p++);
+ if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
+ return -EIO;
+ if (be32_to_cpup(p) == 0)
+ res->use_conn_in_rdma_mode = false;
+ else
+ res->use_conn_in_rdma_mode = true;
+
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_create_session(struct xdr_stream *xdr,
struct nfs41_create_session_res *res)
{
@@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
return decode_op_hdr(xdr, OP_DESTROY_SESSION);
}
+static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
+{
+ return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
+}
+
static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
{
return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_remove(xdr, &res->cinfo);
- if (status)
- goto out;
- decode_getfattr(xdr, res->dir_attr, res->server);
out:
return status;
}
@@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
- if (status)
- goto out;
- /* Current FH is target directory */
- if (decode_getfattr(xdr, res->new_fattr, res->server))
- goto out;
- status = decode_restorefh(xdr);
- if (status)
- goto out;
- decode_getfattr(xdr, res->old_fattr, res->server);
out:
return status;
}
@@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
* Note order: OP_LINK leaves the directory as the current
* filehandle.
*/
- if (decode_getfattr(xdr, res->dir_attr, res->server))
- goto out;
status = decode_restorefh(xdr);
if (status)
goto out;
@@ -5904,21 +6091,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(xdr);
- if (status)
- goto out;
status = decode_create(xdr, &res->dir_cinfo);
if (status)
goto out;
status = decode_getfh(xdr, res->fh);
if (status)
goto out;
- if (decode_getfattr(xdr, res->fattr, res->server))
- goto out;
- status = decode_restorefh(xdr);
- if (status)
- goto out;
- decode_getfattr(xdr, res->dir_fattr, res->server);
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6075,19 +6254,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(xdr);
- if (status)
- goto out;
status = decode_open(xdr, res);
if (status)
goto out;
if (decode_getfh(xdr, &res->fh) != 0)
goto out;
- if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
- goto out;
- if (decode_restorefh(xdr) != 0)
- goto out;
- decode_getfattr(xdr, res->dir_attr, res->server);
+ decode_getfattr(xdr, res->f_attr, res->server);
out:
return status;
}
@@ -6353,7 +6525,7 @@ out:
* Decode COMMIT response
*/
static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_commitres *res)
{
struct compound_hdr hdr;
int status;
@@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_commit(xdr, res);
- if (status)
- goto out;
- if (res->fattr)
- decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_delegreturn(xdr);
+ status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)
goto out;
- decode_getfattr(xdr, res->fattr, res->server);
+ status = decode_delegreturn(xdr);
out:
return status;
}
@@ -6591,6 +6759,22 @@ out:
#if defined(CONFIG_NFS_V4_1)
/*
+ * Decode BIND_CONN_TO_SESSION response
+ */
+static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (!status)
+ status = decode_bind_conn_to_session(xdr, res);
+ return status;
+}
+
+/*
* Decode EXCHANGE_ID response
*/
static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
@@ -6639,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
}
/*
+ * Decode DESTROY_CLIENTID response
+ */
+static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (!status)
+ status = decode_destroy_clientid(xdr, res);
+ return status;
+}
+
+/*
* Decode SEQUENCE response
*/
static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
@@ -7085,6 +7285,9 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
+ PROC(BIND_CONN_TO_SESSION,
+ enc_bind_conn_to_session, dec_bind_conn_to_session),
+ PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3..b47277ba 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
}
-int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
+static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
struct objio_segment **pseg)
{
/* This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
int objio_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
int ret;
- ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
- rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+ hdr->lseg, rdata->args.pages, rdata->args.pgbase,
rdata->args.offset, rdata->args.count, rdata,
GFP_KERNEL, &objios);
if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
- struct page *page = find_get_page(wdata->inode->i_mapping, index);
+ struct page *page = find_get_page(mapping, index);
if (!page) {
- page = find_or_create_page(wdata->inode->i_mapping,
- index, GFP_NOFS);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s: grab_cache_page Failed index=0x%lx\n",
__func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
int objio_write_pagelist(struct nfs_write_data *wdata, int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
int ret;
- ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
- wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+ hdr->lseg, wdata->args.pages, wdata->args.pgbase,
wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
&objios);
if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc..8746135 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
if (status >= 0)
rdata->res.count = status;
else
- rdata->pnfs_error = status;
+ rdata->header->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct inode *inode = hdr->inode;
loff_t offset = rdata->args.offset;
size_t count = rdata->args.count;
int err;
loff_t eof;
- eof = i_size_read(rdata->inode);
+ eof = i_size_read(inode);
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
}
rdata->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
&rdata->args.pgbase,
rdata->args.offset, rdata->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
- __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+ __func__, inode->i_ino, offset, count, rdata->res.eof);
err = objio_read_pagelist(rdata);
out:
if (unlikely(err)) {
- rdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
wdata->res.count = status;
wdata->verf.committed = oir->committed;
} else {
- wdata->pnfs_error = status;
+ wdata->header->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
objlayout_write_pagelist(struct nfs_write_data *wdata,
int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
int err;
- _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
&wdata->args.pgbase,
wdata->args.offset, wdata->args.count);
err = objio_write_pagelist(wdata, how);
if (unlikely(err)) {
- wdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fcea..aed913c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,47 @@
static struct kmem_cache *nfs_page_cachep;
+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{
+ p->npages = pagecount;
+ if (pagecount <= ARRAY_SIZE(p->page_array))
+ p->pagevec = p->page_array;
+ else {
+ p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ if (!p->pagevec)
+ p->npages = 0;
+ }
+ return p->pagevec != NULL;
+}
+
+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr,
+ void (*release)(struct nfs_pgio_header *hdr))
+{
+ hdr->req = nfs_list_entry(desc->pg_list.next);
+ hdr->inode = desc->pg_inode;
+ hdr->cred = hdr->req->wb_context->cred;
+ hdr->io_start = req_offset(hdr->req);
+ hdr->good_bytes = desc->pg_count;
+ hdr->dreq = desc->pg_dreq;
+ hdr->release = release;
+ hdr->completion_ops = desc->pg_completion_ops;
+ if (hdr->completion_ops->init_hdr)
+ hdr->completion_ops->init_hdr(hdr);
+}
+
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+{
+ spin_lock(&hdr->lock);
+ if (pos < hdr->io_start + hdr->good_bytes) {
+ set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+ clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+ hdr->good_bytes = pos - hdr->io_start;
+ hdr->error = error;
+ }
+ spin_unlock(&hdr->lock);
+}
+
static inline struct nfs_page *
nfs_page_alloc(void)
{
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
- atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
- BUG_ON(PagePrivate(page));
- BUG_ON(!PageLocked(page));
- BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
clear_bit(PG_BUSY, &req->wb_flags);
smp_mb__after_clear_bit();
wake_up_bit(&req->wb_flags, PG_BUSY);
+}
+
+/**
+ * nfs_unlock_and_release_request - Unlock request and release the nfs_page
+ * @req:
+ */
+void nfs_unlock_and_release_request(struct nfs_page *req)
+{
+ nfs_unlock_request(req);
nfs_release_request(req);
}
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
+ const struct nfs_pgio_completion_ops *compl_ops,
size_t bsize,
int io_flags)
{
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
+ desc->pg_completion_ops = compl_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
+ desc->pg_dreq = NULL;
}
/**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (req->wb_context->state != prev->wb_context->state)
return false;
- if (req->wb_index != (prev->wb_index + 1))
- return false;
if (req->wb_pgbase != 0)
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
return pgio->pg_ops->pg_test(pgio, prev, req);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bc..b8323aa 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs)) {
+ /* Reset MDS Threshold I/O counters */
+ NFS_I(lo->plh_inode)->write_io = 0;
+ NFS_I(lo->plh_inode)->read_io = 0;
if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
put_layout_hdr_locked(lo);
return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
}
+EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
/*
* Called by the state manger to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
+EXPORT_SYMBOL_GPL(_pnfs_return_layout);
bool pnfs_roc(struct inode *ino)
{
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
/*
+ * Use mdsthreshold hints set at each OPEN to determine if I/O should go
+ * to the MDS or over pNFS
+ *
+ * The nfs_inode read_io and write_io fields are cumulative counters reset
+ * when there are no layout segments. Note that in pnfs_update_layout iomode
+ * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
+ * WRITE request.
+ *
+ * A return of true means use MDS I/O.
+ *
+ * From rfc 5661:
+ * If a file's size is smaller than the file size threshold, data accesses
+ * SHOULD be sent to the metadata server. If an I/O request has a length that
+ * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
+ * server. If both file size and I/O size are provided, the client SHOULD
+ * reach or exceed both thresholds before sending its read or write
+ * requests to the data server.
+ */
+static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
+ struct inode *ino, int iomode)
+{
+ struct nfs4_threshold *t = ctx->mdsthreshold;
+ struct nfs_inode *nfsi = NFS_I(ino);
+ loff_t fsize = i_size_read(ino);
+ bool size = false, size_set = false, io = false, io_set = false, ret = false;
+
+ if (t == NULL)
+ return ret;
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
+
+ switch (iomode) {
+ case IOMODE_READ:
+ if (t->bm & THRESHOLD_RD) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->rd_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_RD_IO) {
+ dprintk("%s nfsi->read_io %llu\n", __func__,
+ nfsi->read_io);
+ io_set = true;
+ if (nfsi->read_io < t->rd_io_sz)
+ io = true;
+ }
+ break;
+ case IOMODE_RW:
+ if (t->bm & THRESHOLD_WR) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->wr_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_WR_IO) {
+ dprintk("%s nfsi->write_io %llu\n", __func__,
+ nfsi->write_io);
+ io_set = true;
+ if (nfsi->write_io < t->wr_io_sz)
+ io = true;
+ }
+ break;
+ }
+ if (size_set && io_set) {
+ if (size && io)
+ ret = true;
+ } else if (size || io)
+ ret = true;
+
+ dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
+ return ret;
+}
+
+/*
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
+
+ if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+ return NULL;
+
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_write_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+ server->rsize, 0);
return true;
}
bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+ server->wsize, ioflags);
return true;
}
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
-static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+int pnfs_write_done_resend_to_mds(struct inode *inode,
+ struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+ nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
}
return 0;
}
+EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
+
+static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+
+ dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+ if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+ pnfs_return_layout(hdr->inode);
+ }
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+ data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+}
/*
* Called by non rpc-based layout drivers
*/
void pnfs_ld_write_done(struct nfs_write_data *data)
{
- if (likely(!data->pnfs_error)) {
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!hdr->pnfs_error) {
pnfs_set_layoutcommit(data);
- data->mds_ops->rpc_call_done(&data->task, data);
- } else {
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
- PNFS_LAYOUTRET_ON_ERROR) {
- /* Don't lo_commit on error, Server will needs to
- * preform a file recovery.
- */
- clear_bit(NFS_INO_LAYOUTCOMMIT,
- &NFS_I(data->inode)->flags);
- pnfs_return_layout(data->inode);
- }
- data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
- }
- put_lseg(data->lseg);
- data->mds_ops->rpc_release(data);
+ hdr->mds_ops->rpc_call_done(&data->task, data);
+ } else
+ pnfs_ld_handle_write_error(data);
+ hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1219,12 +1325,13 @@ static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_write_data *data)
{
- list_splice_tail_init(&data->pages, &desc->pg_list);
- if (data->req && list_empty(&data->req->wb_list))
- nfs_list_add_request(data->req, &desc->pg_list);
- nfs_pageio_reset_write_mds(desc);
- desc->pg_recoalesce = 1;
- put_lseg(data->lseg);
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ }
nfs_writedata_release(data);
}
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
struct pnfs_layout_segment *lseg,
int how)
{
- struct inode *inode = wdata->inode;
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
- wdata->mds_ops = call_ops;
- wdata->lseg = get_lseg(lseg);
+ hdr->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(wdata->lseg);
- wdata->lseg = NULL;
- } else
+ if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
while (!list_empty(head)) {
enum pnfs_try_status trypnfs;
- data = list_entry(head->next, struct nfs_write_data, list);
+ data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);
trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
put_lseg(lseg);
}
+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
+{
+ put_lseg(hdr->lseg);
+ nfs_writehdr_free(hdr);
+}
+
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_write_header *whdr;
+ struct nfs_pgio_header *hdr;
int ret;
- ret = nfs_generic_flush(desc, &head);
- if (ret != 0) {
+ whdr = nfs_writehdr_alloc();
+ if (!whdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
- return ret;
+ return -ENOMEM;
}
- pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
- return 0;
+ hdr = &whdr->header;
+ nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+ hdr->lseg = get_lseg(desc->pg_lseg);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_flush(desc, hdr);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ } else
+ pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+int pnfs_read_done_resend_to_mds(struct inode *inode,
+ struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_pageio_descriptor pgio;
+ LIST_HEAD(failed);
- put_lseg(data->lseg);
- data->lseg = NULL;
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
- PNFS_LAYOUTRET_ON_ERROR)
- pnfs_return_layout(data->inode);
-
- nfs_pageio_init_read_mds(&pgio, data->inode);
-
- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
+ /* Resend all requests through the MDS */
+ nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_pageio_add_request(&pgio, req);
+ if (!nfs_pageio_add_request(&pgio, req))
+ nfs_list_add_request(req, &failed);
}
nfs_pageio_complete(&pgio);
+
+ if (!list_empty(&failed)) {
+ list_move(&failed, head);
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
+
+static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+
+ dprintk("pnfs read error = %d\n", hdr->pnfs_error);
+ if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+ pnfs_return_layout(hdr->inode);
+ }
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+ data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
}
/*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
*/
void pnfs_ld_read_done(struct nfs_read_data *data)
{
- if (likely(!data->pnfs_error)) {
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (likely(!hdr->pnfs_error)) {
__nfs4_read_done_cb(data);
- data->mds_ops->rpc_call_done(&data->task, data);
+ hdr->mds_ops->rpc_call_done(&data->task, data);
} else
pnfs_ld_handle_read_error(data);
- put_lseg(data->lseg);
- data->mds_ops->rpc_release(data);
+ hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1334,11 +1476,13 @@ static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_read_data *data)
{
- list_splice_tail_init(&data->pages, &desc->pg_list);
- if (data->req && list_empty(&data->req->wb_list))
- nfs_list_add_request(data->req, &desc->pg_list);
- nfs_pageio_reset_read_mds(desc);
- desc->pg_recoalesce = 1;
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ }
nfs_readdata_release(data);
}
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
- struct inode *inode = rdata->inode;
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct inode *inode = hdr->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
- rdata->mds_ops = call_ops;
- rdata->lseg = get_lseg(lseg);
+ hdr->mds_ops = call_ops;
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
- if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(rdata->lseg);
- rdata->lseg = NULL;
- } else {
+ if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
- }
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
while (!list_empty(head)) {
enum pnfs_try_status trypnfs;
- data = list_entry(head->next, struct nfs_read_data, list);
+ data = list_first_entry(head, struct nfs_read_data, list);
list_del_init(&data->list);
trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
put_lseg(lseg);
}
+static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
+{
+ put_lseg(hdr->lseg);
+ nfs_readhdr_free(hdr);
+}
+
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_read_header *rhdr;
+ struct nfs_pgio_header *hdr;
int ret;
- ret = nfs_generic_pagein(desc, &head);
- if (ret != 0) {
+ rhdr = nfs_readhdr_alloc();
+ if (!rhdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ ret = -ENOMEM;
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
}
- pnfs_do_multiple_reads(desc, &head);
- return 0;
+ hdr = &rhdr->header;
+ nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
+ hdr->lseg = get_lseg(desc->pg_lseg);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pagein(desc, hdr);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ } else
+ pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
- struct nfs_inode *nfsi = NFS_I(wdata->inode);
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct inode *inode = hdr->inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos = wdata->mds_offset + wdata->res.count;
bool mark_as_dirty = false;
- spin_lock(&nfsi->vfs_inode.i_lock);
+ spin_lock(&inode->i_lock);
if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
mark_as_dirty = true;
dprintk("%s: Set layoutcommit for inode %lu ",
- __func__, wdata->inode->i_ino);
+ __func__, inode->i_ino);
}
- if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+ if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
/* references matched in nfs4_layoutcommit_release */
- get_lseg(wdata->lseg);
+ get_lseg(hdr->lseg);
}
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- spin_unlock(&nfsi->vfs_inode.i_lock);
+ spin_unlock(&inode->i_lock);
dprintk("%s: lseg %p end_pos %llu\n",
- __func__, wdata->lseg, nfsi->layout->plh_lwb);
+ __func__, hdr->lseg, nfsi->layout->plh_lwb);
/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
* will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
if (mark_as_dirty)
- mark_inode_dirty_sync(wdata->inode);
+ mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
@@ -1550,3 +1712,15 @@ out_free:
kfree(data);
goto out;
}
+
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+ struct nfs4_threshold *thp;
+
+ thp = kzalloc(sizeof(*thp), GFP_NOFS);
+ if (!thp) {
+ dprintk("%s mdsthreshold allocation failed\n", __func__);
+ return NULL;
+ }
+ return thp;
+}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf6..29fd23c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -63,6 +63,7 @@ enum {
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_ROC, /* some lseg had roc bit set */
NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
+ NFS_LAYOUT_INVALID, /* layout is being destroyed */
};
enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
const struct nfs_pageio_ops *pg_read_ops;
const struct nfs_pageio_ops *pg_write_ops;
+ struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
void (*mark_request_commit) (struct nfs_page *req,
- struct pnfs_layout_segment *lseg);
- void (*clear_request_commit) (struct nfs_page *req);
- int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
- int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+ void (*clear_request_commit) (struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+ int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+ int max);
+ void (*recover_commit_reqs) (struct list_head *list,
+ struct nfs_commit_info *cinfo);
+ int (*commit_pagelist)(struct inode *inode,
+ struct list_head *mds_pages,
+ int how,
+ struct nfs_commit_info *cinfo);
/*
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+ const struct nfs_pgio_completion_ops *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+ int, const struct nfs_pgio_completion_ops *);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops);
+int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops);
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
enum {
@@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
}
static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+ struct nfs_commit_info *cinfo)
{
- if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+ if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
return PNFS_NOT_ATTEMPTED;
- return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+ return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+}
+
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+ if (ld == NULL || ld->get_ds_info == NULL)
+ return NULL;
+ return ld->get_ds_info(inode);
}
static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (lseg == NULL || ld->mark_request_commit == NULL)
return false;
- ld->mark_request_commit(req, lseg);
+ ld->mark_request_commit(req, lseg, cinfo);
return true;
}
static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->clear_request_commit == NULL)
return false;
- ld->clear_request_commit(req);
+ ld->clear_request_commit(req, cinfo);
return true;
}
static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+ int max)
{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
- int ret;
-
- if (ld == NULL || ld->scan_commit_lists == NULL)
+ if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
return 0;
- ret = ld->scan_commit_lists(inode, max, lock);
- if (ret != 0)
- set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
- return ret;
+ else
+ return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
+}
+
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+ struct nfs_commit_info *cinfo)
+{
+ if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
+ return;
+ NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
}
/* Should the pNFS client commit and return the layout upon a setattr */
@@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+ struct nfs_server *nfss)
+{
+ return (dst && src && src->bm != 0 &&
+ nfss->pnfs_curr_ld->id == src->l_type);
+}
+
#ifdef NFS_DEBUG
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
#else
@@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
-static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
return false;
}
-static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
return false;
}
static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+ struct nfs_commit_info *cinfo)
{
return PNFS_NOT_ATTEMPTED;
}
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+ return NULL;
+}
+
static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
return false;
}
static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
return false;
}
static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+ int max)
{
return 0;
}
+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+ struct nfs_commit_info *cinfo)
+{
+}
+
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
}
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+ struct nfs_server *nfss)
+{
+ return false;
+}
+
+static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+ return NULL;
+}
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d6408b6..a706b6b 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
@@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
+ struct inode *inode = data->header->inode;
+
if (nfs_async_handle_expired_key(task))
return -EAGAIN;
- nfs_invalidate_atime(data->inode);
+ nfs_invalidate_atime(inode);
if (task->tk_status >= 0) {
- nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_refresh_inode(inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
@@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
+ struct inode *inode = data->header->inode;
+
if (nfs_async_handle_expired_key(task))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
return 0;
}
@@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
rpc_call_start(task);
}
+static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+ BUG();
+}
+
static void
-nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
BUG();
}
@@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.file_inode_ops = &nfs_file_inode_operations,
.file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
+ .submount = nfs_submount,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
@@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
+ .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28..86ced78 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,43 +30,73 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct rpc_call_ops nfs_read_common_ops;
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static struct kmem_cache *nfs_rdata_cachep;
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc(void)
{
- struct nfs_read_data *p;
-
- p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
- if (p) {
- INIT_LIST_HEAD(&p->pages);
- p->npages = pagecount;
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
- else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
- if (!p->pagevec) {
- kmem_cache_free(nfs_rdata_cachep, p);
- p = NULL;
- }
- }
+ struct nfs_read_header *rhdr;
+
+ rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+ if (rhdr) {
+ struct nfs_pgio_header *hdr = &rhdr->header;
+
+ INIT_LIST_HEAD(&hdr->pages);
+ INIT_LIST_HEAD(&hdr->rpc_list);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
+ }
+ return rhdr;
+}
+
+static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_read_data *data, *prealloc;
+
+ prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
}
- return p;
+out:
+ return data;
}
-void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- kmem_cache_free(nfs_rdata_cachep, p);
+ struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+
+ kmem_cache_free(nfs_rdata_cachep, rhdr);
}
void nfs_readdata_release(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+
put_nfs_open_context(rdata->args.context);
- nfs_readdata_free(rdata);
+ if (rdata->pages.pagevec != rdata->pages.page_array)
+ kfree(rdata->pages.pagevec);
+ if (rdata != &read_header->rpc_data)
+ kfree(rdata);
+ else
+ rdata->header = NULL;
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
}
static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
return 0;
}
-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
-{
- unsigned int remainder = data->args.count - data->res.count;
- unsigned int base = data->args.pgbase + data->res.count;
- unsigned int pglen;
- struct page **pages;
-
- if (data->res.eof == 0 || remainder == 0)
- return;
- /*
- * Note: "remainder" can never be negative, since we check for
- * this in the XDR code.
- */
- pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
- base &= ~PAGE_CACHE_MASK;
- pglen = PAGE_CACHE_SIZE - base;
- for (;;) {
- if (remainder <= pglen) {
- zero_user(*pages, base, remainder);
- break;
- }
- zero_user(*pages, base, pglen);
- pages++;
- remainder -= pglen;
- pglen = PAGE_CACHE_SIZE;
- base = 0;
- }
-}
-
void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+ nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
NFS_SERVER(inode)->rsize, 0);
}
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- if (!pnfs_pageio_init_read(pgio, inode))
- nfs_pageio_init_read_mds(pgio, inode);
+ if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
+ nfs_pageio_init_read_mds(pgio, inode, compl_ops);
}
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- nfs_pageio_init_read(&pgio, inode);
+ nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
+ NFS_I(inode)->read_io += pgio.pg_bytes_written;
return 0;
}
@@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}
-int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
- const struct rpc_call_ops *call_ops)
+/* Note io was page aligned */
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
+{
+ unsigned long bytes = 0;
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out;
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+ if (bytes > hdr->good_bytes)
+ zero_user(page, 0, PAGE_SIZE);
+ else if (hdr->good_bytes - bytes < PAGE_SIZE)
+ zero_user_segment(page,
+ hdr->good_bytes & ~PAGE_MASK,
+ PAGE_SIZE);
+ }
+ bytes += req->wb_bytes;
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ if (bytes <= hdr->good_bytes)
+ SetPageUptodate(page);
+ } else
+ SetPageUptodate(page);
+ nfs_list_remove_request(req);
+ nfs_readpage_release(req);
+ }
+out:
+ hdr->release(hdr);
+}
+
+int nfs_initiate_read(struct rpc_clnt *clnt,
+ struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops, int flags)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = data->cred,
+ .rpc_cred = data->header->cred,
};
struct rpc_task_setup task_setup_data = {
.task = &data->task,
@@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
.callback_ops = call_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | swap_flags,
+ .flags = RPC_TASK_ASYNC | swap_flags | flags,
};
/* Set up the initial task struct. */
@@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
- data->req = req;
- data->inode = inode;
- data->cred = req->wb_context->cred;
+ struct nfs_page *req = data->header->req;
- data->args.fh = NFS_FH(inode);
+ data->args.fh = NFS_FH(data->header->inode);
data->args.offset = req_offset(req) + offset;
data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pagevec;
+ data->args.pages = data->pages.pagevec;
data->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
@@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
static int nfs_do_read(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
- struct inode *inode = data->args.context->dentry->d_inode;
+ struct inode *inode = data->header->inode;
- return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
}
static int
@@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,
while (!list_empty(head)) {
int ret2;
- data = list_entry(head->next, struct nfs_read_data, list);
+ data = list_first_entry(head, struct nfs_read_data, list);
list_del_init(&data->list);
ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)
}
}
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+ .error_cleanup = nfs_async_read_error,
+ .completion = nfs_read_completion,
+};
+
+static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ while (!list_empty(&hdr->rpc_list)) {
+ struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
+ struct nfs_read_data, list);
+ list_del(&data->list);
+ nfs_readdata_release(data);
+ }
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+}
+
/*
* Generate multiple requests to fill a single page.
*
@@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+ struct nfs_page *req = hdr->req;
struct page *page = req->wb_page;
struct nfs_read_data *data;
size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
- int requests = 0;
- int ret = 0;
-
- nfs_list_remove_request(req);
offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
- data = nfs_readdata_alloc(1);
- if (!data)
- goto out_bad;
- data->pagevec[0] = page;
- nfs_read_rpcsetup(req, data, len, offset);
- list_add(&data->list, res);
- requests++;
+ data = nfs_readdata_alloc(hdr, 1);
+ if (!data) {
+ nfs_pagein_error(desc, hdr);
+ return -ENOMEM;
+ }
+ data->pages.pagevec[0] = page;
+ nfs_read_rpcsetup(data, len, offset);
+ list_add(&data->list, &hdr->rpc_list);
nbytes -= len;
offset += len;
- } while(nbytes != 0);
- atomic_set(&req->wb_complete, requests);
- desc->pg_rpc_callops = &nfs_read_partial_ops;
- return ret;
-out_bad:
- while (!list_empty(res)) {
- data = list_entry(res->next, struct nfs_read_data, list);
- list_del(&data->list);
- nfs_readdata_release(data);
- }
- nfs_readpage_release(req);
- return -ENOMEM;
+ } while (nbytes != 0);
+
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);
+ desc->pg_rpc_callops = &nfs_read_common_ops;
+ return 0;
}
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
struct page **pages;
- struct nfs_read_data *data;
+ struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
- int ret = 0;
- data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
+ data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
if (!data) {
- nfs_async_read_error(head);
- ret = -ENOMEM;
- goto out;
+ nfs_pagein_error(desc, hdr);
+ return -ENOMEM;
}
- pages = data->pagevec;
+ pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &data->pages);
+ nfs_list_add_request(req, &hdr->pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(data->pages.next);
- nfs_read_rpcsetup(req, data, desc->pg_count, 0);
- list_add(&data->list, res);
- desc->pg_rpc_callops = &nfs_read_full_ops;
-out:
- return ret;
+ nfs_read_rpcsetup(data, desc->pg_count, 0);
+ list_add(&data->list, &hdr->rpc_list);
+ desc->pg_rpc_callops = &nfs_read_common_ops;
+ return 0;
}
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc, head);
- return nfs_pagein_one(desc, head);
+ return nfs_pagein_multi(desc, hdr);
+ return nfs_pagein_one(desc, hdr);
}
static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_read_header *rhdr;
+ struct nfs_pgio_header *hdr;
int ret;
- ret = nfs_generic_pagein(desc, &head);
+ rhdr = nfs_readhdr_alloc();
+ if (!rhdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &rhdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pagein(desc, hdr);
if (ret == 0)
- ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+ ret = nfs_do_multiple_reads(&hdr->rpc_list,
+ desc->pg_rpc_callops);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
return ret;
}
@@ -389,20 +442,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
*/
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
+ struct inode *inode = data->header->inode;
int status;
dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
task->tk_status);
- status = NFS_PROTO(data->inode)->read_done(task, data);
+ status = NFS_PROTO(inode)->read_done(task, data);
if (status != 0)
return status;
- nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
if (task->tk_status == -ESTALE) {
- set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
- nfs_mark_for_revalidate(data->inode);
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ nfs_mark_for_revalidate(inode);
}
return 0;
}
@@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
- if (resp->eof || resp->count == argp->count)
- return;
-
/* This is a short read! */
- nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
- if (resp->count == 0)
+ if (resp->count == 0) {
+ nfs_set_pgio_error(data->header, -EIO, argp->offset);
return;
-
+ }
/* Yes, so retry the read at the end of the data */
data->mds_offset += resp->count;
argp->offset += resp->count;
@@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
rpc_restart_call_prepare(task);
}
-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
-
+ struct nfs_pgio_header *hdr = data->header;
+
+ /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
if (nfs_readpage_result(task, data) != 0)
return;
if (task->tk_status < 0)
- return;
-
- nfs_readpage_truncate_uninitialised_page(data);
- nfs_readpage_retry(task, data);
+ nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
+ else if (data->res.eof) {
+ loff_t bound;
+
+ bound = data->args.offset + data->res.count;
+ spin_lock(&hdr->lock);
+ if (bound < hdr->io_start + hdr->good_bytes) {
+ set_bit(NFS_IOHDR_EOF, &hdr->flags);
+ clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+ hdr->good_bytes = bound - hdr->io_start;
+ }
+ spin_unlock(&hdr->lock);
+ } else if (data->res.count != data->args.count)
+ nfs_readpage_retry(task, data);
}
-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_release_common(void *calldata)
{
- struct nfs_read_data *data = calldata;
- struct nfs_page *req = data->req;
- struct page *page = req->wb_page;
- int status = data->task.tk_status;
-
- if (status < 0)
- set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
-
- if (atomic_dec_and_test(&req->wb_complete)) {
- if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
- SetPageUptodate(page);
- nfs_readpage_release(req);
- }
nfs_readdata_release(calldata);
}
void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
- NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
-}
-
-static const struct rpc_call_ops nfs_read_partial_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_partial,
- .rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
- unsigned int count = data->res.count;
- unsigned int base = data->args.pgbase;
- struct page **pages;
-
- if (data->res.eof)
- count = data->args.count;
- if (unlikely(count == 0))
- return;
- pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
- base &= ~PAGE_CACHE_MASK;
- count += base;
- for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
- SetPageUptodate(*pages);
- if (count == 0)
- return;
- /* Was this a short read? */
- if (data->res.eof || data->res.count == data->args.count)
- SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
- struct nfs_read_data *data = calldata;
-
- if (nfs_readpage_result(task, data) != 0)
- return;
- if (task->tk_status < 0)
- return;
- /*
- * Note: nfs_readpage_retry may change the values of
- * data->args. In the multi-page case, we therefore need
- * to ensure that we call nfs_readpage_set_pages_uptodate()
- * first.
- */
- nfs_readpage_truncate_uninitialised_page(data);
- nfs_readpage_set_pages_uptodate(data);
- nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
- struct nfs_read_data *data = calldata;
-
- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
-
- nfs_list_remove_request(req);
- nfs_readpage_release(req);
- }
- nfs_readdata_release(calldata);
+ NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
}
-static const struct rpc_call_ops nfs_read_full_ops = {
+static const struct rpc_call_ops nfs_read_common_ops = {
.rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_full,
- .rpc_release = nfs_readpage_release_full,
+ .rpc_call_done = nfs_readpage_result_common,
+ .rpc_release = nfs_readpage_release_common,
};
/*
@@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- nfs_pageio_init_read(&pgio, inode);
+ nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
nfs_pageio_complete(&pgio);
+ NFS_I(inode)->read_io += pgio.pg_bytes_written;
npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
@@ -684,7 +669,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_read_data),
+ sizeof(struct nfs_read_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca..ff656c0 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -66,6 +66,7 @@
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
+#define NFS_TEXT_DATA 1
#ifdef CONFIG_NFS_V3
#define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
{ Opt_vers_err, NULL }
};
+struct nfs_mount_info {
+ void (*fill_super)(struct super_block *, struct nfs_mount_info *);
+ int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
+ struct nfs_parsed_mount_data *parsed;
+ struct nfs_clone_mount *cloned;
+ struct nfs_fh *mntfh;
+};
+
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct dentry *);
static int nfs_show_devname(struct seq_file *, struct dentry *);
static int nfs_show_path(struct seq_file *, struct dentry *);
static int nfs_show_stats(struct seq_file *, struct dentry *);
+static struct dentry *nfs_fs_mount_common(struct file_system_type *,
+ struct nfs_server *, int, const char *, struct nfs_mount_info *);
static struct dentry *nfs_fs_mount(struct file_system_type *,
int, const char *, void *);
static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
};
#ifdef CONFIG_NFS_V4
-static int nfs4_validate_text_mount_data(void *options,
+static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
+static int nfs4_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args, const char *dev_name);
static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_parsed_mount_data *data);
-static struct dentry *nfs4_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
+ struct nfs_mount_info *mount_info);
static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .mount = nfs4_mount,
+ .mount = nfs_fs_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
{
- if (nfss->nfs_client && nfss->nfs_client->impl_id) {
- struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
+ if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
+ struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
"date='%llu,%u'",
impl_id->name, impl_id->domain,
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
rpc_killall_tasks(rpc);
}
-static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version)
+static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
{
struct nfs_parsed_mount_data *data;
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
data->auth_flavors[0] = RPC_AUTH_UNIX;
data->auth_flavor_len = 1;
- data->version = version;
data->minorversion = 0;
+ data->need_mount = true;
data->net = current->nsproxy->net_ns;
security_init_mnt_opts(&data->lsm_opts);
}
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
* Use the remote server's MOUNT service to request the NFS file handle
* corresponding to the provided path.
*/
-static int nfs_try_mount(struct nfs_parsed_mount_data *args,
- struct nfs_fh *root_fh)
+static int nfs_request_mount(struct nfs_parsed_mount_data *args,
+ struct nfs_fh *root_fh)
{
rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
return nfs_walk_authlist(args, &request);
}
+static struct dentry *nfs_try_mount(int flags, const char *dev_name,
+ struct nfs_mount_info *mount_info)
+{
+ int status;
+ struct nfs_server *server;
+
+ if (mount_info->parsed->need_mount) {
+ status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
+ if (status)
+ return ERR_PTR(status);
+ }
+
+ /* Get a volume representation */
+ server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
+ if (IS_ERR(server))
+ return ERR_CAST(server);
+
+ return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
+}
+
/*
* Split "dev_name" into "hostname:export_path".
*
@@ -1826,10 +1856,10 @@ out_path:
* + breaking back: trying proto=udp after proto=tcp, v2 after v3,
* mountproto=tcp after mountproto=udp, and so on
*/
-static int nfs_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
+static int nfs23_validate_mount_data(void *options,
+ struct nfs_parsed_mount_data *args,
+ struct nfs_fh *mntfh,
+ const char *dev_name)
{
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
args->acregmax = data->acregmax;
args->acdirmin = data->acdirmin;
args->acdirmax = data->acdirmax;
+ args->need_mount = false;
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
}
break;
- default: {
- int status;
-
- if (nfs_parse_mount_options((char *)options, args) == 0)
- return -EINVAL;
-
- if (!nfs_verify_server_address(sap))
- goto out_no_address;
-
- if (args->version == 4)
-#ifdef CONFIG_NFS_V4
- return nfs4_validate_text_mount_data(options,
- args, dev_name);
-#else
- goto out_v4_not_compiled;
-#endif
-
- nfs_set_port(sap, &args->nfs_server.port, 0);
-
- nfs_set_mount_transport_protocol(args);
-
- status = nfs_parse_devname(dev_name,
- &args->nfs_server.hostname,
- PAGE_SIZE,
- &args->nfs_server.export_path,
- NFS_MAXPATHLEN);
- if (!status)
- status = nfs_try_mount(args, mntfh);
-
- kfree(args->nfs_server.export_path);
- args->nfs_server.export_path = NULL;
-
- if (status)
- return status;
-
- break;
- }
+ default:
+ return NFS_TEXT_DATA;
}
#ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
return -EPROTONOSUPPORT;
#endif /* !CONFIG_NFS_V3 */
-#ifndef CONFIG_NFS_V4
-out_v4_not_compiled:
- dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
- return -EPROTONOSUPPORT;
-#endif /* !CONFIG_NFS_V4 */
-
out_nomem:
dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
return -EINVAL;
}
+#ifdef CONFIG_NFS_V4
+static int nfs_validate_mount_data(struct file_system_type *fs_type,
+ void *options,
+ struct nfs_parsed_mount_data *args,
+ struct nfs_fh *mntfh,
+ const char *dev_name)
+{
+ if (fs_type == &nfs_fs_type)
+ return nfs23_validate_mount_data(options, args, mntfh, dev_name);
+ return nfs4_validate_mount_data(options, args, dev_name);
+}
+#else
+static int nfs_validate_mount_data(struct file_system_type *fs_type,
+ void *options,
+ struct nfs_parsed_mount_data *args,
+ struct nfs_fh *mntfh,
+ const char *dev_name)
+{
+ return nfs23_validate_mount_data(options, args, mntfh, dev_name);
+}
+#endif
+
+static int nfs_validate_text_mount_data(void *options,
+ struct nfs_parsed_mount_data *args,
+ const char *dev_name)
+{
+ int port = 0;
+ int max_namelen = PAGE_SIZE;
+ int max_pathlen = NFS_MAXPATHLEN;
+ struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
+
+ if (nfs_parse_mount_options((char *)options, args) == 0)
+ return -EINVAL;
+
+ if (!nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (args->version == 4) {
+#ifdef CONFIG_NFS_V4
+ port = NFS_PORT;
+ max_namelen = NFS4_MAXNAMLEN;
+ max_pathlen = NFS4_MAXPATHLEN;
+ nfs_validate_transport_protocol(args);
+ nfs4_validate_mount_flags(args);
+#else
+ goto out_v4_not_compiled;
+#endif /* CONFIG_NFS_V4 */
+ } else
+ nfs_set_mount_transport_protocol(args);
+
+ nfs_set_port(sap, &args->nfs_server.port, port);
+
+ if (args->auth_flavor_len > 1)
+ goto out_bad_auth;
+
+ return nfs_parse_devname(dev_name,
+ &args->nfs_server.hostname,
+ max_namelen,
+ &args->nfs_server.export_path,
+ max_pathlen);
+
+#ifndef CONFIG_NFS_V4
+out_v4_not_compiled:
+ dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
+ return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V4 */
+
+out_no_address:
+ dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+ return -EINVAL;
+
+out_bad_auth:
+ dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
+ return -EINVAL;
+}
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
* Finish setting up an NFS2/3 superblock
*/
static void nfs_fill_super(struct super_block *sb,
- struct nfs_parsed_mount_data *data)
+ struct nfs_mount_info *mount_info)
{
+ struct nfs_parsed_mount_data *data = mount_info->parsed;
struct nfs_server *server = NFS_SB(sb);
sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
* Finish setting up a cloned NFS2/3 superblock
*/
static void nfs_clone_super(struct super_block *sb,
- const struct super_block *old_sb)
+ struct nfs_mount_info *mount_info)
{
+ const struct super_block *old_sb = mount_info->cloned->sb;
struct nfs_server *server = NFS_SB(sb);
sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
return nfs_compare_mount_options(sb, server, mntflags);
}
+#ifdef CONFIG_NFS_FSCACHE
+static void nfs_get_cache_cookie(struct super_block *sb,
+ struct nfs_parsed_mount_data *parsed,
+ struct nfs_clone_mount *cloned)
+{
+ char *uniq = NULL;
+ int ulen = 0;
+
+ if (parsed && parsed->fscache_uniq) {
+ uniq = parsed->fscache_uniq;
+ ulen = strlen(parsed->fscache_uniq);
+ } else if (cloned) {
+ struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+ if (mnt_s->fscache_key) {
+ uniq = mnt_s->fscache_key->key.uniquifier;
+ ulen = mnt_s->fscache_key->key.uniq_len;
+ };
+ }
+
+ nfs_fscache_get_super_cookie(sb, uniq, ulen);
+}
+#else
+static void nfs_get_cache_cookie(struct super_block *sb,
+ struct nfs_parsed_mount_data *parsed,
+ struct nfs_clone_mount *cloned)
+{
+}
+#endif
+
static int nfs_bdi_register(struct nfs_server *server)
{
return bdi_register_dev(&server->backing_dev_info, server->s_dev);
}
-static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
+static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
+ struct nfs_mount_info *mount_info)
+{
+ return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
+}
+
+static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
+ struct nfs_mount_info *mount_info)
+{
+ /* clone any lsm security options from the parent to the new sb */
+ security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
+ return -ESTALE;
+ return 0;
+}
+
+static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
+ struct nfs_server *server,
+ int flags, const char *dev_name,
+ struct nfs_mount_info *mount_info)
{
- struct nfs_server *server = NULL;
struct super_block *s;
- struct nfs_parsed_mount_data *data;
- struct nfs_fh *mntfh;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
.mntflags = flags,
+ .server = server,
};
int error;
- data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
- mntfh = nfs_alloc_fhandle();
- if (data == NULL || mntfh == NULL)
- goto out;
-
- /* Validate the mount data */
- error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
- if (error < 0) {
- mntroot = ERR_PTR(error);
- goto out;
- }
-
-#ifdef CONFIG_NFS_V4
- if (data->version == 4) {
- mntroot = nfs4_try_mount(flags, dev_name, data);
- goto out;
- }
-#endif /* CONFIG_NFS_V4 */
-
- /* Get a volume representation */
- server = nfs_create_server(data, mntfh);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
- }
- sb_mntdata.server = server;
-
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
- nfs_fill_super(s, data);
- nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
+ mount_info->fill_super(s, mount_info);
+ nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
}
- mntroot = nfs_get_root(s, mntfh, dev_name);
+ mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
if (IS_ERR(mntroot))
goto error_splat_super;
- error = security_sb_set_mnt_opts(s, &data->lsm_opts);
+ error = mount_info->set_security(s, mntroot, mount_info);
if (error)
goto error_splat_root;
s->s_flags |= MS_ACTIVE;
out:
- nfs_free_parsed_mount_data(data);
- nfs_free_fhandle(mntfh);
return mntroot;
out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
goto out;
}
+static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
+{
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs_fill_super,
+ .set_security = nfs_set_sb_security,
+ };
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
+ int error;
+
+ mount_info.parsed = nfs_alloc_parsed_mount_data();
+ mount_info.mntfh = nfs_alloc_fhandle();
+ if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
+ goto out;
+
+ /* Validate the mount data */
+ error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
+ if (error == NFS_TEXT_DATA)
+ error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
+ if (error < 0) {
+ mntroot = ERR_PTR(error);
+ goto out;
+ }
+
+#ifdef CONFIG_NFS_V4
+ if (mount_info.parsed->version == 4)
+ mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
+ else
+#endif /* CONFIG_NFS_V4 */
+ mntroot = nfs_try_mount(flags, dev_name, &mount_info);
+
+out:
+ nfs_free_parsed_mount_data(mount_info.parsed);
+ nfs_free_fhandle(mount_info.mntfh);
+ return mntroot;
+}
+
/*
* Ensure that we unregister the bdi before kill_anon_super
* releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
}
/*
- * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
*/
static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
+nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
+ const char *dev_name, struct nfs_mount_info *mount_info)
{
- struct nfs_clone_mount *data = raw_data;
- struct super_block *s;
+ struct nfs_clone_mount *data = mount_info->cloned;
struct nfs_server *server;
- struct dentry *mntroot;
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
- };
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
int error;
- dprintk("--> nfs_xdev_mount()\n");
+ dprintk("--> nfs_xdev_mount_common()\n");
+
+ mount_info->mntfh = data->fh;
/* create a new volume representation */
server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
if (IS_ERR(server)) {
error = PTR_ERR(server);
- goto out_err_noserver;
- }
- sb_mntdata.server = server;
-
- if (server->flags & NFS_MOUNT_UNSHARED)
- compare_super = NULL;
-
- /* -o noac implies -o sync */
- if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
- /* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto out_err_nosb;
- }
-
- if (s->s_fs_info != server) {
- nfs_free_server(server);
- server = NULL;
- } else {
- error = nfs_bdi_register(server);
- if (error)
- goto error_splat_bdi;
- }
-
- if (!s->s_root) {
- /* initial superblock/root creation */
- nfs_clone_super(s, data->sb);
- nfs_fscache_get_super_cookie(s, NULL, data);
- }
-
- mntroot = nfs_get_root(s, data->fh, dev_name);
- if (IS_ERR(mntroot)) {
- error = PTR_ERR(mntroot);
- goto error_splat_super;
- }
- if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
- dput(mntroot);
- error = -ESTALE;
- goto error_splat_super;
+ goto out_err;
}
- s->s_flags |= MS_ACTIVE;
-
- /* clone any lsm security options from the parent to the new sb */
- security_sb_clone_mnt_opts(data->sb, s);
-
- dprintk("<-- nfs_xdev_mount() = 0\n");
+ mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
+ dprintk("<-- nfs_xdev_mount_common() = 0\n");
+out:
return mntroot;
-out_err_nosb:
- nfs_free_server(server);
-out_err_noserver:
- dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
- return ERR_PTR(error);
+out_err:
+ dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
+ goto out;
+}
-error_splat_super:
- if (server && !s->s_root)
- bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
- deactivate_locked_super(s);
- dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
- return ERR_PTR(error);
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
+{
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs_clone_super,
+ .set_security = nfs_clone_sb_security,
+ .cloned = raw_data,
+ };
+ return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
}
#ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
* Finish setting up a cloned NFS4 superblock
*/
static void nfs4_clone_super(struct super_block *sb,
- const struct super_block *old_sb)
+ struct nfs_mount_info *mount_info)
{
+ const struct super_block *old_sb = mount_info->cloned->sb;
sb->s_blocksize_bits = old_sb->s_blocksize_bits;
sb->s_blocksize = old_sb->s_blocksize;
sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
/*
* Set up an NFS4 superblock
*/
-static void nfs4_fill_super(struct super_block *sb)
+static void nfs4_fill_super(struct super_block *sb,
+ struct nfs_mount_info *mount_info)
{
sb->s_time_gran = 1;
sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
}
-static int nfs4_validate_text_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- const char *dev_name)
-{
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-
- nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
-
- nfs_validate_transport_protocol(args);
-
- nfs4_validate_mount_flags(args);
-
- if (args->version != 4) {
- dfprintk(MOUNT,
- "NFS4: Illegal mount version\n");
- return -EINVAL;
- }
-
- if (args->auth_flavor_len > 1) {
- dfprintk(MOUNT,
- "NFS4: Too many RPC auth flavours specified\n");
- return -EINVAL;
- }
-
- return nfs_parse_devname(dev_name,
- &args->nfs_server.hostname,
- NFS4_MAXNAMLEN,
- &args->nfs_server.export_path,
- NFS4_MAXPATHLEN);
-}
-
/*
* Validate NFSv4 mount options
*/
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
break;
default:
- if (nfs_parse_mount_options((char *)options, args) == 0)
- return -EINVAL;
-
- if (!nfs_verify_server_address(sap))
- return -EINVAL;
-
- return nfs4_validate_text_mount_data(options, args, dev_name);
+ return NFS_TEXT_DATA;
}
return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
*/
static struct dentry *
nfs4_remote_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
+ const char *dev_name, void *info)
{
- struct nfs_parsed_mount_data *data = raw_data;
- struct super_block *s;
+ struct nfs_mount_info *mount_info = info;
struct nfs_server *server;
- struct nfs_fh *mntfh;
- struct dentry *mntroot;
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
- };
- int error = -ENOMEM;
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
- mntfh = nfs_alloc_fhandle();
- if (data == NULL || mntfh == NULL)
- goto out;
+ mount_info->fill_super = nfs4_fill_super;
+ mount_info->set_security = nfs_set_sb_security;
/* Get a volume representation */
- server = nfs4_create_server(data, mntfh);
+ server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
if (IS_ERR(server)) {
- error = PTR_ERR(server);
+ mntroot = ERR_CAST(server);
goto out;
}
- sb_mntdata.server = server;
- if (server->flags & NFS4_MOUNT_UNSHARED)
- compare_super = NULL;
-
- /* -o noac implies -o sync */
- if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
- /* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto out_free;
- }
-
- if (s->s_fs_info != server) {
- nfs_free_server(server);
- server = NULL;
- } else {
- error = nfs_bdi_register(server);
- if (error)
- goto error_splat_bdi;
- }
-
- if (!s->s_root) {
- /* initial superblock/root creation */
- nfs4_fill_super(s);
- nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
- }
-
- mntroot = nfs4_get_root(s, mntfh, dev_name);
- if (IS_ERR(mntroot)) {
- error = PTR_ERR(mntroot);
- goto error_splat_super;
- }
-
- error = security_sb_set_mnt_opts(s, &data->lsm_opts);
- if (error)
- goto error_splat_root;
-
- s->s_flags |= MS_ACTIVE;
-
- nfs_free_fhandle(mntfh);
- return mntroot;
+ mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
out:
- nfs_free_fhandle(mntfh);
- return ERR_PTR(error);
-
-out_free:
- nfs_free_server(server);
- goto out;
-
-error_splat_root:
- dput(mntroot);
-error_splat_super:
- if (server && !s->s_root)
- bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
- deactivate_locked_super(s);
- goto out;
+ return mntroot;
}
static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
}
static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_parsed_mount_data *data)
+ struct nfs_mount_info *mount_info)
{
char *export_path;
struct vfsmount *root_mnt;
struct dentry *res;
+ struct nfs_parsed_mount_data *data = mount_info->parsed;
dfprintk(MOUNT, "--> nfs4_try_mount()\n");
export_path = data->nfs_server.export_path;
data->nfs_server.export_path = "/";
- root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
+ root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
data->nfs_server.hostname);
data->nfs_server.export_path = export_path;
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
return res;
}
-/*
- * Get the superblock for an NFS4 mountpoint
- */
-static struct dentry *nfs4_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
-{
- struct nfs_parsed_mount_data *data;
- int error = -ENOMEM;
- struct dentry *res = ERR_PTR(-ENOMEM);
-
- data = nfs_alloc_parsed_mount_data(4);
- if (data == NULL)
- goto out;
-
- /* Validate the mount data */
- error = nfs4_validate_mount_data(raw_data, data, dev_name);
- if (error < 0) {
- res = ERR_PTR(error);
- goto out;
- }
-
- res = nfs4_try_mount(flags, dev_name, data);
- if (IS_ERR(res))
- error = PTR_ERR(res);
-
-out:
- nfs_free_parsed_mount_data(data);
- dprintk("<-- nfs4_mount() = %d%s\n", error,
- error != 0 ? " [error]" : "");
- return res;
-}
-
static void nfs4_kill_super(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
- struct nfs_clone_mount *data = raw_data;
- struct super_block *s;
- struct nfs_server *server;
- struct dentry *mntroot;
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs4_clone_super,
+ .set_security = nfs_clone_sb_security,
+ .cloned = raw_data,
};
- int error;
-
- dprintk("--> nfs4_xdev_mount()\n");
-
- /* create a new volume representation */
- server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
- if (IS_ERR(server)) {
- error = PTR_ERR(server);
- goto out_err_noserver;
- }
- sb_mntdata.server = server;
-
- if (server->flags & NFS4_MOUNT_UNSHARED)
- compare_super = NULL;
-
- /* -o noac implies -o sync */
- if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
- /* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto out_err_nosb;
- }
-
- if (s->s_fs_info != server) {
- nfs_free_server(server);
- server = NULL;
- } else {
- error = nfs_bdi_register(server);
- if (error)
- goto error_splat_bdi;
- }
-
- if (!s->s_root) {
- /* initial superblock/root creation */
- nfs4_clone_super(s, data->sb);
- nfs_fscache_get_super_cookie(s, NULL, data);
- }
-
- mntroot = nfs4_get_root(s, data->fh, dev_name);
- if (IS_ERR(mntroot)) {
- error = PTR_ERR(mntroot);
- goto error_splat_super;
- }
- if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
- dput(mntroot);
- error = -ESTALE;
- goto error_splat_super;
- }
-
- s->s_flags |= MS_ACTIVE;
-
- security_sb_clone_mnt_opts(data->sb, s);
-
- dprintk("<-- nfs4_xdev_mount() = 0\n");
- return mntroot;
-
-out_err_nosb:
- nfs_free_server(server);
-out_err_noserver:
- dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
- return ERR_PTR(error);
-
-error_splat_super:
- if (server && !s->s_root)
- bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
- deactivate_locked_super(s);
- dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
- return ERR_PTR(error);
+ return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
}
static struct dentry *
nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data)
{
- struct nfs_clone_mount *data = raw_data;
- struct super_block *s;
- struct nfs_server *server;
- struct dentry *mntroot;
- struct nfs_fh *mntfh;
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs4_fill_super,
+ .set_security = nfs_clone_sb_security,
+ .cloned = raw_data,
};
- int error = -ENOMEM;
+ struct nfs_server *server;
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
dprintk("--> nfs4_referral_get_sb()\n");
- mntfh = nfs_alloc_fhandle();
- if (mntfh == NULL)
- goto out_err_nofh;
+ mount_info.mntfh = nfs_alloc_fhandle();
+ if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
+ goto out;
/* create a new volume representation */
- server = nfs4_create_referral_server(data, mntfh);
+ server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
if (IS_ERR(server)) {
- error = PTR_ERR(server);
- goto out_err_noserver;
- }
- sb_mntdata.server = server;
-
- if (server->flags & NFS4_MOUNT_UNSHARED)
- compare_super = NULL;
-
- /* -o noac implies -o sync */
- if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= MS_SYNCHRONOUS;
-
- /* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto out_err_nosb;
- }
-
- if (s->s_fs_info != server) {
- nfs_free_server(server);
- server = NULL;
- } else {
- error = nfs_bdi_register(server);
- if (error)
- goto error_splat_bdi;
- }
-
- if (!s->s_root) {
- /* initial superblock/root creation */
- nfs4_fill_super(s);
- nfs_fscache_get_super_cookie(s, NULL, data);
- }
-
- mntroot = nfs4_get_root(s, mntfh, dev_name);
- if (IS_ERR(mntroot)) {
- error = PTR_ERR(mntroot);
- goto error_splat_super;
- }
- if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
- dput(mntroot);
- error = -ESTALE;
- goto error_splat_super;
+ mntroot = ERR_CAST(server);
+ goto out;
}
- s->s_flags |= MS_ACTIVE;
-
- security_sb_clone_mnt_opts(data->sb, s);
-
- nfs_free_fhandle(mntfh);
- dprintk("<-- nfs4_referral_get_sb() = 0\n");
+ mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
+out:
+ nfs_free_fhandle(mount_info.mntfh);
return mntroot;
-
-out_err_nosb:
- nfs_free_server(server);
-out_err_noserver:
- nfs_free_fhandle(mntfh);
-out_err_nofh:
- dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
- return ERR_PTR(error);
-
-error_splat_super:
- if (server && !s->s_root)
- bdi_unregister(&server->backing_dev_info);
-error_splat_bdi:
- deactivate_locked_super(s);
- nfs_free_fhandle(mntfh);
- dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
- return ERR_PTR(error);
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c074623..e6fe3d6 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,20 +39,20 @@
/*
* Local function declarations
*/
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
- struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_partial_ops;
-static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
+static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;
-struct nfs_write_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
{
- struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+ struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_commit_data *p)
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(void)
{
- struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+ struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
if (p) {
+ struct nfs_pgio_header *hdr = &p->header;
+
memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- p->npages = pagecount;
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
- else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
- if (!p->pagevec) {
- mempool_free(p, nfs_wdata_mempool);
- p = NULL;
- }
- }
+ INIT_LIST_HEAD(&hdr->pages);
+ INIT_LIST_HEAD(&hdr->rpc_list);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
}
return p;
}
-void nfs_writedata_free(struct nfs_write_data *p)
+static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_write_data *data, *prealloc;
+
+ prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
+ }
+out:
+ return data;
+}
+
+void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_wdata_mempool);
+ struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+ mempool_free(whdr, nfs_wdata_mempool);
}
void nfs_writedata_release(struct nfs_write_data *wdata)
{
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+
put_nfs_open_context(wdata->args.context);
- nfs_writedata_free(wdata);
+ if (wdata->pages.pagevec != wdata->pages.page_array)
+ kfree(wdata->pages.pagevec);
+ if (wdata != &write_header->rpc_data)
+ kfree(wdata);
+ else
+ wdata->header = NULL;
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
}
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
struct inode *inode = page->mapping->host;
struct nfs_server *nfss = NFS_SERVER(inode);
- page_cache_get(page);
if (atomic_long_inc_return(&nfss->writeback) >
NFS_CONGESTION_ON_THRESH) {
set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
struct nfs_server *nfss = NFS_SERVER(inode);
end_page_writeback(page);
- page_cache_release(page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
req = nfs_page_find_request_locked(page);
if (req == NULL)
break;
- if (nfs_lock_request_dontget(req))
+ if (nfs_lock_request(req))
break;
/* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request_dontget() will always
+ * then the call to nfs_lock_request() will always
* succeed provided that someone hasn't already marked the
* request as dirty (in which case we don't care).
*/
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
- nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+ &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
+ nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+ &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
/* Lock the request! */
- nfs_lock_request_dontget(req);
+ nfs_lock_request(req);
spin_lock(&inode->i_lock);
if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
/**
* nfs_request_add_commit_list - add request to a commit list
* @req: pointer to a struct nfs_page
- * @head: commit list head
+ * @dst: commit list head
+ * @cinfo: holds list lock and accounting info
*
- * This sets the PG_CLEAN bit, updates the inode global count of
+ * This sets the PG_CLEAN bit, updates the cinfo count of
* number of outstanding requests requiring a commit as well as
* the MM page stats.
*
- * The caller must _not_ hold the inode->i_lock, but must be
+ * The caller must _not_ hold the cinfo->lock, but must be
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
set_bit(PG_CLEAN, &(req)->wb_flags);
- spin_lock(&inode->i_lock);
- nfs_list_add_request(req, head);
- NFS_I(inode)->ncommit++;
- spin_unlock(&inode->i_lock);
- inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ spin_lock(cinfo->lock);
+ nfs_list_add_request(req, dst);
+ cinfo->mds->ncommit++;
+ spin_unlock(cinfo->lock);
+ if (!cinfo->dreq) {
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ __mark_inode_dirty(req->wb_context->dentry->d_inode,
+ I_DIRTY_DATASYNC);
+ }
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
/**
* nfs_request_remove_commit_list - Remove request from a commit list
* @req: pointer to a nfs_page
+ * @cinfo: holds list lock and accounting info
*
- * This clears the PG_CLEAN bit, and updates the inode global count of
+ * This clears the PG_CLEAN bit, and updates the cinfo's count of
* number of outstanding requests requiring a commit
* It does not update the MM page stats.
*
- * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ * The caller _must_ hold the cinfo->lock and the nfs_page lock.
*/
void
-nfs_request_remove_commit_list(struct nfs_page *req)
+nfs_request_remove_commit_list(struct nfs_page *req,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
return;
nfs_list_remove_request(req);
- NFS_I(inode)->ncommit--;
+ cinfo->mds->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ struct inode *inode)
+{
+ cinfo->lock = &inode->i_lock;
+ cinfo->mds = &NFS_I(inode)->commit_info;
+ cinfo->ds = pnfs_get_ds_info(inode);
+ cinfo->dreq = NULL;
+ cinfo->completion_ops = &nfs_commit_completion_ops;
+}
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq)
+{
+ if (dreq)
+ nfs_init_cinfo_from_dreq(cinfo, dreq);
+ else
+ nfs_init_cinfo_from_inode(cinfo, inode);
+}
+EXPORT_SYMBOL_GPL(nfs_init_cinfo);
/*
* Add a request to the inode's commit list.
*/
-static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+void
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
- if (pnfs_mark_request_commit(req, lseg))
+ if (pnfs_mark_request_commit(req, lseg, cinfo))
return;
- nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+ nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
}
static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_commit_info cinfo;
- if (!pnfs_clear_request_commit(req)) {
- spin_lock(&inode->i_lock);
- nfs_request_remove_commit_list(req);
- spin_unlock(&inode->i_lock);
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ if (!pnfs_clear_request_commit(req, &cinfo)) {
+ spin_lock(cinfo.lock);
+ nfs_request_remove_commit_list(req, &cinfo);
+ spin_unlock(cinfo.lock);
}
nfs_clear_page_commit(req->wb_page);
}
@@ -508,28 +560,25 @@ static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
- return data->lseg == NULL;
- else
- return data->verf.committed != NFS_FILE_SYNC;
+ return data->header->lseg == NULL;
+ return data->verf.committed != NFS_FILE_SYNC;
}
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
- struct nfs_write_data *data)
+#else
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ struct inode *inode)
{
- if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
- nfs_mark_request_commit(req, data->lseg);
- return 1;
- }
- if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
- nfs_mark_request_dirty(req);
- return 1;
- }
- return 0;
}
-#else
-static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq)
+{
+}
+
+void
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
}
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
return 0;
}
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
- struct nfs_write_data *data)
+#endif
+
+static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
- return 0;
+ struct nfs_commit_info cinfo;
+ unsigned long bytes = 0;
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out;
+ nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ bytes += req->wb_bytes;
+ nfs_list_remove_request(req);
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+ (hdr->good_bytes < bytes)) {
+ nfs_set_pageerror(req->wb_page);
+ nfs_context_set_write_error(req->wb_context, hdr->error);
+ goto remove_req;
+ }
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+ nfs_mark_request_dirty(req);
+ goto next;
+ }
+ if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+ goto next;
+ }
+remove_req:
+ nfs_inode_remove_request(req);
+next:
+ nfs_unlock_request(req);
+ nfs_end_page_writeback(req->wb_page);
+ nfs_release_request(req);
+ }
+out:
+ hdr->release(hdr);
}
-#endif
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static int
-nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long
+nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
- return nfsi->ncommit > 0;
+ return cinfo->mds->ncommit;
}
-/* i_lock held by caller */
-static int
-nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
- spinlock_t *lock)
+/* cinfo->lock held by caller */
+int
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max)
{
struct nfs_page *req, *tmp;
int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
list_for_each_entry_safe(req, tmp, src, wb_list) {
if (!nfs_lock_request(req))
continue;
- if (cond_resched_lock(lock))
+ kref_get(&req->wb_kref);
+ if (cond_resched_lock(cinfo->lock))
list_safe_reset_next(req, tmp, wb_list);
- nfs_request_remove_commit_list(req);
+ nfs_request_remove_commit_list(req, cinfo);
nfs_list_add_request(req, dst);
ret++;
- if (ret == max)
+ if ((ret == max) && !cinfo->dreq)
break;
}
return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
/*
* nfs_scan_commit - Scan an inode for commit requests
* @inode: NFS inode to scan
- * @dst: destination list
+ * @dst: mds destination list
+ * @cinfo: mds and ds lists of reqs ready to commit
*
* Moves requests from the inode's 'commit' request list.
* The requests are *not* checked to ensure that they form a contiguous set.
*/
-static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst)
+int
+nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
- struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
- spin_lock(&inode->i_lock);
- if (nfsi->ncommit > 0) {
+ spin_lock(cinfo->lock);
+ if (cinfo->mds->ncommit > 0) {
const int max = INT_MAX;
- ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
- &inode->i_lock);
- ret += pnfs_scan_commit_lists(inode, max - ret,
- &inode->i_lock);
+ ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
+ cinfo, max);
+ ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(cinfo->lock);
return ret;
}
#else
-static inline int nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
|| end < req->wb_offset)
goto out_flushme;
- if (nfs_lock_request_dontget(req))
+ if (nfs_lock_request(req))
break;
/* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
nfs_grow_file(page, offset, count);
nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
nfs_mark_request_dirty(req);
- nfs_unlock_request(req);
+ nfs_unlock_and_release_request(req);
return 0;
}
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
-static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
+static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
- return PageUptodate(page) &&
- !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
+ if (nfs_have_delegated_attributes(inode))
+ goto out;
+ if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ return false;
+out:
+ return PageUptodate(page) != 0;
}
/*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
return status;
}
-static void nfs_writepage_release(struct nfs_page *req,
- struct nfs_write_data *data)
-{
- struct page *page = req->wb_page;
-
- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
- nfs_inode_remove_request(req);
- nfs_unlock_request(req);
- nfs_end_page_writeback(page);
-}
-
static int flush_task_priority(int how)
{
switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-int nfs_initiate_write(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+int nfs_initiate_write(struct rpc_clnt *clnt,
+ struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
- int how)
+ int how, int flags)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = data->cred,
+ .rpc_cred = data->header->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
.callback_ops = call_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | flags,
.priority = priority,
};
int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
-static void nfs_write_rpcsetup(struct nfs_page *req,
- struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_write_data *data,
unsigned int count, unsigned int offset,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_page *req = data->header->req;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
- data->req = req;
- data->inode = inode = req->wb_context->dentry->d_inode;
- data->cred = req->wb_context->cred;
-
- data->args.fh = NFS_FH(inode);
+ data->args.fh = NFS_FH(data->header->inode);
data->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */
data->mds_offset = data->args.offset;
data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pagevec;
+ data->args.pages = data->pages.pagevec;
data->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
case 0:
break;
case FLUSH_COND_STABLE:
- if (nfs_need_commit(NFS_I(inode)))
+ if (nfs_reqs_to_commit(cinfo))
break;
default:
data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how)
{
- struct inode *inode = data->args.context->dentry->d_inode;
+ struct inode *inode = data->header->inode;
- return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
}
static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
while (!list_empty(head)) {
int ret2;
- data = list_entry(head->next, struct nfs_write_data, list);
+ data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);
ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
- struct page *page = req->wb_page;
-
nfs_mark_request_dirty(req);
nfs_unlock_request(req);
- nfs_end_page_writeback(page);
+ nfs_end_page_writeback(req->wb_page);
+ nfs_release_request(req);
+}
+
+static void nfs_async_write_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ }
+}
+
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+ .error_cleanup = nfs_async_write_error,
+ .completion = nfs_write_completion,
+};
+
+static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ while (!list_empty(&hdr->rpc_list)) {
+ struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
+ struct nfs_write_data, list);
+ list_del(&data->list);
+ nfs_writedata_release(data);
+ }
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}
/*
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+ struct nfs_page *req = hdr->req;
struct page *page = req->wb_page;
struct nfs_write_data *data;
size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
- int ret = 0;
+ struct nfs_commit_info cinfo;
- nfs_list_remove_request(req);
+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
desc->pg_count > wsize))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
do {
size_t len = min(nbytes, wsize);
- data = nfs_writedata_alloc(1);
- if (!data)
- goto out_bad;
- data->pagevec[0] = page;
- nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
- list_add(&data->list, res);
+ data = nfs_writedata_alloc(hdr, 1);
+ if (!data) {
+ nfs_flush_error(desc, hdr);
+ return -ENOMEM;
+ }
+ data->pages.pagevec[0] = page;
+ nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
+ list_add(&data->list, &hdr->rpc_list);
requests++;
nbytes -= len;
offset += len;
} while (nbytes != 0);
- atomic_set(&req->wb_complete, requests);
- desc->pg_rpc_callops = &nfs_write_partial_ops;
- return ret;
-
-out_bad:
- while (!list_empty(res)) {
- data = list_entry(res->next, struct nfs_write_data, list);
- list_del(&data->list);
- nfs_writedata_release(data);
- }
- nfs_redirty_request(req);
- return -ENOMEM;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);
+ desc->pg_rpc_callops = &nfs_write_common_ops;
+ return 0;
}
/*
@@ -1033,62 +1127,71 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
- int ret = 0;
+ struct nfs_commit_info cinfo;
- data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
+ data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
if (!data) {
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_redirty_request(req);
- }
- ret = -ENOMEM;
- goto out;
+ nfs_flush_error(desc, hdr);
+ return -ENOMEM;
}
- pages = data->pagevec;
+
+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+ pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &data->pages);
+ nfs_list_add_request(req, &hdr->pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(data->pages.next);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
- list_add(&data->list, res);
- desc->pg_rpc_callops = &nfs_write_full_ops;
-out:
- return ret;
+ nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+ list_add(&data->list, &hdr->rpc_list);
+ desc->pg_rpc_callops = &nfs_write_common_ops;
+ return 0;
}
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc, head);
- return nfs_flush_one(desc, head);
+ return nfs_flush_multi(desc, hdr);
+ return nfs_flush_one(desc, hdr);
}
static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_write_header *whdr;
+ struct nfs_pgio_header *hdr;
int ret;
- ret = nfs_generic_flush(desc, &head);
+ whdr = nfs_writehdr_alloc();
+ if (!whdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &whdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_flush(desc, hdr);
if (ret == 0)
- ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
- desc->pg_ioflags);
+ ret = nfs_do_multiple_writes(&hdr->rpc_list,
+ desc->pg_rpc_callops,
+ desc->pg_ioflags);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
return ret;
}
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
};
void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags)
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+ nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
NFS_SERVER(inode)->wsize, ioflags);
}
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags)
+void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- if (!pnfs_pageio_init_write(pgio, inode, ioflags))
- nfs_pageio_init_write_mds(pgio, inode, ioflags);
+ if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
+ nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
}
-/*
- * Handle a write reply that flushed part of a page.
- */
-static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
+void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs_write_data *data = calldata;
-
- dprintk("NFS: %5u write(%s/%lld %d@%lld)",
- task->tk_pid,
- data->req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)
- NFS_FILEID(data->req->wb_context->dentry->d_inode),
- data->req->wb_bytes, (long long)req_offset(data->req));
-
- nfs_writeback_done(task, data);
+ struct nfs_write_data *data = calldata;
+ NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
}
-static void nfs_writeback_release_partial(void *calldata)
+void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs_write_data *data = calldata;
- struct nfs_page *req = data->req;
- struct page *page = req->wb_page;
- int status = data->task.tk_status;
+ struct nfs_commit_data *data = calldata;
- if (status < 0) {
- nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, status);
- dprintk(", error = %d\n", status);
- goto out;
- }
-
- if (nfs_write_need_commit(data)) {
- struct inode *inode = page->mapping->host;
-
- spin_lock(&inode->i_lock);
- if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
- /* Do nothing we need to resend the writes */
- } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
- memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
- dprintk(" defer commit\n");
- } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
- set_bit(PG_NEED_RESCHED, &req->wb_flags);
- clear_bit(PG_NEED_COMMIT, &req->wb_flags);
- dprintk(" server reboot detected\n");
- }
- spin_unlock(&inode->i_lock);
- } else
- dprintk(" OK\n");
-
-out:
- if (atomic_dec_and_test(&req->wb_complete))
- nfs_writepage_release(req, data);
- nfs_writedata_release(calldata);
+ NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
- NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
-}
-
-static const struct rpc_call_ops nfs_write_partial_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_partial,
- .rpc_release = nfs_writeback_release_partial,
-};
-
/*
* Handle a write reply that flushes a whole page.
*
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
nfs_writeback_done(task, data);
}
-static void nfs_writeback_release_full(void *calldata)
+static void nfs_writeback_release_common(void *calldata)
{
struct nfs_write_data *data = calldata;
+ struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
+ struct nfs_page *req = hdr->req;
- /* Update attributes as result of writeback. */
- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
- struct page *page = req->wb_page;
-
- nfs_list_remove_request(req);
-
- dprintk("NFS: %5u write (%s/%lld %d@%lld)",
- data->task.tk_pid,
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
- req->wb_bytes,
- (long long)req_offset(req));
-
- if (status < 0) {
- nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, status);
- dprintk(", error = %d\n", status);
- goto remove_request;
- }
-
- if (nfs_write_need_commit(data)) {
+ if ((status >= 0) && nfs_write_need_commit(data)) {
+ spin_lock(&hdr->lock);
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
+ ; /* Do nothing */
+ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
- nfs_mark_request_commit(req, data->lseg);
- dprintk(" marked for commit\n");
- goto next;
- }
- dprintk(" OK\n");
-remove_request:
- nfs_inode_remove_request(req);
- next:
- nfs_unlock_request(req);
- nfs_end_page_writeback(page);
+ else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+ set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
+ spin_unlock(&hdr->lock);
}
- nfs_writedata_release(calldata);
+ nfs_writedata_release(data);
}
-static const struct rpc_call_ops nfs_write_full_ops = {
+static const struct rpc_call_ops nfs_write_common_ops = {
.rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_full,
- .rpc_release = nfs_writeback_release_full,
+ .rpc_call_done = nfs_writeback_done_common,
+ .rpc_release = nfs_writeback_release_common,
};
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ struct inode *inode = data->header->inode;
int status;
dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
* another writer had changed the file, but some applications
* depend on tighter cache coherency when writing.
*/
- status = NFS_PROTO(data->inode)->write_done(task, data);
+ status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
return;
- nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
- NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
resp->verf->committed, argp->stable);
complain = jiffies + 300 * HZ;
}
}
#endif
- /* Is this a short write? */
- if (task->tk_status >= 0 && resp->count < argp->count) {
+ if (task->tk_status < 0)
+ nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
+ else if (resp->count < argp->count) {
static unsigned long complain;
- nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ /* This a short write! */
+ nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
- if (resp->count != 0) {
- /* Was this an NFSv2 write or an NFSv3 stable write? */
- if (resp->verf->committed != NFS_UNSTABLE) {
- /* Resend from where the server left off */
- data->mds_offset += resp->count;
- argp->offset += resp->count;
- argp->pgbase += resp->count;
- argp->count -= resp->count;
- } else {
- /* Resend as a stable write in order to avoid
- * headaches in the case of a server crash.
- */
- argp->stable = NFS_FILE_SYNC;
+ if (resp->count == 0) {
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote zero bytes, expected %u.\n",
+ argp->count);
+ complain = jiffies + 300 * HZ;
}
- rpc_restart_call_prepare(task);
+ nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ task->tk_status = -EIO;
return;
}
- if (time_before(complain, jiffies)) {
- printk(KERN_WARNING
- "NFS: Server wrote zero bytes, expected %u.\n",
- argp->count);
- complain = jiffies + 300 * HZ;
+ /* Was this an NFSv2 write or an NFSv3 stable write? */
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ /* Resend from where the server left off */
+ data->mds_offset += resp->count;
+ argp->offset += resp->count;
+ argp->pgbase += resp->count;
+ argp->count -= resp->count;
+ } else {
+ /* Resend as a stable write in order to avoid
+ * headaches in the case of a server crash.
+ */
+ argp->stable = NFS_FILE_SYNC;
}
- /* Can't do anything about it except throw an error. */
- task->tk_status = -EIO;
+ rpc_restart_call_prepare(task);
}
- return;
}
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
return (ret < 0) ? ret : 1;
}
-void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
clear_bit(NFS_INO_COMMIT, &nfsi->flags);
smp_mb__after_clear_bit();
wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
-EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
-void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(struct nfs_commit_data *data)
{
- struct nfs_write_data *wdata = data;
-
- put_nfs_open_context(wdata->args.context);
- nfs_commit_free(wdata);
+ put_nfs_open_context(data->context);
+ nfs_commit_free(data);
}
EXPORT_SYMBOL_GPL(nfs_commitdata_release);
-int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
- int how)
+ int how, int flags)
{
struct rpc_task *task;
int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
.callback_ops = call_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | flags,
.priority = priority,
};
/* Set up the initial task struct. */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
/*
* Set up the argument/result storage required for the RPC call.
*/
-void nfs_init_commit(struct nfs_write_data *data,
- struct list_head *head,
- struct pnfs_layout_segment *lseg)
+void nfs_init_commit(struct nfs_commit_data *data,
+ struct list_head *head,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *first = nfs_list_entry(head->next);
struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
data->cred = first->wb_context->cred;
data->lseg = lseg; /* reference transferred */
data->mds_ops = &nfs_commit_ops;
+ data->completion_ops = cinfo->completion_ops;
+ data->dreq = cinfo->dreq;
data->args.fh = NFS_FH(data->inode);
/* Note: we always request a commit of the entire inode */
data->args.offset = 0;
data->args.count = 0;
- data->args.context = get_nfs_open_context(first->wb_context);
- data->res.count = 0;
+ data->context = get_nfs_open_context(first->wb_context);
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
EXPORT_SYMBOL_GPL(nfs_init_commit);
void nfs_retry_commit(struct list_head *page_list,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *req;
while (!list_empty(page_list)) {
req = nfs_list_entry(page_list->next);
nfs_list_remove_request(req);
- nfs_mark_request_commit(req, lseg);
- dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- nfs_unlock_request(req);
+ nfs_mark_request_commit(req, lseg, cinfo);
+ if (!cinfo->dreq) {
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ }
+ nfs_unlock_and_release_request(req);
}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
* Commit dirty pages
*/
static int
-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+ struct nfs_commit_info *cinfo)
{
- struct nfs_write_data *data;
+ struct nfs_commit_data *data;
data = nfs_commitdata_alloc();
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
goto out_bad;
/* Set up the argument struct */
- nfs_init_commit(data, head, NULL);
- return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
+ nfs_init_commit(data, head, NULL, cinfo);
+ atomic_inc(&cinfo->mds->rpcs_out);
+ return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
+ how, 0);
out_bad:
- nfs_retry_commit(head, NULL);
- nfs_commit_clear_lock(NFS_I(inode));
+ nfs_retry_commit(head, NULL, cinfo);
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
*/
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
- struct nfs_write_data *data = calldata;
+ struct nfs_commit_data *data = calldata;
dprintk("NFS: %5u nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_done(task, data);
}
-void nfs_commit_release_pages(struct nfs_write_data *data)
+static void nfs_commit_release_pages(struct nfs_commit_data *data)
{
struct nfs_page *req;
int status = data->task.tk_status;
+ struct nfs_commit_info cinfo;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
dprintk(" mismatch\n");
nfs_mark_request_dirty(req);
next:
- nfs_unlock_request(req);
+ nfs_unlock_and_release_request(req);
}
+ nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ nfs_commit_clear_lock(NFS_I(data->inode));
}
-EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
static void nfs_commit_release(void *calldata)
{
- struct nfs_write_data *data = calldata;
+ struct nfs_commit_data *data = calldata;
- nfs_commit_release_pages(data);
- nfs_commit_clear_lock(NFS_I(data->inode));
+ data->completion_ops->completion(data);
nfs_commitdata_release(calldata);
}
static const struct rpc_call_ops nfs_commit_ops = {
- .rpc_call_prepare = nfs_write_prepare,
+ .rpc_call_prepare = nfs_commit_prepare,
.rpc_call_done = nfs_commit_done,
.rpc_release = nfs_commit_release,
};
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
+ .completion = nfs_commit_release_pages,
+ .error_cleanup = nfs_commit_clear_lock,
+};
+
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how, struct nfs_commit_info *cinfo)
+{
+ int status;
+
+ status = pnfs_commit_list(inode, head, how, cinfo);
+ if (status == PNFS_NOT_ATTEMPTED)
+ status = nfs_commit_list(inode, head, how, cinfo);
+ return status;
+}
+
int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
+ struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
int res;
res = nfs_commit_set_lock(NFS_I(inode), may_wait);
if (res <= 0)
goto out_mark_dirty;
- res = nfs_scan_commit(inode, &head);
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ res = nfs_scan_commit(inode, &head, &cinfo);
if (res) {
int error;
- error = pnfs_commit_list(inode, &head, how);
- if (error == PNFS_NOT_ATTEMPTED)
- error = nfs_commit_list(inode, &head, how);
+ error = nfs_generic_commit_list(inode, &head, how, &cinfo);
if (error < 0)
return error;
if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
int ret = 0;
/* no commits means nothing needs to be done */
- if (!nfsi->ncommit)
+ if (!nfsi->commit_info.ncommit)
return ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
- if (nfsi->ncommit <= (nfsi->npages >> 1))
+ if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
goto out_mark_dirty;
/* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
req = nfs_page_find_request(page);
if (req == NULL)
break;
- if (nfs_lock_request_dontget(req)) {
+ if (nfs_lock_request(req)) {
nfs_clear_request_commit(req);
nfs_inode_remove_request(req);
/*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
* page as being dirty
*/
cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_request(req);
+ nfs_unlock_and_release_request(req);
break;
}
ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_write_data),
+ sizeof(struct nfs_write_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
if (nfs_wdata_mempool == NULL)
return -ENOMEM;
+ nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
+ sizeof(struct nfs_commit_data),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (nfs_cdata_cachep == NULL)
+ return -ENOMEM;
+
nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
nfs_wdata_cachep);
if (nfs_commit_mempool == NULL)