diff options
author | Chuck Lever <cel@netapp.com> | 2005-11-30 23:09:02 (GMT) |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-01-06 19:58:49 (GMT) |
commit | 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch) | |
tree | ed4069423c3d6551035d5b6116f50452cdac4103 /fs/nfs | |
parent | 325cfed9ae901320e9234b18c21434b783dbe342 (diff) | |
download | linux-40859d7ee64ed6bfad8a4e93f9bb5c1074afadff.tar.xz |
NFS: support large reads and writes on the wire
Most NFS server implementations allow up to 64KB reads and writes on the
wire. The Solaris NFS server allows up to a megabyte, for instance.
Now the Linux NFS client supports transfer sizes up to 1MB, too. This will
help reduce protocol and context switch overhead on read/write intensive NFS
workloads, and support larger atomic read and write operations on servers
that support them.
Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
Tests with NFS over UDP to verify the maximum RPC payload size cap.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/direct.c | 5 | ||||
-rw-r--r-- | fs/nfs/inode.c | 25 | ||||
-rw-r--r-- | fs/nfs/nfsroot.c | 4 | ||||
-rw-r--r-- | fs/nfs/read.c | 6 | ||||
-rw-r--r-- | fs/nfs/write.c | 29 |
5 files changed, 40 insertions, 29 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f69d95a..fd7ac5e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int struct list_head *list; struct nfs_direct_req *dreq; unsigned int reads = 0; + unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); if (!dreq) @@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int list = &dreq->list; for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(); + struct nfs_read_data *data = nfs_readdata_alloc(rpages); if (unlikely(!data)) { while (!list_empty(list)) { @@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_writeverf first_verf; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!wdata) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4e6558d..acde2c5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize) static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } @@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 1b272a1..985cc53 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name) nfs_port = -1; nfs_data.version = NFS_MOUNT_VERSION; nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ - nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; nfs_data.acregmin = 3; nfs_data.acregmax = 60; nfs_data.acdirmin = 30; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2148624..05eb43f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, int result; struct nfs_read_data *rdata; - rdata = nfs_readdata_alloc(); + rdata = nfs_readdata_alloc(1); if (!rdata) return -ENOMEM; @@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) nbytes = req->wb_bytes; for(;;) { - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(1); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); @@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); if (!data) goto out_bad; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80bc4ea..1ce0c20 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(void) +static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_commit_mempool); + p = NULL; + } + } } return p; } static inline void nfs_commit_free(struct nfs_write_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } @@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, int result, written = 0; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(1); if (!wdata) return -ENOMEM; @@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) nbytes = req->wb_bytes; for (;;) { - data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(1); if (!data) goto out_bad; list_add(&data->pages, &list); @@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head, * Commit dirty pages */ static int -nfs_commit_list(struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how) { struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(); + data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(&head, how); + error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } |