From a20c6bec0b8ae775e2e8f350819cef98eea9a832 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:36 -0400 Subject: NFS: grab open context in direct read Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 481be7f..8a89423 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -260,7 +260,7 @@ static void nfs_direct_read_release(void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_free(data); + nfs_readdata_release(data); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -337,7 +337,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; -- cgit v0.10.2 From 1acbbb4e16209e85c35ff6cacad61d802c07289b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:37 -0400 Subject: NFS4.1: make pnfs_ld_[read|write]_done consistent The two functions had diverged quite a bit, with the write function being a bit more robust than the read. However, these still break badly in the desc->pg_bsize < PAGE_CACHE_SIZE case, as then there is nothing hanging on the data->pages list, and the resend ends up doing nothing. This will be fixed in a patch later in the series. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 38512bc..9c4d14a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1189,6 +1189,17 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head * return 0; } +static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +{ + dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } + data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); +} + /* * Called by non rpc-based layout drivers */ @@ -1197,19 +1208,8 @@ void pnfs_ld_write_done(struct nfs_write_data *data) if (likely(!data->pnfs_error)) { pnfs_set_layoutcommit(data); data->mds_ops->rpc_call_done(&data->task, data); - } else { - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & - PNFS_LAYOUTRET_ON_ERROR) { - /* Don't lo_commit on error, Server will needs to - * preform a file recovery. 
- */ - clear_bit(NFS_INO_LAYOUTCOMMIT, - &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); - } - data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); - } + } else + pnfs_ld_handle_write_error(data); put_lseg(data->lseg); data->mds_ops->rpc_release(data); } @@ -1293,26 +1293,38 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head) { struct nfs_pageio_descriptor pgio; + LIST_HEAD(failed); - put_lseg(data->lseg); - data->lseg = NULL; - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & - PNFS_LAYOUTRET_ON_ERROR) - pnfs_return_layout(data->inode); - - nfs_pageio_init_read_mds(&pgio, data->inode); - - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + /* Resend all requests through the MDS */ + nfs_pageio_init_read_mds(&pgio, inode); + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_pageio_add_request(&pgio, req); + if (!nfs_pageio_add_request(&pgio, req)) + nfs_list_add_request(req, &failed); } nfs_pageio_complete(&pgio); + + if (!list_empty(&failed)) { + list_move(&failed, head); + return -EIO; + } + return 0; +} + +static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +{ + dprintk("pnfs read error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } + data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages); } /* -- cgit v0.10.2 From 799ba8d53d32c84bd2a867ca2689538a48176140 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:38 -0400 
Subject: NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Also create a commit_info structure to hold the bucket array and push it up from the lseg to the layout where it really belongs. While we are at it, fix a refcounting bug due to an (incorrect) implicit assumption that filelayout_scan_ds_commit_list always completely emptied the src list. This clarifies refcounting, removes the ugly find_only_write_lseg functions, and pushes the file layout commit code along on the path to supporting multiple lsegs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5acfd9e..15aeba2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -650,10 +650,66 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) dprintk("--> %s\n", __func__); nfs4_fl_put_deviceid(fl->dsaddr); - kfree(fl->commit_buckets); + /* This assumes a single RW lseg */ + if (lseg->pls_range.iomode == IOMODE_RW) { + struct nfs4_filelayout *flo; + + flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + flo->commit_info.nbuckets = 0; + kfree(flo->commit_info.buckets); + flo->commit_info.buckets = NULL; + } _filelayout_free_lseg(fl); } +static int +filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, + gfp_t gfp_flags) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + + struct nfs4_fl_commit_bucket *buckets; + int size; + + if (fl->commit_through_mds) + return 0; + if (flo->commit_info.nbuckets != 0) { + /* This assumes there is only one IOMODE_RW lseg. What + * we really want to do is have a layout_hdr level + * dictionary of <multipath_list4, fh> keys, each + * associated with a struct list_head, populated by calls + * to filelayout_write_pagelist(). + * */ + return 0; + } + + size = (fl->stripe_type == STRIPE_SPARSE) ? 
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), + gfp_flags); + if (!buckets) + return -ENOMEM; + else { + int i; + + spin_lock(&lseg->pls_layout->plh_inode->i_lock); + if (flo->commit_info.nbuckets != 0) + kfree(buckets); + else { + flo->commit_info.buckets = buckets; + flo->commit_info.nbuckets = size; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + } + } + spin_unlock(&lseg->pls_layout->plh_inode->i_lock); + return 0; + } +} + static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, @@ -673,29 +729,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, _filelayout_free_lseg(fl); return NULL; } - - /* This assumes there is only one IOMODE_RW lseg. What - * we really want to do is have a layout_hdr level - * dictionary of <multipath_list4, fh> keys, each - * associated with a struct list_head, populated by calls - * to filelayout_write_pagelist(). - * */ - if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { - int i; - int size = (fl->stripe_type == STRIPE_SPARSE) ? 
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - - fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); - if (!fl->commit_buckets) { - filelayout_free_lseg(&fl->generic_hdr); - return NULL; - } - fl->number_of_buckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&fl->commit_buckets[i].written); - INIT_LIST_HEAD(&fl->commit_buckets[i].committing); - } - } return &fl->generic_hdr; } @@ -747,6 +780,8 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + int status; + BUG_ON(pgio->pg_lseg != NULL); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, @@ -757,7 +792,16 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_reset_write_mds(pgio); + goto out_mds; + status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS); + if (status < 0) { + put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + goto out_mds; + } + return; +out_mds: + nfs_pageio_reset_write_mds(pgio); } static const struct nfs_pageio_ops filelayout_pg_read_ops = { @@ -793,17 +837,13 @@ filelayout_clear_request_commit(struct nfs_page *req) if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; if (list_is_singular(&req->wb_list)) { - struct pnfs_layout_segment *lseg; + struct nfs4_fl_commit_bucket *bucket; - /* From here we can find the bucket, but for the moment, - * since there is only one relevant lseg... 
- */ - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { - if (lseg->pls_range.iomode == IOMODE_RW) { - freeme = lseg; - break; - } - } + bucket = list_first_entry(&req->wb_list, + struct nfs4_fl_commit_bucket, + written); + freeme = bucket->wlseg; + bucket->wlseg = NULL; } out: nfs_request_remove_commit_list(req); @@ -818,6 +858,7 @@ filelayout_choose_commit_list(struct nfs_page *req, struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; + struct nfs4_fl_commit_bucket *buckets; if (fl->commit_through_mds) return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; @@ -831,15 +872,16 @@ filelayout_choose_commit_list(struct nfs_page *req, j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i].written; + buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; + list = &buckets[i].written; if (list_empty(list)) { /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled * off due to a rewrite, in which case it will be done in - * filelayout_remove_commit_req + * filelayout_clear_request_commit */ - get_lseg(lseg); + buckets[i].wlseg = get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; @@ -908,32 +950,6 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) &filelayout_commit_call_ops, how); } -/* - * This is only useful while we are using whole file layouts. 
- */ -static struct pnfs_layout_segment * -find_only_write_lseg_locked(struct inode *inode) -{ - struct pnfs_layout_segment *lseg; - - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) - if (lseg->pls_range.iomode == IOMODE_RW) - return lseg; - return NULL; -} - -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) -{ - struct pnfs_layout_segment *rv; - - spin_lock(&inode->i_lock); - rv = find_only_write_lseg_locked(inode); - if (rv) - get_lseg(rv); - spin_unlock(&inode->i_lock); - return rv; -} - static int filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, spinlock_t *lock) @@ -955,6 +971,13 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, if (ret == max) break; } + if (ret) { + bucket->clseg = bucket->wlseg; + if (list_empty(src)) + bucket->wlseg = NULL; + else + get_lseg(bucket->clseg); + } return ret; } @@ -964,18 +987,14 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, static int filelayout_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { - struct pnfs_layout_segment *lseg; - struct nfs4_filelayout_segment *fl; + struct nfs4_fl_commit_info *fl_cinfo; int i, rv = 0, cnt; - lseg = find_only_write_lseg_locked(inode); - if (!lseg) - goto out_done; - fl = FILELAYOUT_LSEG(lseg); - if (fl->commit_through_mds) + fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + if (fl_cinfo->nbuckets == 0) goto out_done; - for (i = 0; i < fl->number_of_buckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], + for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i], max, lock); max -= cnt; rv += cnt; @@ -987,38 +1006,35 @@ out_done: static unsigned int alloc_ds_commits(struct inode *inode, struct list_head *list) { - struct pnfs_layout_segment *lseg; - struct nfs4_filelayout_segment *fl; + struct nfs4_fl_commit_info 
*fl_cinfo; + struct nfs4_fl_commit_bucket *bucket; struct nfs_write_data *data; int i, j; unsigned int nreq = 0; - /* Won't need this when non-whole file layout segments are supported - * instead we will use a pnfs_layout_hdr structure */ - lseg = find_only_write_lseg(inode); - if (!lseg) - return 0; - fl = FILELAYOUT_LSEG(lseg); - for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i].committing)) + fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + bucket = fl_cinfo->buckets; + for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { + if (list_empty(&bucket->committing)) continue; data = nfs_commitdata_alloc(); if (!data) break; data->ds_commit_index = i; - data->lseg = lseg; + data->lseg = bucket->clseg; + bucket->clseg = NULL; list_add(&data->pages, list); nreq++; } /* Clean up on error */ - for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i].committing)) + for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { + if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); - put_lseg(lseg); /* associated with emptying bucket */ + nfs_retry_commit(&bucket->committing, bucket->clseg); + put_lseg(bucket->clseg); + bucket->clseg = NULL; } - put_lseg(lseg); /* Caller will clean up entries put on list */ return nreq; } @@ -1058,7 +1074,10 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); + struct nfs4_fl_commit_info *fl_cinfo; + + fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info; + nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } @@ -1072,10 +1091,27 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) 
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); } +static struct pnfs_layout_hdr * +filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct nfs4_filelayout *flo; + + flo = kzalloc(sizeof(*flo), gfp_flags); + return &flo->generic_hdr; +} + +static void +filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + kfree(FILELAYOUT_FROM_HDR(lo)); +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", .owner = THIS_MODULE, + .alloc_layout_hdr = filelayout_alloc_layout_hdr, + .free_layout_hdr = filelayout_free_layout_hdr, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 21190bb..333a3ac 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -77,6 +77,13 @@ struct nfs4_file_layout_dsaddr { struct nfs4_fl_commit_bucket { struct list_head written; struct list_head committing; + struct pnfs_layout_segment *wlseg; + struct pnfs_layout_segment *clseg; +}; + +struct nfs4_fl_commit_info { + int nbuckets; + struct nfs4_fl_commit_bucket *buckets; }; struct nfs4_filelayout_segment { @@ -89,10 +96,19 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ - int number_of_buckets; }; +struct nfs4_filelayout { + struct pnfs_layout_hdr generic_hdr; + struct nfs4_fl_commit_info commit_info; +}; + +static inline struct nfs4_filelayout * +FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct nfs4_filelayout, generic_hdr); +} + static inline struct nfs4_filelayout_segment * FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) { -- cgit v0.10.2 From 0b7c01533aa9f4a228d07d2768d084acb3a387bc Mon Sep 17 00:00:00 2001 From: Fred Isaman 
Date: Fri, 20 Apr 2012 14:47:39 -0400 Subject: NFS: add a struct nfs_commit_data to replace nfs_write_data in commits Commits don't need the vectors of pages, etc. that writes do. Split out a separate structure for the commit operation. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8a89423..5897dfe 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -82,7 +82,7 @@ struct nfs_direct_req { /* commit state */ struct list_head rewrite_list; /* saved nfs_write_data structs */ - struct nfs_write_data * commit_data; /* special write_data for commits */ + struct nfs_commit_data *commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ @@ -524,7 +524,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); @@ -532,8 +532,8 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) static void nfs_direct_commit_release(void *calldata) { - struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_commit_data *data = calldata; + struct nfs_direct_req *dreq = data->dreq; int status = data->task.tk_status; if (status < 0) { @@ -551,14 +551,14 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_commit_prepare, .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct nfs_write_data 
*data = dreq->commit_data; + struct nfs_commit_data *data = dreq->commit_data; struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, @@ -581,9 +581,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; - data->args.context = dreq->ctx; - data->args.lock_context = dreq->l_ctx; - data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); @@ -625,7 +622,7 @@ static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) { dreq->commit_data = nfs_commitdata_alloc(); if (dreq->commit_data != NULL) - dreq->commit_data->req = (struct nfs_page *) dreq; + dreq->commit_data->dreq = dreq; } #else static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b777bda..29ab441 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -314,24 +314,25 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); -extern void nfs_commit_free(struct nfs_write_data *p); +extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); -extern int nfs_initiate_commit(struct nfs_write_data *data, - struct rpc_clnt *clnt, +extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); +extern int nfs_initiate_commit(struct rpc_clnt *clnt, + struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, int how); -extern void nfs_init_commit(struct nfs_write_data *data, +extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct 
pnfs_layout_segment *lseg); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg); void nfs_commit_clear_lock(struct nfs_inode *nfsi); -void nfs_commitdata_release(void *data); -void nfs_commit_release_pages(struct nfs_write_data *data); +void nfs_commitdata_release(struct nfs_commit_data *data); +void nfs_commit_release_pages(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); void nfs_request_remove_commit_list(struct nfs_page *req); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5242eae..b1daca7 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -848,7 +848,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_ rpc_call_start(task); } -static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + rpc_call_start(task); +} + +static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; @@ -856,7 +861,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } @@ -907,6 +912,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, .commit_done = nfs3_commit_done, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a77cc9a..01e53e9 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1287,7 +1287,7 @@ static void 
nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, * }; */ static void encode_commit3args(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_commitargs *args) { __be32 *p; @@ -1300,7 +1300,7 @@ static void encode_commit3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_commitargs *args) { encode_commit3args(xdr, args); } @@ -2319,7 +2319,7 @@ out_status: */ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_commitres *result) { enum nfs_stat status; int error; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 15aeba2..675ce3b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -250,7 +250,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, } /* Fake up some data that will cause nfs_commit_release to retry the writes. */ -static void prepare_to_resend_writes(struct nfs_write_data *data) +static void prepare_to_resend_writes(struct nfs_commit_data *data) { struct nfs_page *first = nfs_list_entry(data->pages.next); @@ -261,11 +261,11 @@ static void prepare_to_resend_writes(struct nfs_write_data *data) } static int filelayout_commit_done_cb(struct rpc_task *task, - struct nfs_write_data *data) + struct nfs_commit_data *data) { int reset = 0; - if (filelayout_async_handle_error(task, data->args.context->state, + if (filelayout_async_handle_error(task, data->context->state, data->ds_clp, &reset) == -EAGAIN) { dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); @@ -315,15 +315,42 @@ static void filelayout_write_release(void *data) wdata->mds_ops->rpc_release(data); } -static void filelayout_commit_release(void *data) +static void filelayout_commit_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data 
*)data; + struct nfs_commit_data *wdata = data; - nfs_commit_release_pages(wdata); - if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) - nfs_commit_clear_lock(NFS_I(wdata->inode)); - put_lseg(wdata->lseg); - nfs_commitdata_release(wdata); + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, &wdata->res.seq_res, + task)) + return; + + rpc_call_start(task); +} + +static void filelayout_write_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_commit_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + + rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); +} + +static void filelayout_commit_release(void *calldata) +{ + struct nfs_commit_data *data = calldata; + + nfs_commit_release_pages(data); + if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding)) + nfs_commit_clear_lock(NFS_I(data->inode)); + put_lseg(data->lseg); + nfs_commitdata_release(data); } static const struct rpc_call_ops filelayout_read_call_ops = { @@ -341,9 +368,9 @@ static const struct rpc_call_ops filelayout_write_call_ops = { }; static const struct rpc_call_ops filelayout_commit_call_ops = { - .rpc_call_prepare = filelayout_write_prepare, - .rpc_call_done = filelayout_write_call_done, - .rpc_count_stats = filelayout_write_count_stats, + .rpc_call_prepare = filelayout_commit_prepare, + .rpc_call_done = filelayout_write_commit_done, + .rpc_count_stats = filelayout_commit_count_stats, .rpc_release = filelayout_commit_release, }; @@ -922,7 +949,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) return flseg->fh_array[i]; } -static int filelayout_initiate_commit(struct nfs_write_data *data, int how) +static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) { struct pnfs_layout_segment *lseg = 
data->lseg; struct nfs4_pnfs_ds *ds; @@ -941,12 +968,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); - data->write_done_cb = filelayout_commit_done_cb; + data->commit_done_cb = filelayout_commit_done_cb; data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, + return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, &filelayout_commit_call_ops, how); } @@ -1008,7 +1035,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) { struct nfs4_fl_commit_info *fl_cinfo; struct nfs4_fl_commit_bucket *bucket; - struct nfs_write_data *data; + struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; @@ -1044,7 +1071,7 @@ static int filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how) { - struct nfs_write_data *data, *tmp; + struct nfs_commit_data *data, *tmp; LIST_HEAD(list); unsigned int nreq = 0; @@ -1071,7 +1098,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_del_init(&data->pages); if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL); - nfs_initiate_commit(data, NFS_CLIENT(inode), + nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { struct nfs4_fl_commit_info *fl_cinfo; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75eb883..cc04b6e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3468,7 +3468,17 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_ rpc_call_start(task); } -static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + 
rpc_call_start(task); +} + +static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) { struct inode *inode = data->inode; @@ -3480,14 +3490,14 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat return 0; } -static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb(task, data); + return data->commit_done_cb(task, data); } -static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -3496,8 +3506,8 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa data->res.fattr = NULL; } else data->args.bitmask = server->cache_consistency_bitmask; - if (!data->write_done_cb) - data->write_done_cb = nfs4_commit_done_cb; + if (data->commit_done_cb == NULL) + data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); @@ -6591,6 +6601,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, + .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, .commit_done = nfs4_commit_done, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c54aae3..4c3cc0e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1103,7 +1103,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg encode_nfs4_stateid(xdr, arg->stateid); } -static void encode_commit(struct xdr_stream *xdr, const struct 
nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -2448,7 +2448,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, * a COMMIT request */ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeargs *args) + struct nfs_commitargs *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -4102,7 +4102,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier) return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } -static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { int status; @@ -6353,7 +6353,7 @@ out: * Decode COMMIT response */ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_writeres *res) + struct nfs_commitres *res) { struct compound_hdr hdr; int status; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b63b6f4..bf80503 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -688,8 +688,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d rpc_call_start(task); } +static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + BUG(); +} + static void -nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { BUG(); } @@ -764,6 +769,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, + .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 
c074623..54f7c0f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -48,11 +48,12 @@ static const struct rpc_call_ops nfs_commit_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; +static struct kmem_cache *nfs_cdata_cachep; static mempool_t *nfs_commit_mempool; -struct nfs_write_data *nfs_commitdata_alloc(void) +struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); @@ -62,10 +63,8 @@ struct nfs_write_data *nfs_commitdata_alloc(void) } EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); -void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_commit_data *p) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } EXPORT_SYMBOL_GPL(nfs_commit_free); @@ -1179,6 +1178,13 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->write_rpc_prepare(task, data); } +void nfs_commit_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_commit_data *data = calldata; + + NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); +} + static const struct rpc_call_ops nfs_write_partial_ops = { .rpc_call_prepare = nfs_write_prepare, .rpc_call_done = nfs_writeback_done_partial, @@ -1355,16 +1361,14 @@ void nfs_commit_clear_lock(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); -void nfs_commitdata_release(void *data) +void nfs_commitdata_release(struct nfs_commit_data *data) { - struct nfs_write_data *wdata = data; - - put_nfs_open_context(wdata->args.context); - nfs_commit_free(wdata); + put_nfs_open_context(data->context); + nfs_commit_free(data); } EXPORT_SYMBOL_GPL(nfs_commitdata_release); -int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, +int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, 
const struct rpc_call_ops *call_ops, int how) { @@ -1403,7 +1407,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit); /* * Set up the argument/result storage required for the RPC call. */ -void nfs_init_commit(struct nfs_write_data *data, +void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg) { @@ -1424,8 +1428,7 @@ void nfs_init_commit(struct nfs_write_data *data, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; - data->args.context = get_nfs_open_context(first->wb_context); - data->res.count = 0; + data->context = get_nfs_open_context(first->wb_context); data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); @@ -1455,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit); static int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { - struct nfs_write_data *data; + struct nfs_commit_data *data; data = nfs_commitdata_alloc(); @@ -1464,7 +1467,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* Set up the argument struct */ nfs_init_commit(data, head, NULL); - return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); + return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: nfs_retry_commit(head, NULL); nfs_commit_clear_lock(NFS_I(inode)); @@ -1476,7 +1479,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) */ static void nfs_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; dprintk("NFS: %5u nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); @@ -1485,7 +1488,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_done(task, data); } -void nfs_commit_release_pages(struct nfs_write_data *data) +void nfs_commit_release_pages(struct nfs_commit_data *data) { struct nfs_page *req; int 
status = data->task.tk_status; @@ -1526,7 +1529,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_release_pages); static void nfs_commit_release(void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; nfs_commit_release_pages(data); nfs_commit_clear_lock(NFS_I(data->inode)); @@ -1534,7 +1537,7 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_commit_prepare, .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; @@ -1753,6 +1756,13 @@ int __init nfs_init_writepagecache(void) if (nfs_wdata_mempool == NULL) return -ENOMEM; + nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", + sizeof(struct nfs_commit_data), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (nfs_cdata_cachep == NULL) + return -ENOMEM; + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 52a1bdb..d5d68f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -552,8 +552,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_write_data *nfs_commitdata_alloc(void); -extern void nfs_commit_free(struct nfs_write_data *wdata); +extern struct nfs_commit_data *nfs_commitdata_alloc(void); +extern void nfs_commit_free(struct nfs_commit_data *data); #else static inline int nfs_commit_inode(struct inode *inode, int how) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7ba3551..8fb036a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -519,6 +519,24 @@ struct nfs_writeres { }; /* + * Arguments to the commit call. 
+ */ +struct nfs_commitargs { + struct nfs_fh *fh; + __u64 offset; + __u32 count; + const u32 *bitmask; + struct nfs4_sequence_args seq_args; +}; + +struct nfs_commitres { + struct nfs_fattr *fattr; + struct nfs_writeverf *verf; + const struct nfs_server *server; + struct nfs4_sequence_res seq_res; +}; + +/* * Common arguments to the unlink call */ struct nfs_removeargs { @@ -1171,6 +1189,8 @@ struct nfs_read_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; +struct nfs_direct_req; + struct nfs_write_data { struct rpc_task task; struct inode *inode; @@ -1186,7 +1206,6 @@ struct nfs_write_data { struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_index; const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 @@ -1197,6 +1216,25 @@ struct nfs_write_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; +struct nfs_commit_data { + struct rpc_task task; + struct inode *inode; + struct rpc_cred *cred; + struct nfs_fattr fattr; + struct nfs_writeverf verf; + struct list_head pages; /* Coalesced requests we wish to flush */ + struct list_head list; /* lists of struct nfs_write_data */ + struct nfs_direct_req *dreq; /* O_DIRECT request */ + struct nfs_commitargs args; /* argument struct */ + struct nfs_commitres res; /* result struct */ + struct nfs_open_context *context; + struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_commit_index; + const struct rpc_call_ops *mds_ops; + int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); +}; + struct nfs_unlinkdata { struct hlist_node list; struct nfs_removeargs args; @@ -1277,8 +1315,9 @@ struct nfs_rpc_ops { void (*write_setup) (struct nfs_write_data *, struct rpc_message *); void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct 
nfs_write_data *); - void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); - int (*commit_done) (struct rpc_task *, struct nfs_write_data *); + void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); + void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); + int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); -- cgit v0.10.2 From 31f6852a4c187c031456581b35e146c0d5bbdecd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:40 -0400 Subject: NFS: dprintks in directio code were referencing task after put Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5897dfe..fb7fbaa 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -357,15 +357,15 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) break; - rpc_put_task(task); dprintk("NFS: %5u initiated direct read call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, + task->tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); + rpc_put_task(task); started += bytes; user_addr += bytes; @@ -784,15 +784,15 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) break; - rpc_put_task(task); dprintk("NFS: %5u initiated direct write call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, + task->tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); + rpc_put_task(task); started += bytes; user_addr += bytes; -- cgit v0.10.2 From c5996c4efb95bbb80a25acc890357c9eae998eeb Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:41 -0400 
Subject: NFS: reverse arg order in nfs_initiate_[read|write] Make it consistent with nfs_initiate_commit. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29ab441..650127f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,7 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); struct nfs_pageio_descriptor; /* read.c */ -extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +extern int nfs_initiate_read(struct rpc_clnt *clnt, + struct nfs_read_data *data, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, @@ -315,8 +316,8 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); -extern int nfs_initiate_write(struct nfs_write_data *data, - struct rpc_clnt *clnt, +extern int nfs_initiate_write(struct rpc_clnt *clnt, + struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 675ce3b..adbadcb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -413,7 +413,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, + status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, &filelayout_read_call_ops); BUG_ON(status != 0); return PNFS_ATTEMPTED; @@ -460,7 +460,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an 
asynchronous write */ - status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, + status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, &filelayout_write_call_ops, sync); BUG_ON(status != 0); return PNFS_ATTEMPTED; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a4be28..4ddba67 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -169,7 +169,8 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } -int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +int nfs_initiate_read(struct rpc_clnt *clnt, + struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->inode; @@ -240,7 +241,7 @@ static int nfs_do_read(struct nfs_read_data *data, { struct inode *inode = data->args.context->dentry->d_inode; - return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); + return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops); } static int diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 54f7c0f..76735dd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -836,8 +836,8 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct nfs_write_data *data, - struct rpc_clnt *clnt, +int nfs_initiate_write(struct rpc_clnt *clnt, + struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -937,7 +937,7 @@ static int nfs_do_write(struct nfs_write_data *data, { struct inode *inode = data->args.context->dentry->d_inode; - return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); + return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how); } static int nfs_do_multiple_writes(struct list_head *head, -- cgit v0.10.2 From cd12ae326f5c040f61d64233514609adabe84ab8 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:42 -0400 Subject: NFS: remove unnecessary casts of void pointers in nfs4filelayout.c Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c 
b/fs/nfs/nfs4filelayout.c index adbadcb..31afd81 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -191,7 +191,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; rdata->read_done_cb = filelayout_read_done_cb; @@ -205,7 +205,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); @@ -215,14 +215,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); @@ -282,7 +282,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, @@ -294,7 +294,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; /* Note this may cause RPC to be resent */ 
wdata->mds_ops->rpc_call_done(task, data); @@ -302,14 +302,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); -- cgit v0.10.2 From b5542849764aa56fd3f05c0041195b637b9d2ac2 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:43 -0400 Subject: NFS: use req_offset where appropriate Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 31afd81..c536328 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -776,8 +776,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; - r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; + p_stripe = (u64)req_offset(prev); + r_stripe = (u64)req_offset(req); stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; do_div(p_stripe, stripe_unit); @@ -896,8 +896,7 @@ filelayout_choose_commit_list(struct nfs_page *req, * to store the value calculated in filelayout_write_pagelist * and just use that here. 
*/ - j = nfs4_fl_calc_j_index(lseg, - (loff_t)req->wb_index << PAGE_CACHE_SHIFT); + j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; list = &buckets[i].written; -- cgit v0.10.2 From cd841605f7a721878d8a2d1362484723d8abf569 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:44 -0400 Subject: NFS: create common nfs_pgio_header for both read and write In order to avoid duplicating all the data in nfs_read_data whenever we split it up into multiple RPC calls (either due to a short read result or due to rsize < PAGE_SIZE), we split out the bits that are the same per RPC call into a separate "header" structure. The goal this patch moves towards is to have a single header refcounted by several rpc_data structures. Thus, we want to always refer from rpc_data to the header, and not the other way. This patch comes close to that ideal, but the directio code currently needs some special casing, isolated in the nfs_direct_[read_write]hdr_release() functions. This will be dealt with in a future patch.
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7f6a23f..7a48251 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err) struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct nfs_read_data *rdata = (struct nfs_read_data *)par->data; do { struct page *page = bvec->bv_page; @@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(page); } while (bvec >= bio->bi_io_vec); if (!uptodate) { - if (!rdata->pnfs_error) - rdata->pnfs_error = -EIO; - pnfs_set_lo_fail(rdata->lseg); + struct nfs_read_data *rdata = par->data; + struct nfs_pgio_header *header = rdata->header; + + if (!header->pnfs_error) + header->pnfs_error = -EIO; + pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused) { struct nfs_read_data *rdata = data; - rdata->task.tk_status = rdata->pnfs_error; + rdata->task.tk_status = rdata->header->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); schedule_work(&rdata->task.u.tk_work); } @@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused) static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { + struct nfs_pgio_header *header = rdata->header; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -256,10 +259,10 @@ bl_read_pagelist(struct nfs_read_data *rdata) bl_put_extent(cow_read); bio = bl_submit_bio(READ, bio); /* Get the next one */ - be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); if (!be) { - rdata->pnfs_error = -EIO; + header->pnfs_error = -EIO; goto out; } extent_length = 
be->be_length - @@ -286,7 +289,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect, pages[i], be_read, bl_end_io_read, par); if (IS_ERR(bio)) { - rdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } @@ -294,9 +297,9 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect += PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS; } - if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { + if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { rdata->res.eof = 1; - rdata->res.count = rdata->inode->i_size - f_offset; + rdata->res.count = header->inode->i_size - f_offset; } else { rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; } @@ -345,7 +348,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; do { struct page *page = bvec->bv_page; @@ -358,9 +360,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } while (bvec >= bio->bi_io_vec); if (unlikely(!uptodate)) { - if (!wdata->pnfs_error) - wdata->pnfs_error = -EIO; - pnfs_set_lo_fail(wdata->lseg); + struct nfs_write_data *data = par->data; + struct nfs_pgio_header *header = data->header; + + if (!header->pnfs_error) + header->pnfs_error = -EIO; + pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -370,12 +375,13 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; + struct nfs_write_data *data = par->data; + struct nfs_pgio_header *header = data->header; if (!uptodate) { - if (!wdata->pnfs_error) - wdata->pnfs_error = -EIO; - pnfs_set_lo_fail(wdata->lseg); + if (!header->pnfs_error) + header->pnfs_error = -EIO; + 
pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -391,9 +397,9 @@ static void bl_write_cleanup(struct work_struct *work) dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); wdata = container_of(task, struct nfs_write_data, task); - if (likely(!wdata->pnfs_error)) { + if (likely(!wdata->header->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), wdata->args.offset, wdata->args.count); } pnfs_ld_write_done(wdata); @@ -404,12 +410,12 @@ static void bl_end_par_io_write(void *data, int num_se) { struct nfs_write_data *wdata = data; - if (unlikely(wdata->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, + if (unlikely(wdata->header->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->pnfs_error; + wdata->task.tk_status = wdata->header->pnfs_error; wdata->verf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); @@ -540,6 +546,7 @@ check_page: static enum pnfs_try_status bl_write_pagelist(struct nfs_write_data *wdata, int sync) { + struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -552,7 +559,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) pgoff_t index; u64 temp; int npg_per_block = - NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; + NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); /* At this point, wdata->pages is a (sequential) list of nfs_pages. 
@@ -566,7 +573,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) /* At this point, have to be more careful with error handling */ isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); - be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); if (!be || !is_writable(be, isect)) { dprintk("%s no matching extents!\n", __func__); goto out_mds; @@ -597,10 +604,10 @@ fill_invalid_ext: dprintk("%s zero %dth page: index %lu isect %llu\n", __func__, npg_zero, index, (unsigned long long)isect); - page = bl_find_get_zeroing_page(wdata->inode, index, + page = bl_find_get_zeroing_page(header->inode, index, cow_read); if (unlikely(IS_ERR(page))) { - wdata->pnfs_error = PTR_ERR(page); + header->pnfs_error = PTR_ERR(page); goto out; } else if (page == NULL) goto next_page; @@ -612,7 +619,7 @@ fill_invalid_ext: __func__, ret); end_page_writeback(page); page_cache_release(page); - wdata->pnfs_error = ret; + header->pnfs_error = ret; goto out; } if (likely(!bl_push_one_short_extent(be->be_inval))) @@ -620,11 +627,11 @@ fill_invalid_ext: else { end_page_writeback(page); page_cache_release(page); - wdata->pnfs_error = -ENOMEM; + header->pnfs_error = -ENOMEM; goto out; } /* FIXME: This should be done in bi_end_io */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + mark_extents_written(BLK_LSEG2EXT(header->lseg), page->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE); @@ -632,7 +639,7 @@ fill_invalid_ext: isect, page, be, bl_end_io_write_zero, par); if (IS_ERR(bio)) { - wdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } @@ -653,10 +660,10 @@ next_page: bl_put_extent(be); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ - be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, NULL); if (!be || !is_writable(be, isect)) { - wdata->pnfs_error = -EINVAL; + 
header->pnfs_error = -EINVAL; goto out; } if (be->be_state == PNFS_BLOCK_INVALID_DATA) { @@ -664,7 +671,7 @@ next_page: be->be_inval))) par->bse_count++; else { - wdata->pnfs_error = -ENOMEM; + header->pnfs_error = -ENOMEM; goto out; } } @@ -677,7 +684,7 @@ next_page: if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); - wdata->pnfs_error = ret; + header->pnfs_error = ret; goto out; } } @@ -685,7 +692,7 @@ next_page: isect, pages[i], be, bl_end_io_write, par); if (IS_ERR(bio)) { - wdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fb7fbaa..56176af 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -242,7 +242,7 @@ static void nfs_direct_read_release(void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req; int status = data->task.tk_status; spin_lock(&dreq->lock); @@ -269,6 +269,15 @@ static const struct rpc_call_ops nfs_read_direct_ops = { .rpc_release = nfs_direct_read_release, }; +static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) +{ + struct nfs_read_data *data = &rhdr->rpc_data; + + if (data->pagevec != data->page_array) + kfree(data->pagevec); + nfs_readhdr_free(&rhdr->header); +} + /* * For each rsize'd chunk of the user's buffer, dispatch an NFS READ * operation. 
If nfs_readdata_alloc() or get_user_pages() fails, @@ -301,6 +310,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, ssize_t started = 0; do { + struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t bytes; @@ -308,23 +318,24 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, bytes = min(rsize,count); result = -ENOMEM; - data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!rhdr)) break; + data = &rhdr->rpc_data; down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { - nfs_readdata_free(data); + nfs_direct_readhdr_release(rhdr); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_free(data); + nfs_direct_readhdr_release(rhdr); break; } bytes -= pgbase; @@ -333,9 +344,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, get_dreq(dreq); - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; + rhdr->header.req = (struct nfs_page *) dreq; + rhdr->header.inode = inode; + rhdr->header.cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = get_nfs_open_context(ctx); data->args.lock_context = dreq->l_ctx; @@ -447,13 +458,23 @@ out: return result; } +static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) +{ + struct nfs_write_data *data = &whdr->rpc_data; + + if (data->pagevec != data->page_array) + kfree(data->pagevec); + nfs_writehdr_free(&whdr->header); +} + static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { while (!list_empty(&dreq->rewrite_list)) { - struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); -
list_del(&data->pages); - nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_free(data); + struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); + struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + list_del(&hdr->pages); + nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages); + nfs_direct_writehdr_release(whdr); } } @@ -463,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct inode *inode = dreq->inode; struct list_head *p; struct nfs_write_data *data; + struct nfs_pgio_header *hdr; struct rpc_task *task; struct rpc_message msg = { .rpc_cred = dreq->ctx->cred, @@ -479,7 +501,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) get_dreq(dreq); list_for_each(p, &dreq->rewrite_list) { - data = list_entry(p, struct nfs_write_data, pages); + hdr = list_entry(p, struct nfs_pgio_header, pages); + data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data; get_dreq(dreq); @@ -652,7 +675,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) static void nfs_direct_write_release(void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_pgio_header *hdr = data->header; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req; int status = data->task.tk_status; spin_lock(&dreq->lock); @@ -684,7 +708,7 @@ out_unlock: spin_unlock(&dreq->lock); if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, data->inode); + nfs_direct_write_complete(dreq, hdr->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -725,6 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, ssize_t started = 0; do { + struct nfs_write_header *whdr; struct nfs_write_data *data; size_t bytes; @@ -732,23 +757,25 @@ static ssize_t 
nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, bytes = min(wsize,count); result = -ENOMEM; - data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!whdr)) break; + data = &whdr->rpc_data; + down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { - nfs_writedata_free(data); + nfs_direct_writehdr_release(whdr); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_free(data); + nfs_direct_writehdr_release(whdr); break; } bytes -= pgbase; @@ -757,11 +784,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, get_dreq(dreq); - list_move_tail(&data->pages, &dreq->rewrite_list); + list_move_tail(&whdr->header.pages, &dreq->rewrite_list); - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; + whdr->header.req = (struct nfs_page *) dreq; + whdr->header.inode = inode; + whdr->header.cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.lock_context = dreq->l_ctx; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 650127f..7dc9be1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,6 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); struct nfs_pageio_descriptor; /* read.c */ +extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages); +extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); @@ -309,6 +311,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data
*rdata); /* write.c */ +extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages); +extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index b1daca7..56dcefc 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs3_async_handle_jukebox(task, data->inode)) + struct inode *inode = data->header->inode; + + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - nfs_invalidate_atime(data->inode); - nfs_refresh_inode(data->inode, &data->fattr); + nfs_invalidate_atime(inode); + nfs_refresh_inode(inode, &data->fattr); return 0; } @@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs3_async_handle_jukebox(task, data->inode)) + struct inode *inode = data->header->inode; + + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index c536328..ad1d680 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -148,6 +148,7 @@ wait_on_recovery: static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { + struct nfs_pgio_header *hdr = data->header; int reset = 0; dprintk("%s DS read\n", __func__); @@ -157,7 +158,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, 
data->ds_clp, data->ds_clp->cl_session); if (reset) { - pnfs_set_lo_fail(data->lseg); + pnfs_set_lo_fail(hdr->lseg); nfs4_reset_read(task, data); } rpc_restart_call_prepare(task); @@ -175,13 +176,15 @@ static int filelayout_read_done_cb(struct rpc_task *task, static void filelayout_set_layoutcommit(struct nfs_write_data *wdata) { - if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || + struct nfs_pgio_header *hdr = wdata->header; + + if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || wdata->res.verf->committed == NFS_FILE_SYNC) return; pnfs_set_layoutcommit(wdata); - dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, - (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); + dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } /* @@ -210,27 +213,28 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); /* Note this may cause RPC to be resent */ - rdata->mds_ops->rpc_call_done(task, data); + rdata->header->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; - rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; - put_lseg(rdata->lseg); - rdata->mds_ops->rpc_release(data); + put_lseg(rdata->header->lseg); + rdata->header->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { + struct nfs_pgio_header *hdr = data->header; int reset = 0; if (filelayout_async_handle_error(task, data->args.context->state, @@ -238,7 +242,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session 
%p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { - pnfs_set_lo_fail(data->lseg); + pnfs_set_lo_fail(hdr->lseg); nfs4_reset_write(task, data); } rpc_restart_call_prepare(task); @@ -297,22 +301,22 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) struct nfs_write_data *wdata = data; /* Note this may cause RPC to be resent */ - wdata->mds_ops->rpc_call_done(task, data); + wdata->header->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; - rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->lseg); - wdata->mds_ops->rpc_release(data); + put_lseg(wdata->header->lseg); + wdata->header->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -377,7 +381,8 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { - struct pnfs_layout_segment *lseg = data->lseg; + struct nfs_pgio_header *hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; loff_t offset = data->args.offset; u32 j, idx; @@ -385,7 +390,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) int status; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", - __func__, data->inode->i_ino, + __func__, hdr->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) @@ -423,7 +428,8 @@ filelayout_read_pagelist(struct nfs_read_data *data) static enum pnfs_try_status filelayout_write_pagelist(struct nfs_write_data *data, int sync) { - struct pnfs_layout_segment *lseg = data->lseg; + struct nfs_pgio_header 
*hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; loff_t offset = data->args.offset; u32 j, idx; @@ -445,7 +451,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) return PNFS_NOT_ATTEMPTED; } dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, - data->inode->i_ino, sync, (size_t) data->args.count, offset, + hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cc04b6e..5375862 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3336,12 +3336,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, void __nfs4_read_done_cb(struct nfs_read_data *data) { - nfs_invalidate_atime(data->inode); + nfs_invalidate_atime(data->header->inode); } static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_server *server = NFS_SERVER(data->inode); + struct nfs_server *server = NFS_SERVER(data->header->inode); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3376,7 +3376,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) @@ -3387,22 +3387,25 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da /* Reset the the nfs_read_data to send the read to the MDS. 
*/ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) { + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(data->lseg); - data->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; + data->ds_clp = NULL; /* offsets will differ in the dense stripe case */ data->args.offset = data->mds_offset; - data->ds_clp = NULL; - data->args.fh = NFS_FH(data->inode); + data->args.fh = NFS_FH(inode); data->read_done_cb = nfs4_read_done_cb; - task->tk_ops = data->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(data->inode)); + task->tk_ops = hdr->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(inode)); } EXPORT_SYMBOL_GPL(nfs4_reset_read); static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3426,25 +3429,28 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) /* Reset the the nfs_write_data to send the write to the MDS. 
*/ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) { + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(data->lseg); - data->lseg = NULL; - data->ds_clp = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; + data->ds_clp = NULL; data->write_done_cb = nfs4_write_done_cb; - data->args.fh = NFS_FH(data->inode); + data->args.fh = NFS_FH(inode); data->args.bitmask = data->res.server->cache_consistency_bitmask; data->args.offset = data->mds_offset; data->res.fattr = &data->fattr; - task->tk_ops = data->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(data->inode)); + task->tk_ops = hdr->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(inode)); } EXPORT_SYMBOL_GPL(nfs4_reset_write); static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->inode); + struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->lseg) { + if (data->header->lseg) { data->args.bitmask = NULL; data->res.fattr = NULL; } else @@ -3460,7 +3466,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4bff4a3..fbf4874 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private) int objio_read_pagelist(struct nfs_read_data *rdata) { + struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, - rdata->lseg, rdata->args.pages, rdata->args.pgbase, + 
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, + hdr->lseg, rdata->args.pages, rdata->args.pgbase, rdata->args.offset, rdata->args.count, rdata, GFP_KERNEL, &objios); if (unlikely(ret)) @@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; struct nfs_write_data *wdata = objios->oir.rpcdata; + struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(wdata->inode->i_mapping, index); + struct page *page = find_get_page(mapping, index); if (!page) { - page = find_or_create_page(wdata->inode->i_mapping, - index, GFP_NOFS); + page = find_or_create_page(mapping, index, GFP_NOFS); if (unlikely(!page)) { dprintk("%s: grab_cache_page Failed index=0x%lx\n", __func__, index); @@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = { int objio_write_pagelist(struct nfs_write_data *wdata, int how) { + struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, - wdata->lseg, wdata->args.pages, wdata->args.pgbase, + ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, + hdr->lseg, wdata->args.pages, wdata->args.pgbase, wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, &objios); if (unlikely(ret)) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 595c5fc..8746135 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) if (status >= 0) rdata->res.count = status; else - rdata->pnfs_error = status; + rdata->header->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ @@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) enum pnfs_try_status objlayout_read_pagelist(struct nfs_read_data 
*rdata) { + struct nfs_pgio_header *hdr = rdata->header; + struct inode *inode = hdr->inode; loff_t offset = rdata->args.offset; size_t count = rdata->args.count; int err; loff_t eof; - eof = i_size_read(rdata->inode); + eof = i_size_read(inode); if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; @@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) } rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(rdata->lseg, &rdata->args.pages, + _fix_verify_io_params(hdr->lseg, &rdata->args.pages, &rdata->args.pgbase, rdata->args.offset, rdata->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, rdata->res.eof); err = objio_read_pagelist(rdata); out: if (unlikely(err)) { - rdata->pnfs_error = err; + hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); return PNFS_NOT_ATTEMPTED; } @@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) wdata->res.count = status; wdata->verf.committed = oir->committed; } else { - wdata->pnfs_error = status; + wdata->header->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ @@ -363,15 +365,16 @@ enum pnfs_try_status objlayout_write_pagelist(struct nfs_write_data *wdata, int how) { + struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(wdata->lseg, &wdata->args.pages, + _fix_verify_io_params(hdr->lseg, &wdata->args.pages, &wdata->args.pgbase, wdata->args.offset, wdata->args.count); err = objio_write_pagelist(wdata, how); if (unlikely(err)) { - wdata->pnfs_error = err; + hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); return PNFS_NOT_ATTEMPTED; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9c4d14a..d705da4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1191,13 +1191,15 @@ static int 
pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head * static void pnfs_ld_handle_write_error(struct nfs_write_data *data) { - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + struct nfs_pgio_header *hdr = data->header; + + dprintk("pnfs write error = %d\n", hdr->pnfs_error); + if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); + pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); + data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages); } /* @@ -1205,13 +1207,15 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) */ void pnfs_ld_write_done(struct nfs_write_data *data) { - if (likely(!data->pnfs_error)) { + struct nfs_pgio_header *hdr = data->header; + + if (!hdr->pnfs_error) { pnfs_set_layoutcommit(data); - data->mds_ops->rpc_call_done(&data->task, data); + hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_write_error(data); - put_lseg(data->lseg); - data->mds_ops->rpc_release(data); + put_lseg(hdr->lseg); + hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1219,12 +1223,14 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_write_data *data) { - list_splice_tail_init(&data->pages, &desc->pg_list); - if (data->req && list_empty(&data->req->wb_list)) - nfs_list_add_request(data->req, &desc->pg_list); + struct nfs_pgio_header *hdr = data->header; + + list_splice_tail_init(&hdr->pages, &desc->pg_list); + if (hdr->req && list_empty(&hdr->req->wb_list)) + nfs_list_add_request(hdr->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; - put_lseg(data->lseg); + put_lseg(hdr->lseg); 
nfs_writedata_release(data); } @@ -1234,20 +1240,21 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, struct pnfs_layout_segment *lseg, int how) { - struct inode *inode = wdata->inode; + struct nfs_pgio_header *hdr = wdata->header; + struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); - wdata->mds_ops = call_ops; - wdata->lseg = get_lseg(lseg); + hdr->mds_ops = call_ops; + hdr->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(wdata->lseg); - wdata->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; } else nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); @@ -1318,13 +1325,15 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *h static void pnfs_ld_handle_read_error(struct nfs_read_data *data) { - dprintk("pnfs read error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + struct nfs_pgio_header *hdr = data->header; + + dprintk("pnfs read error = %d\n", hdr->pnfs_error); + if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); + pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages); + data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages); } /* @@ -1332,13 +1341,15 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) */ void pnfs_ld_read_done(struct nfs_read_data *data) { - if (likely(!data->pnfs_error)) { + struct nfs_pgio_header *hdr = data->header; + + if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(data); - data->mds_ops->rpc_call_done(&data->task, data); + 
hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); - put_lseg(data->lseg); - data->mds_ops->rpc_release(data); + put_lseg(hdr->lseg); + hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1346,9 +1357,11 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_read_data *data) { - list_splice_tail_init(&data->pages, &desc->pg_list); - if (data->req && list_empty(&data->req->wb_list)) - nfs_list_add_request(data->req, &desc->pg_list); + struct nfs_pgio_header *hdr = data->header; + + list_splice_tail_init(&hdr->pages, &desc->pg_list); + if (hdr->req && list_empty(&hdr->req->wb_list)) + nfs_list_add_request(hdr->req, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; nfs_readdata_release(data); @@ -1362,20 +1375,21 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct inode *inode = rdata->inode; + struct nfs_pgio_header *hdr = rdata->header; + struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; - rdata->mds_ops = call_ops; - rdata->lseg = get_lseg(lseg); + hdr->mds_ops = call_ops; + hdr->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(rdata->lseg); - rdata->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; } else { nfs_inc_stats(inode, NFSIOS_PNFS_READ); } @@ -1450,30 +1464,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { - struct nfs_inode *nfsi = NFS_I(wdata->inode); + struct nfs_pgio_header *hdr = wdata->header; + struct inode *inode = hdr->inode; + struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; - 
spin_lock(&nfsi->vfs_inode.i_lock); + spin_lock(&inode->i_lock); if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { mark_as_dirty = true; dprintk("%s: Set layoutcommit for inode %lu ", - __func__, wdata->inode->i_ino); + __func__, inode->i_ino); } - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - get_lseg(wdata->lseg); + get_lseg(hdr->lseg); } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - spin_unlock(&nfsi->vfs_inode.i_lock); + spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", - __func__, wdata->lseg, nfsi->layout->plh_lwb); + __func__, hdr->lseg, nfsi->layout->plh_lwb); /* if pnfs_layoutcommit_inode() runs between inode locks, the next one * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ if (mark_as_dirty) - mark_inode_dirty_sync(wdata->inode); + mark_inode_dirty_sync(inode); } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index bf80503..22ee705 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { + struct inode *inode = data->header->inode; + if (nfs_async_handle_expired_key(task)) return -EAGAIN; - nfs_invalidate_atime(data->inode); + nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ @@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { + struct inode *inode = data->header->inode; + if 
(nfs_async_handle_expired_key(task)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4ddba67..d6d4682 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,19 +35,24 @@ static const struct rpc_call_ops nfs_read_full_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) { - struct nfs_read_data *p; + struct nfs_read_header *p; p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); if (p) { - INIT_LIST_HEAD(&p->pages); - p->npages = pagecount; - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; + struct nfs_pgio_header *hdr = &p->header; + struct nfs_read_data *data = &p->rpc_data; + + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&data->list); + data->npages = pagecount; + data->header = hdr; + if (pagecount <= ARRAY_SIZE(data->page_array)) + data->pagevec = data->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); - if (!p->pagevec) { + data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + if (!data->pagevec) { kmem_cache_free(nfs_rdata_cachep, p); p = NULL; } @@ -56,17 +61,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readhdr_free(struct nfs_pgio_header *hdr) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - kmem_cache_free(nfs_rdata_cachep, p); + struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); + + kmem_cache_free(nfs_rdata_cachep, rhdr); } void nfs_readdata_release(struct nfs_read_data *rdata) { put_nfs_open_context(rdata->args.context); - nfs_readdata_free(rdata); + if (rdata->pagevec != rdata->page_array) + kfree(rdata->pagevec); 
+ nfs_readhdr_free(rdata->header); } static @@ -173,13 +180,13 @@ int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = data->cred, + .rpc_cred = data->header->cred, }; struct rpc_task_setup task_setup_data = { .task = &data->task, @@ -216,11 +223,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, unsigned int count, unsigned int offset) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = data->header->inode; - data->req = req; - data->inode = inode; - data->cred = req->wb_context->cred; + data->header->req = req; + data->header->inode = inode; + data->header->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -239,7 +246,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, static int nfs_do_read(struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { - struct inode *inode = data->args.context->dentry->d_inode; + struct inode *inode = data->header->inode; return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops); } @@ -293,6 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; + struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -306,9 +314,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes,rsize); - data = nfs_readdata_alloc(1); - if (!data) + rhdr = nfs_readhdr_alloc(1); + if (!rhdr) 
goto out_bad; + data = &rhdr->rpc_data; data->pagevec[0] = page; nfs_read_rpcsetup(req, data, len, offset); list_add(&data->list, res); @@ -333,26 +342,28 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * { struct nfs_page *req; struct page **pages; + struct nfs_read_header *rhdr; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { + rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!rhdr) { nfs_async_read_error(head); ret = -ENOMEM; goto out; } + data = &rhdr->rpc_data; pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); + nfs_list_add_request(req, &rhdr->header.pages); *pages++ = req->wb_page; } - req = nfs_list_entry(data->pages.next); + req = nfs_list_entry(rhdr->header.pages.next); nfs_read_rpcsetup(req, data, desc->pg_count, 0); list_add(&data->list, res); @@ -390,20 +401,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { */ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { + struct inode *inode = data->header->inode; int status; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - status = NFS_PROTO(data->inode)->read_done(task, data); + status = NFS_PROTO(inode)->read_done(task, data); if (status != 0) return status; - nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); - nfs_mark_for_revalidate(data->inode); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_mark_for_revalidate(inode); } return 0; } @@ -417,7 +429,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data return; /* This is a 
short read! */ - nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) return; @@ -449,7 +461,7 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) static void nfs_readpage_release_partial(void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_page *req = data->req; + struct nfs_page *req = data->header->req; struct page *page = req->wb_page; int status = data->task.tk_status; @@ -461,13 +473,13 @@ static void nfs_readpage_release_partial(void *calldata) SetPageUptodate(page); nfs_readpage_release(req); } - nfs_readdata_release(calldata); + nfs_readdata_release(data); } void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - NFS_PROTO(data->inode)->read_rpc_prepare(task, data); + NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); } static const struct rpc_call_ops nfs_read_partial_ops = { @@ -524,9 +536,10 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) static void nfs_readpage_release_full(void *calldata) { struct nfs_read_data *data = calldata; + struct nfs_pgio_header *hdr = data->header; - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); nfs_readpage_release(req); @@ -685,7 +698,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_read_data), + sizeof(struct nfs_read_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 76735dd..dbb5c0a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -69,19 +69,24 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct 
nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) { - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { + struct nfs_pgio_header *hdr = &p->header; + struct nfs_write_data *data = &p->rpc_data; + memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - p->npages = pagecount; - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&data->list); + data->npages = pagecount; + data->header = hdr; + if (pagecount <= ARRAY_SIZE(data->page_array)) + data->pagevec = data->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!p->pagevec) { + data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!data->pagevec) { mempool_free(p, nfs_wdata_mempool); p = NULL; } @@ -90,17 +95,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -void nfs_writedata_free(struct nfs_write_data *p) +void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_wdata_mempool); + struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + mempool_free(whdr, nfs_wdata_mempool); } void nfs_writedata_release(struct nfs_write_data *wdata) { put_nfs_open_context(wdata->args.context); - nfs_writedata_free(wdata); + if (wdata->pagevec != wdata->page_array) + kfree(wdata->pagevec); + nfs_writehdr_free(wdata->header); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -507,9 +513,8 @@ static inline int nfs_write_need_commit(struct nfs_write_data *data) { if (data->verf.committed == NFS_DATA_SYNC) - return data->lseg == NULL; - else - return data->verf.committed != NFS_FILE_SYNC; + return data->header->lseg == NULL; + return 
data->verf.committed != NFS_FILE_SYNC; } static inline @@ -517,7 +522,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, struct nfs_write_data *data) { if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req, data->lseg); + nfs_mark_request_commit(req, data->header->lseg); return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { @@ -841,13 +846,13 @@ int nfs_initiate_write(struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = data->cred, + .rpc_cred = data->header->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, @@ -896,14 +901,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req, unsigned int count, unsigned int offset, int how) { + struct nfs_pgio_header *hdr = data->header; struct inode *inode = req->wb_context->dentry->d_inode; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. 
*/ - data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; - data->cred = req->wb_context->cred; + hdr->req = req; + hdr->inode = inode = req->wb_context->dentry->d_inode; + hdr->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -935,7 +941,7 @@ static int nfs_do_write(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) { - struct inode *inode = data->args.context->dentry->d_inode; + struct inode *inode = data->header->inode; return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how); } @@ -981,6 +987,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; + struct nfs_write_header *whdr; struct nfs_write_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -1000,9 +1007,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes, wsize); - data = nfs_writedata_alloc(1); - if (!data) + whdr = nfs_writehdr_alloc(1); + if (!whdr) goto out_bad; + data = &whdr->rpc_data; data->pagevec[0] = page; nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); list_add(&data->list, res); @@ -1036,13 +1044,14 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r { struct nfs_page *req; struct page **pages; + struct nfs_write_header *whdr; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { + whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!whdr) { while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1051,14 +1060,15 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r ret = -ENOMEM; goto out; } + data 
= &whdr->rpc_data; pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); + nfs_list_add_request(req, &whdr->header.pages); *pages++ = req->wb_page; } - req = nfs_list_entry(data->pages.next); + req = nfs_list_entry(whdr->header.pages.next); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) @@ -1126,10 +1136,11 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) dprintk("NFS: %5u write(%s/%lld %d@%lld)", task->tk_pid, - data->req->wb_context->dentry->d_inode->i_sb->s_id, + data->header->inode->i_sb->s_id, (long long) - NFS_FILEID(data->req->wb_context->dentry->d_inode), - data->req->wb_bytes, (long long)req_offset(data->req)); + NFS_FILEID(data->header->inode), + data->header->req->wb_bytes, + (long long)req_offset(data->header->req)); nfs_writeback_done(task, data); } @@ -1137,7 +1148,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) static void nfs_writeback_release_partial(void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req = data->req; + struct nfs_page *req = data->header->req; struct page *page = req->wb_page; int status = data->task.tk_status; @@ -1169,13 +1180,13 @@ static void nfs_writeback_release_partial(void *calldata) out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req, data); - nfs_writedata_release(calldata); + nfs_writedata_release(data); } void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - NFS_PROTO(data->inode)->write_rpc_prepare(task, data); + NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); } void nfs_commit_prepare(struct rpc_task *task, void *calldata) @@ -1208,11 +1219,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) static void nfs_writeback_release_full(void *calldata) { struct 
nfs_write_data *data = calldata; + struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; /* Update attributes as result of writeback. */ - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); @@ -1233,7 +1245,7 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, data->lseg); + nfs_mark_request_commit(req, hdr->lseg); dprintk(" marked for commit\n"); goto next; } @@ -1244,7 +1256,7 @@ remove_request: nfs_unlock_request(req); nfs_end_page_writeback(page); } - nfs_writedata_release(calldata); + nfs_writedata_release(data); } static const struct rpc_call_ops nfs_write_full_ops = { @@ -1261,6 +1273,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + struct inode *inode = data->header->inode; int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1273,10 +1286,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. 
*/ - status = NFS_PROTO(data->inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) return; - nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { @@ -1294,7 +1307,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->nfs_client->cl_hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } @@ -1304,7 +1317,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; - nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count != 0) { @@ -1333,7 +1346,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. 
*/ task->tk_status = -EIO; } - return; } @@ -1745,7 +1757,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_data), + sizeof(struct nfs_write_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d5d68f3..8d3a2b8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -569,12 +569,6 @@ nfs_have_writebacks(struct inode *inode) } /* - * Allocate nfs_write_data structures - */ -extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); -extern void nfs_writedata_free(struct nfs_write_data *); - -/* * linux/fs/nfs/read.c */ extern int nfs_readpage(struct file *, struct page *); @@ -585,12 +579,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); /* - * Allocate nfs_read_data structures - */ -extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); -extern void nfs_readdata_free(struct nfs_read_data *); - -/* * linux/fs/nfs3proc.c */ #ifdef CONFIG_NFS_V3_ACL diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8fb036a..fee3241 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1168,52 +1168,58 @@ struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) struct nfs_read_data { + struct nfs_pgio_header *header; + struct list_head list; struct rpc_task task; - struct inode *inode; - struct rpc_cred *cred; struct nfs_fattr fattr; /* fattr storage */ - struct list_head pages; /* Coalesced read requests */ - struct list_head list; /* lists of struct nfs_read_data */ - struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ struct nfs_readargs args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ - struct pnfs_layout_segment *lseg; - struct nfs_client 
*ds_clp; /* pNFS data server */ - const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; - int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_client *ds_clp; /* pNFS data server */ +}; + +struct nfs_pgio_header { + struct inode *inode; + struct rpc_cred *cred; + struct list_head pages; + struct nfs_page *req; + struct pnfs_layout_segment *lseg; + const struct rpc_call_ops *mds_ops; + int pnfs_error; +}; + +struct nfs_read_header { + struct nfs_pgio_header header; + struct nfs_read_data rpc_data; }; struct nfs_direct_req; struct nfs_write_data { + struct nfs_pgio_header *header; + struct list_head list; struct rpc_task task; - struct inode *inode; - struct rpc_cred *cred; struct nfs_fattr fattr; struct nfs_writeverf verf; - struct list_head pages; /* Coalesced requests we wish to flush */ - struct list_head list; /* lists of struct nfs_write_data */ - struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ - struct pnfs_layout_segment *lseg; - struct nfs_client *ds_clp; /* pNFS data server */ - const struct rpc_call_ops *mds_ops; - int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); -#ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ -#endif + int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_client *ds_clp; /* pNFS data server */ +}; + +struct nfs_write_header { + struct nfs_pgio_header header; + struct nfs_write_data rpc_data; }; struct nfs_commit_data { -- cgit v0.10.2 From 30dd374f6fc1b202db3a1b57b61afff1326bad92 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:45 -0400 Subject: NFS: create struct 
nfs_page_array Both nfs_read_data and nfs_write_data devote several fields (pagevec, npages, page_array) to tracking their page vector; these can be combined into a single shared struct. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7a48251..7ae8a60 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -242,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->npages, f_offset, (unsigned int)rdata->args.count); + rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) @@ -252,7 +252,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->npages; i++) { + for (i = pg_index; i < rdata->pages.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -285,7 +285,8 @@ bl_read_pagelist(struct nfs_read_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ?
cow_read : be; - bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, + bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, + READ, isect, pages[i], be_read, bl_end_io_read, par); if (IS_ERR(bio)) { @@ -654,7 +655,7 @@ next_page: /* Middle pages */ pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->npages; i++) { + for (i = pg_index; i < wdata->pages.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -688,7 +689,7 @@ next_page: goto out; } } - bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, + bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, isect, pages[i], be, bl_end_io_write, par); if (IS_ERR(bio)) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 56176af..0faba4c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -252,11 +252,11 @@ static void nfs_direct_read_release(void *calldata) } else { dreq->count += data->res.count; spin_unlock(&dreq->lock); - nfs_direct_dirty_pages(data->pagevec, + nfs_direct_dirty_pages(data->pages.pagevec, data->args.pgbase, data->res.count); } - nfs_direct_release_pages(data->pagevec, data->npages); + nfs_direct_release_pages(data->pages.pagevec, data->pages.npages); if (put_dreq(dreq)) nfs_direct_complete(dreq); @@ -273,8 +273,8 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) { struct nfs_read_data *data = &rhdr->rpc_data; - if (data->pagevec != data->page_array) - kfree(data->pagevec); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); nfs_readhdr_free(&rhdr->header); } @@ -312,6 +312,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, do { struct nfs_read_header *rhdr; struct nfs_read_data *data; + struct nfs_page_array *pages; size_t bytes; pgbase = user_addr & ~PAGE_MASK; @@ -322,24 +323,25 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, if (unlikely(!rhdr)) break; data = &rhdr->rpc_data; + pages = 
&data->pages; down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 1, 0, data->pagevec, NULL); + pages->npages, 1, 0, pages->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { nfs_direct_readhdr_release(rhdr); break; } - if ((unsigned)result < data->npages) { + if ((unsigned)result < pages->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); + nfs_direct_release_pages(pages->pagevec, result); nfs_direct_readhdr_release(rhdr); break; } bytes -= pgbase; - data->npages = result; + pages->npages = result; } get_dreq(dreq); @@ -352,7 +354,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = pages->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.eof = 0; @@ -462,8 +464,8 @@ static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) { struct nfs_write_data *data = &whdr->rpc_data; - if (data->pagevec != data->page_array) - kfree(data->pagevec); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); nfs_writehdr_free(&whdr->header); } @@ -472,8 +474,10 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) while (!list_empty(&dreq->rewrite_list)) { struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + struct nfs_page_array *p = &whdr->rpc_data.pages; + list_del(&hdr->pages); - nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages); + nfs_direct_release_pages(p->pagevec, p->npages); nfs_direct_writehdr_release(whdr); } } @@ -751,6 +755,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, do { struct nfs_write_header *whdr;
struct nfs_write_data *data; + struct nfs_page_array *pages; size_t bytes; pgbase = user_addr & ~PAGE_MASK; @@ -762,24 +767,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, break; data = &whdr->rpc_data; + pages = &data->pages; down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 0, 0, data->pagevec, NULL); + pages->npages, 0, 0, pages->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { nfs_direct_writehdr_release(whdr); break; } - if ((unsigned)result < data->npages) { + if ((unsigned)result < pages->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); + nfs_direct_release_pages(pages->pagevec, result); nfs_direct_writehdr_release(whdr); break; } bytes -= pgbase; - data->npages = result; + pages->npages = result; } get_dreq(dreq); @@ -794,7 +800,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = pages->pagevec; data->args.count = bytes; data->args.stable = sync; data->res.fattr = &data->fattr; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7dc9be1..5c3d77f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -210,6 +210,7 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); +extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); /* nfs2xdr.c */ extern int nfs_stat_to_errno(enum nfs_stat); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d21fcea..d349bd4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,6 +26,19 @@ static struct kmem_cache *nfs_page_cachep; +bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +{ + p->npages = pagecount; + if (pagecount <=
ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; + else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + if (!p->pagevec) + p->npages = 0; + } + return p->pagevec != NULL; +} + static inline struct nfs_page * nfs_page_alloc(void) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d6d4682..f6ab30b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -46,16 +46,10 @@ struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) INIT_LIST_HEAD(&hdr->pages); INIT_LIST_HEAD(&data->list); - data->npages = pagecount; data->header = hdr; - if (pagecount <= ARRAY_SIZE(data->page_array)) - data->pagevec = data->page_array; - else { - data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); - if (!data->pagevec) { - kmem_cache_free(nfs_rdata_cachep, p); - p = NULL; - } + if (!nfs_pgarray_set(&data->pages, pagecount)) { + kmem_cache_free(nfs_rdata_cachep, p); + p = NULL; } } return p; @@ -71,8 +65,8 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) void nfs_readdata_release(struct nfs_read_data *rdata) { put_nfs_open_context(rdata->args.context); - if (rdata->pagevec != rdata->page_array) - kfree(rdata->pagevec); + if (rdata->pages.pagevec != rdata->pages.page_array) + kfree(rdata->pages.pagevec); nfs_readhdr_free(rdata->header); } @@ -232,7 +226,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pagevec; + data->args.pages = data->pages.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -318,7 +312,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head if (!rhdr) goto out_bad; data = &rhdr->rpc_data; - data->pagevec[0] = page; + data->pages.pagevec[0] = page; nfs_read_rpcsetup(req, data, len, offset); 
list_add(&data->list, res); requests++; @@ -356,7 +350,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * } data = &rhdr->rpc_data; - pages = data->pagevec; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dbb5c0a..2efae04 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,16 +80,10 @@ struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&hdr->pages); INIT_LIST_HEAD(&data->list); - data->npages = pagecount; data->header = hdr; - if (pagecount <= ARRAY_SIZE(data->page_array)) - data->pagevec = data->page_array; - else { - data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!data->pagevec) { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } + if (!nfs_pgarray_set(&data->pages, pagecount)) { + mempool_free(p, nfs_wdata_mempool); + p = NULL; } } return p; @@ -104,8 +98,8 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) void nfs_writedata_release(struct nfs_write_data *wdata) { put_nfs_open_context(wdata->args.context); - if (wdata->pagevec != wdata->page_array) - kfree(wdata->pagevec); + if (wdata->pages.pagevec != wdata->pages.page_array) + kfree(wdata->pages.pagevec); nfs_writehdr_free(wdata->header); } @@ -916,7 +910,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pagevec; + data->args.pages = data->pages.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -1011,7 +1005,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head if (!whdr) goto out_bad; data = &whdr->rpc_data; - data->pagevec[0] = page; + data->pages.pagevec[0] = page; 
nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); list_add(&data->list, res); requests++; @@ -1061,7 +1055,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r goto out; } data = &whdr->rpc_data; - pages = data->pagevec; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index fee3241..e34beaf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1167,19 +1167,23 @@ struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) +struct nfs_page_array { + struct page **pagevec; + unsigned int npages; /* Max length of pagevec */ + struct page *page_array[NFS_PAGEVEC_SIZE]; +}; + struct nfs_read_data { struct nfs_pgio_header *header; struct list_head list; struct rpc_task task; struct nfs_fattr fattr; /* fattr storage */ - struct page **pagevec; - unsigned int npages; /* Max length of pagevec */ struct nfs_readargs args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; - struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ }; @@ -1206,14 +1210,12 @@ struct nfs_write_data { struct rpc_task task; struct nfs_fattr fattr; struct nfs_writeverf verf; - struct page **pagevec; - unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ }; -- cgit v0.10.2 From 4db6e0b74c0f6dfc2f9c0690e8df512e3b635983 Mon Sep 17 00:00:00 2001 From: Fred 
Isaman Date: Fri, 20 Apr 2012 14:47:46 -0400 Subject: NFS: merge _full and _partial read rpc_ops Decouple nfs_pgio_header and nfs_read_data, and have (possibly multiple) nfs_read_datas each take a refcount on nfs_pgio_header. For the moment, this keeps nfs_read_header as a way to preallocate a single nfs_read_data with the nfs_pgio_header. The code doesn't need this, and would be prettier without it, but given the amount of churn I am already introducing I didn't want to play with tuning new mempools. This also fixes a bug in pnfs_ld_handle_read_error. In the case of desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing the replay attempt to do nothing. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0faba4c..90b00ce 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, bytes = min(rsize,count); result = -ENOMEM; - rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes)); + rhdr = nfs_readhdr_alloc(); if (unlikely(!rhdr)) break; - data = &rhdr->rpc_data; + data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes)); + if (!data) { + nfs_readhdr_free(&rhdr->header); + break; + } + data->header = &rhdr->header; + atomic_inc(&data->header->refcnt); pages = &data->pages; down_read(&current->mm->mmap_sem); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5c3d77f..33af5e5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -200,6 +200,7 @@ struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; +struct nfs_pageio_descriptor; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); @@ -211,6 +212,10 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void
nfs_destroy_directcache(void); extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); +extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, + void (*release)(struct nfs_pgio_header *hdr)); +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); /* nfs2xdr.c */ extern int nfs_stat_to_errno(enum nfs_stat); @@ -295,17 +300,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif -struct nfs_pageio_descriptor; /* read.c */ -extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages); +extern void nfs_async_read_error(struct list_head *head); +extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); +extern void nfs_read_completion(struct nfs_pgio_header *hdr); +extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct list_head *head); - + struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ad1d680..333e765 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -227,7 +227,6 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; - put_lseg(rdata->header->lseg); rdata->header->mds_ops->rpc_release(data); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5375862..ce31ab2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3391,8 
+3391,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) struct inode *inode = hdr->inode; dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(hdr->lseg); - hdr->lseg = NULL; data->ds_clp = NULL; /* offsets will differ in the dense stripe case */ data->args.offset = data->mds_offset; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d349bd4..cd4c038 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -39,6 +39,30 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) return p->pagevec != NULL; } +void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, + void (*release)(struct nfs_pgio_header *hdr)) +{ + hdr->req = nfs_list_entry(desc->pg_list.next); + hdr->inode = desc->pg_inode; + hdr->cred = hdr->req->wb_context->cred; + hdr->io_start = req_offset(hdr->req); + hdr->good_bytes = desc->pg_count; + hdr->release = release; +} + +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) +{ + spin_lock(&hdr->lock); + if (pos < hdr->io_start + hdr->good_bytes) { + set_bit(NFS_IOHDR_ERROR, &hdr->flags); + clear_bit(NFS_IOHDR_EOF, &hdr->flags); + hdr->good_bytes = pos - hdr->io_start; + hdr->error = error; + } + spin_unlock(&hdr->lock); +} + static inline struct nfs_page * nfs_page_alloc(void) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d705da4..d1a91db 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages); + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) + data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + &hdr->pages); } /* @@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data) hdr->mds_ops->rpc_call_done(&data->task, data); } else 
pnfs_ld_handle_read_error(data); - put_lseg(hdr->lseg); hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, { struct nfs_pgio_header *hdr = data->header; - list_splice_tail_init(&hdr->pages, &desc->pg_list); - if (hdr->req && list_empty(&hdr->req->wb_list)) - nfs_list_add_request(hdr->req, &desc->pg_list); - nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + list_splice_tail_init(&hdr->pages, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + } nfs_readdata_release(data); } @@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, enum pnfs_try_status trypnfs; hdr->mds_ops = call_ops; - hdr->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); - if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(hdr->lseg); - hdr->lseg = NULL; - } else { + if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); - } dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } @@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_entry(head->next, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_read_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea put_lseg(lseg); } +static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) +{ + put_lseg(hdr->lseg); + nfs_readhdr_free(hdr); +} + int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_read_header *rhdr; + struct nfs_pgio_header *hdr; int 
ret; - ret = nfs_generic_pagein(desc, &head); - if (ret != 0) { + rhdr = nfs_readhdr_alloc(); + if (!rhdr) { + nfs_async_read_error(&desc->pg_list); + ret = -ENOMEM; put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - pnfs_do_multiple_reads(desc, &head); - return 0; + hdr = &rhdr->header; + nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); + hdr->lseg = get_lseg(desc->pg_lseg); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pagein(desc, hdr); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + set_bit(NFS_IOHDR_REDO, &hdr->flags); + } else + pnfs_do_multiple_reads(desc, &hdr->rpc_list); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f6ab30b..c9633b2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,29 +30,49 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static const struct nfs_pageio_ops nfs_pageio_read_ops; -static const struct rpc_call_ops nfs_read_partial_ops; -static const struct rpc_call_ops nfs_read_full_ops; +static const struct rpc_call_ops nfs_read_common_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) +struct nfs_read_header *nfs_readhdr_alloc() { - struct nfs_read_header *p; + struct nfs_read_header *rhdr; - p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); - if (p) { - struct nfs_pgio_header *hdr = &p->header; - struct nfs_read_data *data = &p->rpc_data; + rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); + if (rhdr) { + struct nfs_pgio_header *hdr = &rhdr->header; INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&data->list); + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); + } + return rhdr; +} + +struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_read_data *data, *prealloc; + + prealloc = &container_of(hdr, 
struct nfs_read_header, header)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { data->header = hdr; - if (!nfs_pgarray_set(&data->pages, pagecount)) { - kmem_cache_free(nfs_rdata_cachep, p); - p = NULL; - } + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; } - return p; +out: + return data; } void nfs_readhdr_free(struct nfs_pgio_header *hdr) @@ -64,10 +84,18 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) void nfs_readdata_release(struct nfs_read_data *rdata) { + struct nfs_pgio_header *hdr = rdata->header; + struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); + put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) kfree(rdata->pages.pagevec); - nfs_readhdr_free(rdata->header); + if (rdata != &read_header->rpc_data) + kfree(rdata); + else + rdata->header = NULL; + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); } static @@ -79,35 +107,6 @@ int nfs_return_empty_page(struct page *page) return 0; } -static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) -{ - unsigned int remainder = data->args.count - data->res.count; - unsigned int base = data->args.pgbase + data->res.count; - unsigned int pglen; - struct page **pages; - - if (data->res.eof == 0 || remainder == 0) - return; - /* - * Note: "remainder" can never be negative, since we check for - * this in the XDR code. 
- */ - pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; - base &= ~PAGE_CACHE_MASK; - pglen = PAGE_CACHE_SIZE - base; - for (;;) { - if (remainder <= pglen) { - zero_user(*pages, base, remainder); - break; - } - zero_user(*pages, base, pglen); - pages++; - remainder -= pglen; - pglen = PAGE_CACHE_SIZE; - base = 0; - } -} - void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -170,6 +169,46 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } +/* Note io was page aligned */ +void nfs_read_completion(struct nfs_pgio_header *hdr) +{ + unsigned long bytes = 0; + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out; + if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); + } + SetPageUptodate(page); + nfs_list_remove_request(req); + nfs_readpage_release(req); + bytes += PAGE_SIZE; + } + } else { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + bytes += req->wb_bytes; + if (bytes <= hdr->good_bytes) + SetPageUptodate(req->wb_page); + nfs_list_remove_request(req); + nfs_readpage_release(req); + } + } +out: + hdr->release(hdr); +} + int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops) @@ -214,16 +253,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, +static void nfs_read_rpcsetup(struct nfs_read_data *data, unsigned int count, unsigned int offset) { - struct inode *inode = data->header->inode; - - data->header->req = req; - 
data->header->inode = inode; - data->header->cred = req->wb_context->cred; + struct nfs_page *req = data->header->req; - data->args.fh = NFS_FH(inode); + data->args.fh = NFS_FH(data->header->inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pages.pagevec; @@ -255,7 +290,7 @@ nfs_do_multiple_reads(struct list_head *head, while (!list_empty(head)) { int ret2; - data = list_entry(head->next, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_read_data, list); list_del_init(&data->list); ret2 = nfs_do_read(data, call_ops); @@ -265,7 +300,7 @@ nfs_do_multiple_reads(struct list_head *head, return ret; } -static void +void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; @@ -290,11 +325,11 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { - struct nfs_page *req = nfs_list_entry(desc->pg_list.next); + struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -302,85 +337,97 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head int ret = 0; nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); - rhdr = nfs_readhdr_alloc(1); - if (!rhdr) + data = nfs_readdata_alloc(hdr, 1); + if (!data) goto out_bad; - data = &rhdr->rpc_data; data->pages.pagevec[0] = page; - nfs_read_rpcsetup(req, data, len, offset); - list_add(&data->list, res); + nfs_read_rpcsetup(data, len, offset); + list_add(&data->list, &hdr->rpc_list); 
requests++; nbytes -= len; offset += len; } while(nbytes != 0); - atomic_set(&req->wb_complete, requests); - desc->pg_rpc_callops = &nfs_read_partial_ops; + desc->pg_rpc_callops = &nfs_read_common_ops; return ret; out_bad: - while (!list_empty(res)) { - data = list_entry(res->next, struct nfs_read_data, list); + while (!list_empty(&hdr->rpc_list)) { + data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); list_del(&data->list); nfs_readdata_release(data); } - nfs_readpage_release(req); + nfs_async_read_error(&hdr->pages); return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { struct nfs_page *req; struct page **pages; - struct nfs_read_header *rhdr; - struct nfs_read_data *data; + struct nfs_read_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!rhdr) { + data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) { nfs_async_read_error(head); ret = -ENOMEM; goto out; } - data = &rhdr->rpc_data; pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &rhdr->header.pages); + nfs_list_add_request(req, &hdr->pages); *pages++ = req->wb_page; } - req = nfs_list_entry(rhdr->header.pages.next); - nfs_read_rpcsetup(req, data, desc->pg_count, 0); - list_add(&data->list, res); - desc->pg_rpc_callops = &nfs_read_full_ops; + nfs_read_rpcsetup(data, desc->pg_count, 0); + list_add(&data->list, &hdr->rpc_list); + desc->pg_rpc_callops = &nfs_read_common_ops; out: return ret; } -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { if (desc->pg_bsize < PAGE_CACHE_SIZE) - 
return nfs_pagein_multi(desc, head); - return nfs_pagein_one(desc, head); + return nfs_pagein_multi(desc, hdr); + return nfs_pagein_one(desc, hdr); } static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_read_header *rhdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_pagein(desc, &head); + rhdr = nfs_readhdr_alloc(); + if (!rhdr) { + nfs_async_read_error(&desc->pg_list); + return -ENOMEM; + } + hdr = &rhdr->header; + nfs_pgheader_init(desc, hdr, nfs_readhdr_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pagein(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); + ret = nfs_do_multiple_reads(&hdr->rpc_list, + desc->pg_rpc_callops); + else + set_bit(NFS_IOHDR_REDO, &hdr->flags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); return ret; } @@ -419,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; - if (resp->eof || resp->count == argp->count) - return; - /* This is a short read! */ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ - if (resp->count == 0) + if (resp->count == 0) { + nfs_set_pgio_error(data->header, -EIO, argp->offset); return; - + } /* Yes, so retry the read at the end of the data */ data->mds_offset += resp->count; argp->offset += resp->count; @@ -436,38 +481,34 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data rpc_restart_call_prepare(task); } -/* - * Handle a read reply that fills part of a page. 
- */ -static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) +static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - + struct nfs_pgio_header *hdr = data->header; + + /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ if (nfs_readpage_result(task, data) != 0) return; if (task->tk_status < 0) - return; - - nfs_readpage_truncate_uninitialised_page(data); - nfs_readpage_retry(task, data); + nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); + else if (data->res.eof) { + loff_t bound; + + bound = data->args.offset + data->res.count; + spin_lock(&hdr->lock); + if (bound < hdr->io_start + hdr->good_bytes) { + set_bit(NFS_IOHDR_EOF, &hdr->flags); + clear_bit(NFS_IOHDR_ERROR, &hdr->flags); + hdr->good_bytes = bound - hdr->io_start; + } + spin_unlock(&hdr->lock); + } else if (data->res.count != data->args.count) + nfs_readpage_retry(task, data); } -static void nfs_readpage_release_partial(void *calldata) +static void nfs_readpage_release_common(void *calldata) { - struct nfs_read_data *data = calldata; - struct nfs_page *req = data->header->req; - struct page *page = req->wb_page; - int status = data->task.tk_status; - - if (status < 0) - set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); - - if (atomic_dec_and_test(&req->wb_complete)) { - if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) - SetPageUptodate(page); - nfs_readpage_release(req); - } - nfs_readdata_release(data); + nfs_readdata_release(calldata); } void nfs_read_prepare(struct rpc_task *task, void *calldata) @@ -476,75 +517,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); } -static const struct rpc_call_ops nfs_read_partial_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_readpage_result_partial, - .rpc_release = nfs_readpage_release_partial, -}; - -static void 
nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) -{ - unsigned int count = data->res.count; - unsigned int base = data->args.pgbase; - struct page **pages; - - if (data->res.eof) - count = data->args.count; - if (unlikely(count == 0)) - return; - pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; - base &= ~PAGE_CACHE_MASK; - count += base; - for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) - SetPageUptodate(*pages); - if (count == 0) - return; - /* Was this a short read? */ - if (data->res.eof || data->res.count == data->args.count) - SetPageUptodate(*pages); -} - -/* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) -{ - struct nfs_read_data *data = calldata; - - if (nfs_readpage_result(task, data) != 0) - return; - if (task->tk_status < 0) - return; - /* - * Note: nfs_readpage_retry may change the values of - * data->args. In the multi-page case, we therefore need - * to ensure that we call nfs_readpage_set_pages_uptodate() - * first. 
- */ - nfs_readpage_truncate_uninitialised_page(data); - nfs_readpage_set_pages_uptodate(data); - nfs_readpage_retry(task, data); -} - -static void nfs_readpage_release_full(void *calldata) -{ - struct nfs_read_data *data = calldata; - struct nfs_pgio_header *hdr = data->header; - - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - nfs_list_remove_request(req); - nfs_readpage_release(req); - } - nfs_readdata_release(calldata); -} - -static const struct rpc_call_ops nfs_read_full_ops = { +static const struct rpc_call_ops nfs_read_common_ops = { .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_readpage_result_full, - .rpc_release = nfs_readpage_release_full, + .rpc_call_done = nfs_readpage_result_common, + .rpc_release = nfs_readpage_release_common, }; /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index eac30d6..5c52034 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,7 +27,6 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, - PG_PARTIAL_READ_FAILED, PG_COMMIT_TO_DS, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e34beaf..1648621 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1187,14 +1187,30 @@ struct nfs_read_data { struct nfs_client *ds_clp; /* pNFS data server */ }; +/* used as flag bits in nfs_pgio_header */ +enum { + NFS_IOHDR_ERROR = 0, + NFS_IOHDR_EOF, + NFS_IOHDR_REDO, +}; + struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; + struct list_head rpc_list; + atomic_t refcnt; struct nfs_page *req; struct pnfs_layout_segment *lseg; + loff_t io_start; const struct rpc_call_ops *mds_ops; + void (*release) (struct nfs_pgio_header *hdr); + spinlock_t lock; + /* fields protected by lock */ int pnfs_error; + int error; /* merge with pnfs_error */ + unsigned long good_bytes; /* boundary of good data */ + unsigned long flags; }; struct nfs_read_header { -- cgit v0.10.2 
From 6c75dc0d498caa402fb17b1bf769835a9db875c8 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:47 -0400 Subject: NFS: merge _full and _partial write rpc_ops Decouple nfs_pgio_header and nfs_write_data, and have (possibly multiple) nfs_write_datas each take a refcount on nfs_pgio_header. For the moment keeps nfs_write_header as a way to preallocate a single nfs_write_data with the nfs_pgio_header. The code doesn't need this, and would be prettier without, but given the amount of churn I am already introducing I didn't want to play with tuning new mempools. This also fixes bug in pnfs_ld_handle_write_error. In the case of desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing replay attempt to do nothing. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 90b00ce..22a40c4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -768,11 +768,17 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, bytes = min(wsize,count); result = -ENOMEM; - whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes)); + whdr = nfs_writehdr_alloc(); if (unlikely(!whdr)) break; - data = &whdr->rpc_data; + data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes)); + if (!data) { + nfs_writehdr_free(&whdr->header); + break; + } + data->header = &whdr->header; + atomic_inc(&data->header->refcnt); pages = &data->pages; down_read(&current->mm->mmap_sem); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 33af5e5..16bc9c4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -319,10 +319,14 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages); +extern void nfs_async_write_error(struct list_head *head); +extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void 
nfs_writehdr_free(struct nfs_pgio_header *hdr); +extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount); +extern void nfs_write_completion(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct list_head *head); + struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 333e765..02d8170 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -314,7 +314,6 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->header->lseg); wdata->header->mds_ops->rpc_release(data); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ce31ab2..87af80d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3431,8 +3431,6 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) struct inode *inode = hdr->inode; dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(hdr->lseg); - hdr->lseg = NULL; data->ds_clp = NULL; data->write_done_cb = nfs4_write_done_cb; data->args.fh = NFS_FH(inode); @@ -3448,7 +3446,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag { struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->header->lseg) { + if (data->ds_clp) { data->args.bitmask = NULL; data->res.fattr = NULL; } else diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d1a91db..d515f00 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1199,7 +1199,9 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages); + if 
(!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) + data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + &hdr->pages); } /* @@ -1214,7 +1216,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data) hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_write_error(data); - put_lseg(hdr->lseg); hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1225,12 +1226,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, { struct nfs_pgio_header *hdr = data->header; - list_splice_tail_init(&hdr->pages, &desc->pg_list); - if (hdr->req && list_empty(&hdr->req->wb_list)) - nfs_list_add_request(hdr->req, &desc->pg_list); - nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; - put_lseg(hdr->lseg); + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + list_splice_tail_init(&hdr->pages, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + } nfs_writedata_release(data); } @@ -1246,18 +1246,12 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, struct nfs_server *nfss = NFS_SERVER(inode); hdr->mds_ops = call_ops; - hdr->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(hdr->lseg); - hdr->lseg = NULL; - } else + if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); - dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } @@ -1273,7 +1267,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_entry(head->next, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_write_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1283,20 +1277,40 @@ 
pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he put_lseg(lseg); } +static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) +{ + put_lseg(hdr->lseg); + nfs_writehdr_free(hdr); +} + int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_write_header *whdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_flush(desc, &head); - if (ret != 0) { + whdr = nfs_writehdr_alloc(); + if (!whdr) { + nfs_async_write_error(&desc->pg_list); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - return ret; + return -ENOMEM; } - pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); - return 0; + hdr = &whdr->header; + nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); + hdr->lseg = get_lseg(desc->pg_lseg); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_flush(desc, hdr); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + set_bit(NFS_IOHDR_REDO, &hdr->flags); + } else + pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2efae04..076075e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -42,8 +42,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, struct inode *inode, int ioflags); static void nfs_redirty_request(struct nfs_page *req); -static const struct rpc_call_ops nfs_write_partial_ops; -static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static struct kmem_cache *nfs_wdata_cachep; @@ -69,26 +68,47 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) +struct nfs_write_header *nfs_writehdr_alloc(void) { struct nfs_write_header *p = 
mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { struct nfs_pgio_header *hdr = &p->header; - struct nfs_write_data *data = &p->rpc_data; memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&data->list); - data->header = hdr; - if (!nfs_pgarray_set(&data->pages, pagecount)) { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); } return p; } +struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_write_data *data, *prealloc; + + prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { + data->header = hdr; + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; + } +out: + return data; +} + void nfs_writehdr_free(struct nfs_pgio_header *hdr) { struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); @@ -97,10 +117,18 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) void nfs_writedata_release(struct nfs_write_data *wdata) { + struct nfs_pgio_header *hdr = wdata->header; + struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); + put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) kfree(wdata->pages.pagevec); - nfs_writehdr_free(wdata->header); + if (wdata != &write_header->rpc_data) + kfree(wdata); + else + wdata->header = NULL; + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -511,20 +539,6 @@ int nfs_write_need_commit(struct nfs_write_data *data) return data->verf.committed != NFS_FILE_SYNC; } -static inline -int 
nfs_reschedule_unstable_write(struct nfs_page *req, - struct nfs_write_data *data) -{ - if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req, data->header->lseg); - return 1; - } - if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { - nfs_mark_request_dirty(req); - return 1; - } - return 0; -} #else static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) @@ -542,13 +556,43 @@ int nfs_write_need_commit(struct nfs_write_data *data) return 0; } -static inline -int nfs_reschedule_unstable_write(struct nfs_page *req, - struct nfs_write_data *data) +#endif + +void nfs_write_completion(struct nfs_pgio_header *hdr) { - return 0; + unsigned long bytes = 0; + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + bytes += req->wb_bytes; + nfs_list_remove_request(req); + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && + (hdr->good_bytes < bytes)) { + nfs_set_pageerror(page); + nfs_context_set_write_error(req->wb_context, hdr->error); + goto remove_req; + } + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { + nfs_mark_request_dirty(req); + goto next; + } + if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + nfs_mark_request_commit(req, hdr->lseg); + goto next; + } +remove_req: + nfs_inode_remove_request(req); +next: + nfs_unlock_request(req); + nfs_end_page_writeback(page); + } +out: + hdr->release(hdr); } -#endif #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int @@ -813,17 +857,6 @@ int nfs_updatepage(struct file *file, struct page *page, return status; } -static void nfs_writepage_release(struct nfs_page *req, - struct nfs_write_data *data) -{ - struct page *page = req->wb_page; - - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) - nfs_inode_remove_request(req); - nfs_unlock_request(req); - nfs_end_page_writeback(page); 
-} - static int flush_task_priority(int how) { switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { @@ -890,22 +923,16 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_page *req, - struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_write_data *data, unsigned int count, unsigned int offset, int how) { - struct nfs_pgio_header *hdr = data->header; - struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_page *req = data->header->req; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ - hdr->req = req; - hdr->inode = inode = req->wb_context->dentry->d_inode; - hdr->cred = req->wb_context->cred; - - data->args.fh = NFS_FH(inode); + data->args.fh = NFS_FH(data->header->inode); data->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; @@ -919,7 +946,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, case 0: break; case FLUSH_COND_STABLE: - if (nfs_need_commit(NFS_I(inode))) + if (nfs_need_commit(NFS_I(data->header->inode))) break; default: data->args.stable = NFS_FILE_SYNC; @@ -950,7 +977,7 @@ static int nfs_do_multiple_writes(struct list_head *head, while (!list_empty(head)) { int ret2; - data = list_entry(head->next, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_write_data, list); list_del_init(&data->list); ret2 = nfs_do_write(data, call_ops, how); @@ -973,15 +1000,26 @@ static void nfs_redirty_request(struct nfs_page *req) nfs_end_page_writeback(page); } +void nfs_async_write_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_redirty_request(req); + } +} + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. 
*/ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { - struct nfs_page *req = nfs_list_entry(desc->pg_list.next); + struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_write_header *whdr; struct nfs_write_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -989,6 +1027,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head int ret = 0; nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || @@ -1001,28 +1040,27 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes, wsize); - whdr = nfs_writehdr_alloc(1); - if (!whdr) + data = nfs_writedata_alloc(hdr, 1); + if (!data) goto out_bad; - data = &whdr->rpc_data; data->pages.pagevec[0] = page; - nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); - list_add(&data->list, res); + nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags); + list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - desc->pg_rpc_callops = &nfs_write_partial_ops; + desc->pg_rpc_callops = &nfs_write_common_ops; return ret; out_bad: - while (!list_empty(res)) { - data = list_entry(res->next, struct nfs_write_data, list); + while (!list_empty(&hdr->rpc_list)) { + data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list); list_del(&data->list); nfs_writedata_release(data); } - nfs_redirty_request(req); + nfs_async_write_error(&hdr->pages); return -ENOMEM; } @@ -1034,64 +1072,74 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. 
*/ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { struct nfs_page *req; struct page **pages; - struct nfs_write_header *whdr; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!whdr) { - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_redirty_request(req); - } + data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) { + nfs_async_write_error(head); ret = -ENOMEM; goto out; } - data = &whdr->rpc_data; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &whdr->header.pages); + nfs_list_add_request(req, &hdr->pages); *pages++ = req->wb_page; } - req = nfs_list_entry(whdr->header.pages.next); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); - list_add(&data->list, res); - desc->pg_rpc_callops = &nfs_write_full_ops; + nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags); + list_add(&data->list, &hdr->rpc_list); + desc->pg_rpc_callops = &nfs_write_common_ops; out: return ret; } -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc, head); - return nfs_flush_one(desc, head); + return nfs_flush_multi(desc, hdr); + return nfs_flush_one(desc, hdr); } static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + 
struct nfs_write_header *whdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_flush(desc, &head); + whdr = nfs_writehdr_alloc(); + if (!whdr) { + nfs_async_write_error(&desc->pg_list); + return -ENOMEM; + } + hdr = &whdr->header; + nfs_pgheader_init(desc, hdr, nfs_writehdr_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_flush(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, - desc->pg_ioflags); + ret = nfs_do_multiple_writes(&hdr->rpc_list, + desc->pg_rpc_callops, + desc->pg_ioflags); + else + set_bit(NFS_IOHDR_REDO, &hdr->flags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); return ret; } @@ -1121,62 +1169,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, nfs_pageio_init_write_mds(pgio, inode, ioflags); } -/* - * Handle a write reply that flushed part of a page. - */ -static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - dprintk("NFS: %5u write(%s/%lld %d@%lld)", - task->tk_pid, - data->header->inode->i_sb->s_id, - (long long) - NFS_FILEID(data->header->inode), - data->header->req->wb_bytes, - (long long)req_offset(data->header->req)); - - nfs_writeback_done(task, data); -} - -static void nfs_writeback_release_partial(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_page *req = data->header->req; - struct page *page = req->wb_page; - int status = data->task.tk_status; - - if (status < 0) { - nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, status); - dprintk(", error = %d\n", status); - goto out; - } - - if (nfs_write_need_commit(data)) { - struct inode *inode = page->mapping->host; - - spin_lock(&inode->i_lock); - if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { - /* Do nothing we need to resend the writes */ - } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - 
dprintk(" defer commit\n"); - } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { - set_bit(PG_NEED_RESCHED, &req->wb_flags); - clear_bit(PG_NEED_COMMIT, &req->wb_flags); - dprintk(" server reboot detected\n"); - } - spin_unlock(&inode->i_lock); - } else - dprintk(" OK\n"); - -out: - if (atomic_dec_and_test(&req->wb_complete)) - nfs_writepage_release(req, data); - nfs_writedata_release(data); -} - void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -1190,12 +1182,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static const struct rpc_call_ops nfs_write_partial_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_writeback_done_partial, - .rpc_release = nfs_writeback_release_partial, -}; - /* * Handle a write reply that flushes a whole page. * @@ -1203,60 +1189,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = { * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) +static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; nfs_writeback_done(task, data); } -static void nfs_writeback_release_full(void *calldata) +static void nfs_writeback_release_common(void *calldata) { struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; + struct nfs_page *req = hdr->req; - /* Update attributes as result of writeback. 
*/ - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - nfs_list_remove_request(req); - - dprintk("NFS: %5u write (%s/%lld %d@%lld)", - data->task.tk_pid, - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); - - if (status < 0) { - nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, status); - dprintk(", error = %d\n", status); - goto remove_request; - } - - if (nfs_write_need_commit(data)) { + if ((status >= 0) && nfs_write_need_commit(data)) { + spin_lock(&hdr->lock); + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) + ; /* Do nothing */ + else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, hdr->lseg); - dprintk(" marked for commit\n"); - goto next; - } - dprintk(" OK\n"); -remove_request: - nfs_inode_remove_request(req); - next: - nfs_unlock_request(req); - nfs_end_page_writeback(page); + else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) + set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); + spin_unlock(&hdr->lock); } nfs_writedata_release(data); } -static const struct rpc_call_ops nfs_write_full_ops = { +static const struct rpc_call_ops nfs_write_common_ops = { .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_writeback_done_full, - .rpc_release = nfs_writeback_release_full, + .rpc_call_done = nfs_writeback_done_common, + .rpc_release = nfs_writeback_release_common, }; @@ -1307,38 +1270,40 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } } #endif - /* Is this a short write? 
*/ - if (task->tk_status >= 0 && resp->count < argp->count) { + if (task->tk_status < 0) + nfs_set_pgio_error(data->header, task->tk_status, argp->offset); + else if (resp->count < argp->count) { static unsigned long complain; + /* This a short write! */ nfs_inc_stats(inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ - if (resp->count != 0) { - /* Was this an NFSv2 write or an NFSv3 stable write? */ - if (resp->verf->committed != NFS_UNSTABLE) { - /* Resend from where the server left off */ - data->mds_offset += resp->count; - argp->offset += resp->count; - argp->pgbase += resp->count; - argp->count -= resp->count; - } else { - /* Resend as a stable write in order to avoid - * headaches in the case of a server crash. - */ - argp->stable = NFS_FILE_SYNC; + if (resp->count == 0) { + if (time_before(complain, jiffies)) { + printk(KERN_WARNING + "NFS: Server wrote zero bytes, expected %u.\n", + argp->count); + complain = jiffies + 300 * HZ; } - rpc_restart_call_prepare(task); + nfs_set_pgio_error(data->header, -EIO, argp->offset); + task->tk_status = -EIO; return; } - if (time_before(complain, jiffies)) { - printk(KERN_WARNING - "NFS: Server wrote zero bytes, expected %u.\n", - argp->count); - complain = jiffies + 300 * HZ; + /* Was this an NFSv2 write or an NFSv3 stable write? */ + if (resp->verf->committed != NFS_UNSTABLE) { + /* Resend from where the server left off */ + data->mds_offset += resp->count; + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + } else { + /* Resend as a stable write in order to avoid + * headaches in the case of a server crash. + */ + argp->stable = NFS_FILE_SYNC; } - /* Can't do anything about it except throw an error. 
*/ - task->tk_status = -EIO; + rpc_restart_call_prepare(task); } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1648621..0d17db7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1192,6 +1192,8 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, + NFS_IOHDR_NEED_COMMIT, + NFS_IOHDR_NEED_RESCHED, }; struct nfs_pgio_header { -- cgit v0.10.2 From 061ae2edb7375ab6776468b075da71008a098b55 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:48 -0400 Subject: NFS: create completion structure to pass into page_init functions Factors out the code that will need to change when directio starts using these code paths. This will allow directio to use the generic pagein and flush routines Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 16bc9c4..3ef8fcd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -300,11 +300,10 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pgio_completion_ops; /* read.c */ -extern void nfs_async_read_error(struct list_head *head); extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); -extern void nfs_read_completion(struct nfs_pgio_header *hdr); extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount); extern int nfs_initiate_read(struct rpc_clnt *clnt, @@ -314,21 +313,21 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode); + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 
extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern void nfs_async_write_error(struct list_head *head); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount); -extern void nfs_write_completion(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags); + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cd4c038..4cf2a68 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->release = release; + hdr->completion_ops = desc->pg_completion_ops; } void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) @@ -240,6 +241,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, + const struct nfs_pgio_completion_ops *compl_ops, size_t bsize, int io_flags) { @@ -252,6 +254,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; + desc->pg_completion_ops = compl_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d515f00..b3a0c01 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1113,26 +1113,31 @@ 
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); bool -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) return false; - nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, + server->rsize, 0); return true; } bool -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, + int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) return false; - nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, + server->wsize, ioflags); return true; } @@ -1162,13 +1167,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) +static int pnfs_write_done_resend_to_mds(struct inode *inode, + struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); + nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1201,7 +1208,8 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) } if 
(!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages); + &hdr->pages, + hdr->completion_ops); } /* @@ -1292,7 +1300,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - nfs_async_write_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; @@ -1309,18 +1317,20 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head) +static int pnfs_read_done_resend_to_mds(struct inode *inode, + struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read_mds(&pgio, inode); + nfs_pageio_init_read_mds(&pgio, inode, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1349,7 +1359,8 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages); + &hdr->pages, + hdr->completion_ops); } /* @@ -1443,7 +1454,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) rhdr = nfs_readhdr_alloc(); if (!rhdr) { - nfs_async_read_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; @@ -1461,7 +1472,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); if
(atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 442ebf6..734e4ef 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -168,8 +168,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, + const struct nfs_pgio_completion_ops *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, + int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c9633b2..5e78af1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,6 +31,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_common_ops; +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; @@ -95,7 +96,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata) else rdata->header = NULL; if (atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); } static @@ -108,9 +109,10 @@ int nfs_return_empty_page(struct page *page) } void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode) + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } @@ -122,10 +124,11 @@ void 
nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode) + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { - if (!pnfs_pageio_init_read(pgio, inode)) - nfs_pageio_init_read_mds(pgio, inode); + if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) + nfs_pageio_init_read_mds(pgio, inode, compl_ops); } int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, @@ -146,7 +149,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init_read(&pgio, inode); + nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); return 0; @@ -170,7 +173,7 @@ static void nfs_readpage_release(struct nfs_page *req) } /* Note io was page aligned */ -void nfs_read_completion(struct nfs_pgio_header *hdr) +static void nfs_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -300,7 +303,7 @@ nfs_do_multiple_reads(struct list_head *head, return ret; } -void +static void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; @@ -312,6 +315,11 @@ nfs_async_read_error(struct list_head *head) } } +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { + .error_cleanup = nfs_async_read_error, + .completion = nfs_read_completion, +}; + /* * Generate multiple requests to fill a single page. 
* @@ -362,7 +370,7 @@ out_bad: list_del(&data->list); nfs_readdata_release(data); } - nfs_async_read_error(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&hdr->pages); return -ENOMEM; } @@ -378,7 +386,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - nfs_async_read_error(head); + desc->pg_completion_ops->error_cleanup(head); ret = -ENOMEM; goto out; } @@ -414,7 +422,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) rhdr = nfs_readhdr_alloc(); if (!rhdr) { - nfs_async_read_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &rhdr->header; @@ -427,7 +435,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) else set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } @@ -652,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&pgio, inode); + nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 076075e..1503972 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -40,10 +40,12 @@ * Local function declarations */ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, - struct inode *inode, int ioflags); + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t 
*nfs_wdata_mempool; @@ -128,7 +130,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata) else wdata->header = NULL; if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -337,7 +339,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -380,7 +383,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -558,7 +562,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) #endif -void nfs_write_completion(struct nfs_pgio_header *hdr) +static void nfs_write_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -1000,7 +1004,7 @@ static void nfs_redirty_request(struct nfs_page *req) nfs_end_page_writeback(page); } -void nfs_async_write_error(struct list_head *head) +static void nfs_async_write_error(struct list_head *head) { struct nfs_page *req; @@ -1011,6 +1015,11 @@ void nfs_async_write_error(struct list_head *head) } } +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { + .error_cleanup = nfs_async_write_error, + .completion = nfs_write_completion, +}; + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. 
@@ -1060,7 +1069,7 @@ out_bad: list_del(&data->list); nfs_writedata_release(data); } - nfs_async_write_error(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&hdr->pages); return -ENOMEM; } @@ -1084,7 +1093,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - nfs_async_write_error(head); + desc->pg_completion_ops->error_cleanup(head); ret = -ENOMEM; goto out; } @@ -1125,7 +1134,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - nfs_async_write_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; @@ -1139,7 +1148,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) else set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } @@ -1149,9 +1158,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { }; void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags) + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } @@ -1163,10 +1173,11 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags) + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { - if (!pnfs_pageio_init_write(pgio, inode, ioflags)) - nfs_pageio_init_write_mds(pgio, inode, ioflags); + if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) + nfs_pageio_init_write_mds(pgio, inode,
ioflags, compl_ops); } void nfs_write_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 5c52034..bc5b7a5 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,6 +67,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; const struct rpc_call_ops *pg_rpc_callops; + const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; }; @@ -83,6 +84,7 @@ extern void nfs_release_request(struct nfs_page *req); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, + const struct nfs_pgio_completion_ops *compl_ops, size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0d17db7..6fa1d22 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1207,6 +1207,7 @@ struct nfs_pgio_header { loff_t io_start; const struct rpc_call_ops *mds_ops; void (*release) (struct nfs_pgio_header *hdr); + const struct nfs_pgio_completion_ops *completion_ops; spinlock_t lock; /* fields protected by lock */ int pnfs_error; @@ -1261,6 +1262,11 @@ struct nfs_commit_data { int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); }; +struct nfs_pgio_completion_ops { + void (*error_cleanup)(struct list_head *head); + void (*completion)(struct nfs_pgio_header *hdr); +}; + struct nfs_unlinkdata { struct hlist_node list; struct nfs_removeargs args; -- cgit v0.10.2 From 9533da2979757258d3fd5429d830a297013d69ed Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:49 -0400 Subject: NFS: remove unused wb_complete field from struct nfs_page Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4cf2a68..5d01a16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -114,7 +114,6 @@ 
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ req->wb_page = page; - atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); BUG_ON(PagePrivate(page)); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1503972..705bf01 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1059,7 +1059,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, nbytes -= len; offset += len; } while (nbytes != 0); - atomic_set(&req->wb_complete, requests); desc->pg_rpc_callops = &nfs_write_common_ops; return ret; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index bc5b7a5..0a5b63f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -36,7 +36,6 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ - atomic_t wb_complete; /* i/os we're waiting for */ pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ -- cgit v0.10.2 From 1825a0d08f22463e5a8f4b1636473efd057a3479 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 19:55:31 -0400 Subject: NFS: prepare coalesce testing for directio The coalesce code made assumptions that will no longer be true once non-page aligned io occurs. This introduces no change in current behavior, but allows for more general situations to come. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 02d8170..e40523f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -796,6 +796,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + /* + * Handling unaligned pages is difficult, because have to + * somehow split a req in two in certain cases in the + * pg.test code. Avoid this by just not using pnfs + * in this case. + */ + nfs_pageio_reset_read_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, @@ -815,6 +825,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) + goto out_mds; pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5d01a16..638ca7f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -280,12 +280,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req->wb_context->state != prev->wb_context->state) return false; - if (req->wb_index != (prev->wb_index + 1)) - return false; if (req->wb_pgbase != 0) return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; + if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + return false; return pgio->pg_ops->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b3a0c01..4da05e4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1082,6 +1082,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + nfs_pageio_reset_read_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), @@ -1100,6 +1104,10 @@ pnfs_generic_pg_init_write(struct 
nfs_pageio_descriptor *pgio, struct nfs_page * { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + nfs_pageio_reset_write_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), -- cgit v0.10.2 From 584aa810b6240d88c28113a90c5029449814a3b5 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:51 -0400 Subject: NFS: rewrite directio read to use async coalesce code This also has the advantage that it allows directio to use pnfs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 22a40c4..4ba9a2c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) -{ - unsigned int npages; - unsigned int i; - - if (count == 0) - return; - pages += (pgbase >> PAGE_SHIFT); - npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (!PageCompound(page)) - set_page_dirty(page); - } -} - static void nfs_direct_release_pages(struct page **pages, unsigned int npages) { unsigned int i; @@ -226,58 +210,92 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). 
- */ -static void nfs_direct_read_result(struct rpc_task *task, void *calldata) +void nfs_direct_readpage_release(struct nfs_page *req) { - struct nfs_read_data *data = calldata; - - nfs_readpage_result(task, data); + dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + nfs_release_request(req); } -static void nfs_direct_read_release(void *calldata) +static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) { + unsigned long bytes = 0; + struct nfs_direct_req *dreq = hdr->dreq; - struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req; - int status = data->task.tk_status; + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; spin_lock(&dreq->lock); - if (unlikely(status < 0)) { - dreq->error = status; - spin_unlock(&dreq->lock); + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) + dreq->error = hdr->error; + else + dreq->count += hdr->good_bytes; + spin_unlock(&dreq->lock); + + if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); + } + bytes += req->wb_bytes; + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); + if (!PageCompound(page)) + set_page_dirty(page); + page_cache_release(page); + } } else { - dreq->count += data->res.count; - spin_unlock(&dreq->lock); - nfs_direct_dirty_pages(data->pages.pagevec, - data->args.pgbase, - data->res.count); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + if 
(bytes < hdr->good_bytes) + if (!PageCompound(req->wb_page)) + set_page_dirty(req->wb_page); + bytes += req->wb_bytes; + page_cache_release(req->wb_page); + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); + } } - nfs_direct_release_pages(data->pages.pagevec, data->pages.npages); - +out_put: if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_release(data); + hdr->release(hdr); } -static const struct rpc_call_ops nfs_read_direct_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_direct_read_result, - .rpc_release = nfs_direct_read_release, -}; - -static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) +static void nfs_sync_pgio_error(struct list_head *head) { - struct nfs_read_data *data = &rhdr->rpc_data; + struct nfs_page *req; - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - nfs_readhdr_free(&rhdr->header); + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_release_request(req); + } } +static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) +{ + get_dreq(hdr->dreq); +} + +static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { + .error_cleanup = nfs_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_read_completion, +}; + /* * For each rsize'd chunk of the user's buffer, dispatch an NFS READ * operation. If nfs_readdata_alloc() or get_user_pages() fails, @@ -285,118 +303,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. 
*/ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; size_t rsize = NFS_SERVER(inode)->rsize; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_read_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_read_header *rhdr; - struct nfs_read_data *data; - struct nfs_page_array *pages; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(rsize,count); + bytes = min(max(rsize, PAGE_SIZE), count); result = -ENOMEM; - rhdr = nfs_readhdr_alloc(); - if (unlikely(!rhdr)) - break; - data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes)); - if (!data) { - nfs_readhdr_free(&rhdr->header); + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), + GFP_KERNEL); + if (!pagevec) break; - } - data->header = &rhdr->header; - atomic_inc(&data->header->refcnt); - pages = &data->pages; - down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - pages->npages, 1, 0, pages->pagevec, NULL); + npages, 1, 0, pagevec, NULL); up_read(&current->mm->mmap_sem); - if (result < 0) { - nfs_direct_readhdr_release(rhdr); + if (result < 0) break; - } - if ((unsigned)result < pages->npages) { + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(pages->pagevec,
result); - nfs_direct_readhdr_release(rhdr); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - pages->npages = result; + npages = result; } - get_dreq(dreq); - - rhdr->header.req = (struct nfs_page *) dreq; - rhdr->header.inode = inode; - rhdr->header.cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = pages->pagevec; - data->args.count = bytes; - data->res.fattr = &data->fattr; - data->res.eof = 0; - data->res.count = bytes; - nfs_fattr_init(&data->fattr); - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - NFS_PROTO(inode)->read_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; - - dprintk("NFS: %5u initiated direct read call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - task->tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); - rpc_put_task(task); - - started += bytes; - user_addr += bytes; - pos += bytes; - /* FIXME: Remove this unnecessary math from final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); - - count -= bytes; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + /* XXX do we need to do the eof zeroing found in async_filler? 
*/ + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + nfs_direct_release_pages(pagevec + i, + npages - i); + result = PTR_ERR(req); + break; + } + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + nfs_release_request(req); + nfs_direct_release_pages(pagevec + i, + npages - i); + break; + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } } while (count != 0); + kfree(pagevec); + if (started) return started; return result < 0 ? (ssize_t) result : -EFAULT; @@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, unsigned long nr_segs, loff_t pos) { + struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_read(&desc, dreq->inode, + &nfs_direct_read_completion_ops); get_dreq(dreq); + desc.pg_dreq = dreq; for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(dreq, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + nfs_pageio_complete(&desc); + /* * If no bytes were started, return the error, and let the * generic layer handle the completion. 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3ef8fcd..cd5d4a3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops; /* read.c */ extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); -extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount); +extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 638ca7f..33a21ca 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->cred = hdr->req->wb_context->cred; hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; + hdr->dreq = desc->pg_dreq; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; + if (hdr->completion_ops->init_hdr) + hdr->completion_ops->init_hdr(hdr); } void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) @@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_page = page; req->wb_index = page->index; page_cache_get(page); - BUG_ON(PagePrivate(page)); - BUG_ON(!PageLocked(page)); - BUG_ON(page->mapping->host != inode); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; + desc->pg_dreq = NULL; } /** diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5e78af1..35e2dce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,8 +51,8 @@ struct nfs_read_header *nfs_readhdr_alloc() return rhdr; } -struct nfs_read_data *nfs_readdata_alloc(struct 
nfs_pgio_header *hdr, - unsigned int pagecount) +static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) { struct nfs_read_data *data, *prealloc; @@ -123,9 +123,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) nfs_pageio_init_read_mds(pgio, inode, compl_ops); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0a5b63f..f9ee9eb 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,6 +68,7 @@ struct nfs_pageio_descriptor { const struct rpc_call_ops *pg_rpc_callops; const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; + struct nfs_direct_req *pg_dreq; }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6fa1d22..38687b8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1208,6 +1208,7 @@ struct nfs_pgio_header { const struct rpc_call_ops *mds_ops; void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; + struct nfs_direct_req *dreq; spinlock_t lock; /* fields protected by lock */ int pnfs_error; @@ -1221,8 +1222,6 @@ struct nfs_read_header { struct nfs_read_data rpc_data; }; -struct nfs_direct_req; - struct nfs_write_data { struct nfs_pgio_header *header; struct list_head list; @@ -1264,6 +1263,7 @@ struct nfs_commit_data { struct nfs_pgio_completion_ops { void (*error_cleanup)(struct list_head *head); + void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header 
*hdr); }; -- cgit v0.10.2 From 84c53ab5c093058c756dcef1879d38be6de90a3c Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:52 -0400 Subject: NFS: create nfs_generic_commit_list Simple refactoring. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 705bf01..2500f1c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1523,6 +1523,17 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; +static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how) +{ + int status; + + status = pnfs_commit_list(inode, head, how); + if (status == PNFS_NOT_ATTEMPTED) + status = nfs_commit_list(inode, head, how); + return status; +} + int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); @@ -1536,9 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how) if (res) { int error; - error = pnfs_commit_list(inode, &head, how); - if (error == PNFS_NOT_ATTEMPTED) - error = nfs_commit_list(inode, &head, how); + error = nfs_generic_commit_list(inode, &head, how); if (error < 0) return error; if (!may_wait) -- cgit v0.10.2 From ea2cf2282b4278461266013e9c002ee1c66700ff Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:53 -0400 Subject: NFS: create struct nfs_commit_info It is COMMIT that is handled the most differently between the paged and direct paths. Create a structure that encapsulates everything either path needs to know about the commit state. We could use void to hide some of the layout driver stuff, but Trond suggests pulling it out to ensure type checking, given the huge changes being made, and the fact that it doesn't interfere with other drivers. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8bbfa5..59a12c6a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1547,7 +1547,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; - atomic_set(&nfsi->commits_outstanding, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); #endif } @@ -1559,9 +1559,9 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_list); + INIT_LIST_HEAD(&nfsi->commit_info.list); nfsi->npages = 0; - nfsi->ncommit = 0; + nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cd5d4a3..145e9e7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -346,12 +346,18 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg); void nfs_retry_commit(struct list_head *page_list, - struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_commit_release_pages(struct nfs_commit_data *data); -void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); -void nfs_request_remove_commit_list(struct nfs_page *req); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, + struct nfs_commit_info *cinfo); +void nfs_request_remove_commit_list(struct nfs_page *req, + struct nfs_commit_info *cinfo); +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct 
address_space *, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e40523f..fe2cb55 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -347,9 +347,11 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) static void filelayout_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; + struct nfs_commit_info cinfo; nfs_commit_release_pages(data); - if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding)) + nfs_init_cinfo(&cinfo, data->inode, data->dreq); + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) nfs_commit_clear_lock(NFS_I(data->inode)); put_lseg(data->lseg); nfs_commitdata_release(data); @@ -695,17 +697,16 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static int filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); - - struct nfs4_fl_commit_bucket *buckets; + struct pnfs_commit_bucket *buckets; int size; if (fl->commit_through_mds) return 0; - if (flo->commit_info.nbuckets != 0) { + if (cinfo->ds->nbuckets != 0) { /* This assumes there is only one IOMODE_RW lseg. What * we really want to do is have a layout_hdr level * dictionary of keys, each @@ -718,25 +719,25 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, size = (fl->stripe_type == STRIPE_SPARSE) ? 
fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), + buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), gfp_flags); if (!buckets) return -ENOMEM; else { int i; - spin_lock(&lseg->pls_layout->plh_inode->i_lock); - if (flo->commit_info.nbuckets != 0) + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets != 0) kfree(buckets); else { - flo->commit_info.buckets = buckets; - flo->commit_info.nbuckets = size; + cinfo->ds->buckets = buckets; + cinfo->ds->nbuckets = size; for (i = 0; i < size; i++) { INIT_LIST_HEAD(&buckets[i].written); INIT_LIST_HEAD(&buckets[i].committing); } } - spin_unlock(&lseg->pls_layout->plh_inode->i_lock); + spin_unlock(cinfo->lock); return 0; } } @@ -821,6 +822,7 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + struct nfs_commit_info cinfo; int status; BUG_ON(pgio->pg_lseg != NULL); @@ -836,7 +838,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; - status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS); + nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); + status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); if (status < 0) { put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -871,40 +874,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) * If this will make the bucket empty, it will need to put the lseg reference. 
*/ static void -filelayout_clear_request_commit(struct nfs_page *req) +filelayout_clear_request_commit(struct nfs_page *req, + struct nfs_commit_info *cinfo) { struct pnfs_layout_segment *freeme = NULL; - struct inode *inode = req->wb_context->dentry->d_inode; - spin_lock(&inode->i_lock); + spin_lock(cinfo->lock); if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; + cinfo->ds->nwritten--; if (list_is_singular(&req->wb_list)) { - struct nfs4_fl_commit_bucket *bucket; + struct pnfs_commit_bucket *bucket; bucket = list_first_entry(&req->wb_list, - struct nfs4_fl_commit_bucket, + struct pnfs_commit_bucket, written); freeme = bucket->wlseg; bucket->wlseg = NULL; } out: - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); + nfs_request_remove_commit_list(req, cinfo); + spin_unlock(cinfo->lock); put_lseg(freeme); } static struct list_head * filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; - struct nfs4_fl_commit_bucket *buckets; + struct pnfs_commit_bucket *buckets; if (fl->commit_through_mds) - return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; + return &cinfo->mds->list; /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive @@ -914,7 +919,7 @@ filelayout_choose_commit_list(struct nfs_page *req, */ j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); - buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; + buckets = cinfo->ds->buckets; list = &buckets[i].written; if (list_empty(list)) { /* Non-empty buckets hold a reference on the lseg. 
That ref @@ -926,17 +931,19 @@ filelayout_choose_commit_list(struct nfs_page *req, buckets[i].wlseg = get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); + cinfo->ds->nwritten++; return list; } static void filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct list_head *list; - list = filelayout_choose_commit_list(req, lseg); - nfs_request_add_commit_list(req, list); + list = filelayout_choose_commit_list(req, lseg, cinfo); + nfs_request_add_commit_list(req, list, cinfo); } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -993,8 +1000,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) } static int -filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, - spinlock_t *lock) +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) { struct list_head *src = &bucket->written; struct list_head *dst = &bucket->committing; @@ -1004,9 +1012,9 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; - if (cond_resched_lock(lock)) + if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); + nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; @@ -1014,6 +1022,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, break; } if (ret) { + cinfo->ds->nwritten -= ret; + cinfo->ds->ncommitting += ret; bucket->clseg = bucket->wlseg; if (list_empty(src)) bucket->wlseg = NULL; @@ -1024,37 +1034,32 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, } /* Move reqs from written to committing lists, returning count of number moved. 
- * Note called with i_lock held. + * Note called with cinfo->lock held. */ -static int filelayout_scan_commit_lists(struct inode *inode, int max, - spinlock_t *lock) +static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, + int max) { - struct nfs4_fl_commit_info *fl_cinfo; int i, rv = 0, cnt; - fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; - if (fl_cinfo->nbuckets == 0) - goto out_done; - for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i], - max, lock); + for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], + cinfo, max); max -= cnt; rv += cnt; } -out_done: return rv; } static unsigned int -alloc_ds_commits(struct inode *inode, struct list_head *list) +alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) { - struct nfs4_fl_commit_info *fl_cinfo; - struct nfs4_fl_commit_bucket *bucket; + struct pnfs_ds_commit_info *fl_cinfo; + struct pnfs_commit_bucket *bucket; struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; - fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) @@ -1073,7 +1078,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&bucket->committing, bucket->clseg); + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); put_lseg(bucket->clseg); bucket->clseg = NULL; } @@ -1084,7 +1089,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) /* This follows nfs_commit_list pretty closely */ static int filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, - int how) + int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data, 
*tmp; LIST_HEAD(list); @@ -1097,17 +1102,17 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_add(&data->pages, &list); nreq++; } else - nfs_retry_commit(mds_pages, NULL); + nfs_retry_commit(mds_pages, NULL, cinfo); } - nreq += alloc_ds_commits(inode, &list); + nreq += alloc_ds_commits(cinfo, &list); if (nreq == 0) { nfs_commit_clear_lock(NFS_I(inode)); goto out; } - atomic_add(nreq, &NFS_I(inode)->commits_outstanding); + atomic_add(nreq, &cinfo->mds->rpcs_out); list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); @@ -1116,14 +1121,15 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { - struct nfs4_fl_commit_info *fl_cinfo; + struct pnfs_commit_bucket *buckets; - fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info; - nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg); + buckets = cinfo->ds->buckets; + nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } out: + cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } @@ -1148,6 +1154,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) kfree(FILELAYOUT_FROM_HDR(lo)); } +static struct pnfs_ds_commit_info * +filelayout_get_ds_info(struct inode *inode) +{ + return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -1158,6 +1170,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, + .get_ds_info = &filelayout_get_ds_info, .mark_request_commit = filelayout_mark_request_commit, .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = 
filelayout_scan_commit_lists, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 333a3ac..96b89bb 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -74,18 +74,6 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; -struct nfs4_fl_commit_bucket { - struct list_head written; - struct list_head committing; - struct pnfs_layout_segment *wlseg; - struct pnfs_layout_segment *clseg; -}; - -struct nfs4_fl_commit_info { - int nbuckets; - struct nfs4_fl_commit_bucket *buckets; -}; - struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -100,7 +88,7 @@ struct nfs4_filelayout_segment { struct nfs4_filelayout { struct pnfs_layout_hdr generic_hdr; - struct nfs4_fl_commit_info commit_info; + struct pnfs_ds_commit_info commit_info; }; static inline struct nfs4_filelayout * diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 734e4ef..4cd8760 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,11 +94,18 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; + struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); void (*mark_request_commit) (struct nfs_page *req, - struct pnfs_layout_segment *lseg); - void (*clear_request_commit) (struct nfs_page *req); - int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); - int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); + void (*clear_request_commit) (struct nfs_page *req, + struct nfs_commit_info *cinfo); + int (*scan_commit_lists) (struct nfs_commit_info *cinfo, + int max); + int (*commit_pagelist)(struct inode *inode, + struct list_head *mds_pages, + int how, + struct nfs_commit_info *cinfo); /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted @@ -263,49 +270,57 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) } static 
inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, + struct nfs_commit_info *cinfo) { - if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) + if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0) return PNFS_NOT_ATTEMPTED; - return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); + return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo); +} + +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld == NULL || ld->get_ds_info == NULL) + return NULL; + return ld->get_ds_info(inode); } static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (lseg == NULL || ld->mark_request_commit == NULL) return false; - ld->mark_request_commit(req, lseg); + ld->mark_request_commit(req, lseg, cinfo); return true; } static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (ld == NULL || ld->clear_request_commit == NULL) return false; - ld->clear_request_commit(req); + ld->clear_request_commit(req, cinfo); return true; } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, + int max) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - int ret; - - if (ld == NULL 
|| ld->scan_commit_lists == NULL) + if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) return 0; - ret = ld->scan_commit_lists(inode, max, lock); - if (ret != 0) - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - return ret; + else + return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -409,25 +424,34 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st } static inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, + struct nfs_commit_info *cinfo) { return PNFS_NOT_ATTEMPTED; } +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ + return NULL; +} + static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { return false; } static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { return false; } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, + int max) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2500f1c..18bf700 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -452,65 +452,79 @@ nfs_mark_request_dirty(struct nfs_page *req) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page - * @head: commit list head + * @dst: commit list head + * @cinfo: holds list lock and accounting info * - * This sets the PG_CLEAN bit, updates the inode global count of + * This sets the PG_CLEAN bit, updates the cinfo count of * number of outstanding requests requiring a commit as well 
as * the MM page stats. * - * The caller must _not_ hold the inode->i_lock, but must be + * The caller must _not_ hold the cinfo->lock, but must be * holding the nfs_page lock. */ void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - set_bit(PG_CLEAN, &(req)->wb_flags); - spin_lock(&inode->i_lock); - nfs_list_add_request(req, head); - NFS_I(inode)->ncommit++; - spin_unlock(&inode->i_lock); + spin_lock(cinfo->lock); + nfs_list_add_request(req, dst); + cinfo->mds->ncommit++; + spin_unlock(cinfo->lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); /** * nfs_request_remove_commit_list - Remove request from a commit list * @req: pointer to a nfs_page + * @cinfo: holds list lock and accounting info * - * This clears the PG_CLEAN bit, and updates the inode global count of + * This clears the PG_CLEAN bit, and updates the cinfo's count of * number of outstanding requests requiring a commit * It does not update the MM page stats. * - * The caller _must_ hold the inode->i_lock and the nfs_page lock. + * The caller _must_ hold the cinfo->lock and the nfs_page lock. 
*/ void -nfs_request_remove_commit_list(struct nfs_page *req) +nfs_request_remove_commit_list(struct nfs_page *req, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) return; nfs_list_remove_request(req); - NFS_I(inode)->ncommit--; + cinfo->mds->ncommit--; } EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode) +{ + cinfo->lock = &inode->i_lock; + cinfo->mds = &NFS_I(inode)->commit_info; + cinfo->ds = pnfs_get_ds_info(inode); +} + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq) +{ + nfs_init_cinfo_from_inode(cinfo, inode); +} +EXPORT_SYMBOL_GPL(nfs_init_cinfo); /* * Add a request to the inode's commit list. */ static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - - if (pnfs_mark_request_commit(req, lseg)) + if (pnfs_mark_request_commit(req, lseg, cinfo)) return; - nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); + nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); } static void @@ -525,11 +539,13 @@ nfs_clear_request_commit(struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_commit_info cinfo; - if (!pnfs_clear_request_commit(req)) { - spin_lock(&inode->i_lock); - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); + nfs_init_cinfo_from_inode(&cinfo, inode); + if (!pnfs_clear_request_commit(req, &cinfo)) { + spin_lock(cinfo.lock); + nfs_request_remove_commit_list(req, &cinfo); + spin_unlock(cinfo.lock); } nfs_clear_page_commit(req->wb_page); } @@ -545,7 +561,8 @@ int nfs_write_need_commit(struct 
nfs_write_data *data) #else static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { } @@ -564,10 +581,12 @@ int nfs_write_need_commit(struct nfs_write_data *data) static void nfs_write_completion(struct nfs_pgio_header *hdr) { + struct nfs_commit_info cinfo; unsigned long bytes = 0; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; + nfs_init_cinfo_from_inode(&cinfo, hdr->inode); while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -585,7 +604,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - nfs_mark_request_commit(req, hdr->lseg); + nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } remove_req: @@ -599,16 +618,16 @@ out: } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -static int -nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long +nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { - return nfsi->ncommit > 0; + return cinfo->mds->ncommit; } -/* i_lock held by caller */ +/* cinfo->lock held by caller */ static int -nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, - spinlock_t *lock) +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) { struct nfs_page *req, *tmp; int ret = 0; @@ -616,9 +635,9 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; - if (cond_resched_lock(lock)) + if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); + nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; if (ret == max) @@ -630,37 +649,38 @@ 
nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan - * @dst: destination list + * @dst: mds destination list + * @cinfo: mds and ds lists of reqs ready to commit * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst) +nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { - struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; - spin_lock(&inode->i_lock); - if (nfsi->ncommit > 0) { + spin_lock(cinfo->lock); + if (cinfo->mds->ncommit > 0) { const int max = INT_MAX; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, - &inode->i_lock); - ret += pnfs_scan_commit_lists(inode, max - ret, - &inode->i_lock); + ret = nfs_scan_commit_list(&cinfo->mds->list, dst, + cinfo, max); + ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); } - spin_unlock(&inode->i_lock); + spin_unlock(cinfo->lock); return ret; } #else -static inline int nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { return 0; } @@ -929,7 +949,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); */ static void nfs_write_rpcsetup(struct nfs_write_data *data, unsigned int count, unsigned int offset, - int how) + int how, struct nfs_commit_info *cinfo) { struct nfs_page *req = data->header->req; @@ -950,7 +970,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data, case 0: break; case FLUSH_COND_STABLE: - if (nfs_need_commit(NFS_I(data->header->inode))) + if (nfs_reqs_to_commit(cinfo)) break; default: data->args.stable = NFS_FILE_SYNC; @@ 
-1034,12 +1054,14 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, unsigned int offset; int requests = 0; int ret = 0; + struct nfs_commit_info cinfo; + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || desc->pg_count > wsize)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; @@ -1053,7 +1075,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, if (!data) goto out_bad; data->pages.pagevec[0] = page; - nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags); + nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; @@ -1088,6 +1110,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; + struct nfs_commit_info cinfo; data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); @@ -1097,6 +1120,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, goto out; } + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -1106,11 +1130,11 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, } if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags); + nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_write_common_ops; out: @@ -1417,14 +1441,15 @@ void nfs_init_commit(struct nfs_commit_data *data, 
EXPORT_SYMBOL_GPL(nfs_init_commit); void nfs_retry_commit(struct list_head *page_list, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req, lseg); + nfs_mark_request_commit(req, lseg, cinfo); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -1437,7 +1462,8 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit); * Commit dirty pages */ static int -nfs_commit_list(struct inode *inode, struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how, + struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; @@ -1450,7 +1476,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_init_commit(data, head, NULL); return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: - nfs_retry_commit(head, NULL); + nfs_retry_commit(head, NULL, cinfo); nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1524,30 +1550,32 @@ static const struct rpc_call_ops nfs_commit_ops = { }; static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, - int how) + int how, struct nfs_commit_info *cinfo) { int status; - status = pnfs_commit_list(inode, head, how); + status = pnfs_commit_list(inode, head, how, cinfo); if (status == PNFS_NOT_ATTEMPTED) - status = nfs_commit_list(inode, head, how); + status = nfs_commit_list(inode, head, how, cinfo); return status; } int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); + struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; int res; res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head); + nfs_init_cinfo_from_inode(&cinfo, inode); + res = nfs_scan_commit(inode, &head, &cinfo); 
if (res) { int error; - error = nfs_generic_commit_list(inode, &head, how); + error = nfs_generic_commit_list(inode, &head, how, &cinfo); if (error < 0) return error; if (!may_wait) @@ -1578,14 +1606,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int ret = 0; /* no commits means nothing needs to be done */ - if (!nfsi->ncommit) + if (!nfsi->commit_info.ncommit) return ret; if (wbc->sync_mode == WB_SYNC_NONE) { /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8d3a2b8..8a88c16 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -179,8 +179,7 @@ struct nfs_inode { __be32 cookieverf[2]; unsigned long npages; - unsigned long ncommit; - struct list_head commit_list; + struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ struct list_head open_files; @@ -201,7 +200,6 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_hdr *layout; - atomic_t commits_outstanding; #endif /* CONFIG_NFS_V4*/ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; @@ -230,7 +228,6 @@ struct nfs_inode { #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ -#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 38687b8..224e1e8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1079,6 +1079,21 @@ struct nfstime4 { }; #ifdef 
CONFIG_NFS_V4_1 + +struct pnfs_commit_bucket { + struct list_head written; + struct list_head committing; + struct pnfs_layout_segment *wlseg; + struct pnfs_layout_segment *clseg; +}; + +struct pnfs_ds_commit_info { + int nwritten; + int ncommitting; + int nbuckets; + struct pnfs_commit_bucket *buckets; +}; + #define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; @@ -1242,6 +1257,18 @@ struct nfs_write_header { struct nfs_write_data rpc_data; }; +struct nfs_mds_commit_info { + atomic_t rpcs_out; + unsigned long ncommit; + struct list_head list; +}; + +struct nfs_commit_info { + spinlock_t *lock; + struct nfs_mds_commit_info *mds; + struct pnfs_ds_commit_info *ds; +}; + struct nfs_commit_data { struct rpc_task task; struct inode *inode; -- cgit v0.10.2 From f453a54a01c7c0453ad9550906e3d2663dd486ac Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:54 -0400 Subject: NFS: create nfs_commit_completion_ops Factors out the code that needs to change when directio starts using these code paths. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 145e9e7..137f5cd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -344,13 +344,12 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt, int how); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, - struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); -void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(struct nfs_commit_data *data); -void nfs_commit_release_pages(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_request_remove_commit_list(struct nfs_page *req, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fe2cb55..26d1da4 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -347,12 +347,8 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) static void filelayout_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; - struct nfs_commit_info cinfo; - nfs_commit_release_pages(data); - nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + data->completion_ops->completion(data); put_lseg(data->lseg); nfs_commitdata_release(data); } @@ -1108,7 +1104,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nreq += alloc_ds_commits(cinfo, &list); if (nreq == 0) { - nfs_commit_clear_lock(NFS_I(inode)); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); goto out; } @@ -1117,14 +1113,14 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_for_each_entry_safe(data, tmp, &list, pages) { 
list_del_init(&data->pages); if (!data->lseg) { - nfs_init_commit(data, mds_pages, NULL); + nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { struct pnfs_commit_bucket *buckets; buckets = cinfo->ds->buckets; - nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg); + nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); filelayout_initiate_commit(data, how); } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 18bf700..333d01d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -505,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, cinfo->lock = &inode->i_lock; cinfo->mds = &NFS_I(inode)->commit_info; cinfo->ds = pnfs_get_ds_info(inode); + cinfo->completion_ops = &nfs_commit_completion_ops; } void nfs_init_cinfo(struct nfs_commit_info *cinfo, @@ -1358,13 +1360,12 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) return (ret < 0) ? ret : 1; } -void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) { clear_bit(NFS_INO_COMMIT, &nfsi->flags); smp_mb__after_clear_bit(); wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); } -EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); void nfs_commitdata_release(struct nfs_commit_data *data) { @@ -1413,8 +1414,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit); * Set up the argument/result storage required for the RPC call. 
*/ void nfs_init_commit(struct nfs_commit_data *data, - struct list_head *head, - struct pnfs_layout_segment *lseg) + struct list_head *head, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs_page *first = nfs_list_entry(head->next); struct inode *inode = first->wb_context->dentry->d_inode; @@ -1428,6 +1430,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ data->mds_ops = &nfs_commit_ops; + data->completion_ops = cinfo->completion_ops; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ @@ -1473,11 +1476,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, goto out_bad; /* Set up the argument struct */ - nfs_init_commit(data, head, NULL); + nfs_init_commit(data, head, NULL, cinfo); + atomic_inc(&cinfo->mds->rpcs_out); return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: nfs_retry_commit(head, NULL, cinfo); - nfs_commit_clear_lock(NFS_I(inode)); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1495,10 +1499,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_done(task, data); } -void nfs_commit_release_pages(struct nfs_commit_data *data) +static void nfs_commit_release_pages(struct nfs_commit_data *data) { struct nfs_page *req; int status = data->task.tk_status; + struct nfs_commit_info cinfo; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1531,15 +1536,16 @@ void nfs_commit_release_pages(struct nfs_commit_data *data) next: nfs_unlock_request(req); } + nfs_init_cinfo(&cinfo, data->inode, data->dreq); + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + nfs_commit_clear_lock(NFS_I(data->inode)); } -EXPORT_SYMBOL_GPL(nfs_commit_release_pages); static void nfs_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; - 
nfs_commit_release_pages(data); - nfs_commit_clear_lock(NFS_I(data->inode)); + data->completion_ops->completion(data); nfs_commitdata_release(calldata); } @@ -1549,6 +1555,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { + .completion = nfs_commit_release_pages, + .error_cleanup = nfs_commit_clear_lock, +}; + static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 224e1e8..0e8b88a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1263,10 +1263,18 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_data; +struct nfs_inode; +struct nfs_commit_completion_ops { + void (*error_cleanup) (struct nfs_inode *nfsi); + void (*completion) (struct nfs_commit_data *data); +}; + struct nfs_commit_info { spinlock_t *lock; struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; + const struct nfs_commit_completion_ops *completion_ops; }; struct nfs_commit_data { @@ -1285,6 +1293,7 @@ struct nfs_commit_data { struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_index; const struct rpc_call_ops *mds_ops; + const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); }; -- cgit v0.10.2 From b359f9d09bcbaede09243cfe844172ba055d89fd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:55 -0400 Subject: NFS: add dreq to nfs_commit_info Need this to pass into nfs_commitdata_init, in order to keep data->dreq accurate. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 333d01d..44a93d8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -506,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, cinfo->lock = &inode->i_lock; cinfo->mds = &NFS_I(inode)->commit_info; cinfo->ds = pnfs_get_ds_info(inode); + cinfo->dreq = NULL; cinfo->completion_ops = &nfs_commit_completion_ops; } @@ -1431,6 +1432,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->lseg = lseg; /* reference transferred */ data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; + data->dreq = cinfo->dreq; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0e8b88a..5f563bd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1274,6 +1274,7 @@ struct nfs_commit_info { spinlock_t *lock; struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; + struct nfs_direct_req *dreq; /* O_DIRECT request */ const struct nfs_commit_completion_ops *completion_ops; }; -- cgit v0.10.2 From 56f9cd684d25f1bae901c5a872b8427f8b417c3f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:56 -0400 Subject: NFS: avoid some stat gathering for direct io Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 44a93d8..56db9e7f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -472,9 +472,13 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, nfs_list_add_request(req, dst); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); + if (!cinfo->dreq) { + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + 
inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + __mark_inode_dirty(req->wb_context->dentry->d_inode, + I_DIRTY_DATASYNC); + } } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); @@ -1455,9 +1459,11 @@ void nfs_retry_commit(struct list_head *page_list, req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); nfs_mark_request_commit(req, lseg, cinfo); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); + if (!cinfo->dreq) { + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + } nfs_unlock_request(req); } } -- cgit v0.10.2 From 1763da1234cba663b849476d451bdccac5147859 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:57 -0400 Subject: NFS: rewrite directio write to use async coalesce code This also has the advantage that it allows directio to use pnfs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4ba9a2c..d44de2f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include "internal.h" #include "iostat.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -81,16 +82,19 @@ struct nfs_direct_req { struct completion completion; /* wait for i/o completion */ /* commit state */ - struct list_head rewrite_list; /* saved nfs_write_data structs */ - struct nfs_commit_data *commit_data; /* special write_data for commits */ + struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ + struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ + struct work_struct work; int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ struct nfs_writeverf verf; /* unstable write verifier */ }; +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; +static const 
struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); -static const struct rpc_call_ops nfs_write_direct_ops; +static void nfs_direct_write_schedule_work(struct work_struct *work); static inline void get_dreq(struct nfs_direct_req *dreq) { @@ -131,6 +135,16 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages) page_cache_release(pages[i]); } +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, + struct nfs_direct_req *dreq) +{ + cinfo->lock = &dreq->lock; + cinfo->mds = &dreq->mds_cinfo; + cinfo->ds = &dreq->ds_cinfo; + cinfo->dreq = dreq; + cinfo->completion_ops = &nfs_direct_commit_completion_ops; +} + static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -142,7 +156,11 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_init(&dreq->kref); kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->rewrite_list); + dreq->mds_cinfo.ncommit = 0; + atomic_set(&dreq->mds_cinfo.rpcs_out, 0); + INIT_LIST_HEAD(&dreq->mds_cinfo.list); + INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); + memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo)); dreq->iocb = NULL; dreq->ctx = NULL; dreq->l_ctx = NULL; @@ -457,112 +475,60 @@ out: return result; } -static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) -{ - struct nfs_write_data *data = &whdr->rpc_data; - - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - nfs_writehdr_free(&whdr->header); -} - -static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) -{ - while (!list_empty(&dreq->rewrite_list)) { - struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); - struct nfs_page_array *p = &whdr->rpc_data.pages; - - 
list_del(&hdr->pages); - nfs_direct_release_pages(p->pagevec, p->npages); - nfs_direct_writehdr_release(whdr); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct inode *inode = dreq->inode; - struct list_head *p; - struct nfs_write_data *data; - struct nfs_pgio_header *hdr; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; + struct nfs_pageio_descriptor desc; + struct nfs_page *req, *tmp; + LIST_HEAD(reqs); + struct nfs_commit_info cinfo; + LIST_HEAD(failed); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); + spin_lock(cinfo.lock); + nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); + spin_unlock(cinfo.lock); dreq->count = 0; get_dreq(dreq); - list_for_each(p, &dreq->rewrite_list) { - hdr = list_entry(p, struct nfs_pgio_header, pages); - data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data; - - get_dreq(dreq); - - /* Use stable writes */ - data->args.stable = NFS_FILE_SYNC; - - /* - * Reset data->res. - */ - nfs_fattr_init(&data->fattr); - data->res.count = data->args.count; - memset(&data->verf, 0, sizeof(data->verf)); - - /* - * Reuse data->task; data->args should not have changed - * since the original request was sent. - */ - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - /* - * We're called via an RPC callback, so BKL is already held. 
- */ - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); - - dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - } + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); -} + list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_add_request(req, &failed); + spin_lock(cinfo.lock); + dreq->flags = 0; + dreq->error = -EIO; + spin_unlock(cinfo.lock); + } + } + nfs_pageio_complete(&desc); -static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) -{ - struct nfs_commit_data *data = calldata; + while (!list_empty(&failed)) { + page_cache_release(req->wb_page); + nfs_release_request(req); + nfs_unlock_request(req); + } - /* Call the NFS version-specific code */ - NFS_PROTO(data->inode)->commit_done(task, data); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, dreq->inode); } -static void nfs_direct_commit_release(void *calldata) +static void nfs_direct_commit_complete(struct nfs_commit_data *data) { - struct nfs_commit_data *data = calldata; struct nfs_direct_req *dreq = data->dreq; + struct nfs_commit_info cinfo; + struct nfs_page *req; int status = data->task.tk_status; + nfs_init_cinfo_from_dreq(&cinfo, dreq); if (status < 0) { dprintk("NFS: %5u commit failed with error %d.\n", - data->task.tk_pid, status); + data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); @@ -570,59 +536,49 @@ static void nfs_direct_commit_release(void *calldata) } dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); - 
nfs_direct_write_complete(dreq, data->inode); - nfs_commit_free(data); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + /* Note the rewrite will go through mds */ + nfs_mark_request_commit(req, NULL, &cinfo); + } else { + page_cache_release(req->wb_page); + nfs_release_request(req); + } + nfs_unlock_request(req); + } + + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + nfs_direct_write_complete(dreq, data->inode); } -static const struct rpc_call_ops nfs_commit_direct_ops = { - .rpc_call_prepare = nfs_commit_prepare, - .rpc_call_done = nfs_direct_commit_result, - .rpc_release = nfs_direct_commit_release, +static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) +{ + /* There is no lock to clear */ +} + +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { + .completion = nfs_direct_commit_complete, + .error_cleanup = nfs_direct_error_cleanup, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct nfs_commit_data *data = dreq->commit_data; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = NFS_CLIENT(dreq->inode), - .rpc_message = &msg, - .callback_ops = &nfs_commit_direct_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; - - data->inode = dreq->inode; - data->cred = msg.rpc_cred; - - data->args.fh = NFS_FH(data->inode); - data->args.offset = 0; - data->args.count = 0; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - NFS_PROTO(data->inode)->commit_setup(data, &msg); - - /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ - dreq->commit_data = NULL; - - dprintk("NFS: %5u initiated commit call\n", 
data->task.tk_pid); - - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + int res; + struct nfs_commit_info cinfo; + LIST_HEAD(mds_list); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_scan_commit(dreq->inode, &mds_list, &cinfo); + res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); + if (res < 0) /* res == -ENOMEM */ + nfs_direct_write_reschedule(dreq); } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_schedule_work(struct work_struct *work) { + struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); int flags = dreq->flags; dreq->flags = 0; @@ -634,90 +590,29 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: - if (dreq->commit_data != NULL) - nfs_commit_free(dreq->commit_data); - nfs_direct_free_writedata(dreq); - nfs_zap_mapping(inode, inode->i_mapping); + nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); nfs_direct_complete(dreq); } } -static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - dreq->commit_data = nfs_commitdata_alloc(); - if (dreq->commit_data != NULL) - dreq->commit_data->dreq = dreq; + schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } + #else -static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) -{ - dreq->commit_data = NULL; -} static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_direct_free_writedata(dreq); nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); } #endif -static void nfs_direct_write_result(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - nfs_writeback_done(task, data); -} - /* * NB: Return the value of the first error return code. 
Subsequent * errors after the first one are ignored. */ -static void nfs_direct_write_release(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_pgio_header *hdr = data->header; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req; - int status = data->task.tk_status; - - spin_lock(&dreq->lock); - - if (unlikely(status < 0)) { - /* An error has occurred, so we should not commit */ - dreq->flags = 0; - dreq->error = status; - } - if (unlikely(dreq->error != 0)) - goto out_unlock; - - dreq->count += data->res.count; - - if (data->res.verf->committed != NFS_FILE_SYNC) { - switch (dreq->flags) { - case 0: - memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); - dreq->flags = NFS_ODIRECT_DO_COMMIT; - break; - case NFS_ODIRECT_DO_COMMIT: - if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { - dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - } - } -out_unlock: - spin_unlock(&dreq->lock); - - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, hdr->inode); -} - -static const struct rpc_call_ops nfs_write_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_direct_write_result, - .rpc_release = nfs_direct_write_release, -}; - /* * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE * operation. If nfs_writedata_alloc() or get_user_pages() fails, @@ -725,143 +620,181 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * handled automatically by nfs_direct_write_result(). Otherwise, if * no requests have been sent, just return an error. 
*/ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos, int sync) + loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_write_header *whdr; - struct nfs_write_data *data; - struct nfs_page_array *pages; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(wsize,count); + bytes = min(max(wsize, PAGE_SIZE), count); result = -ENOMEM; - whdr = nfs_writehdr_alloc(); - if (unlikely(!whdr)) + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); + if (!pagevec) break; - data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes)); - if (!data) { - nfs_writehdr_free(&whdr->header); - break; - } - data->header = &whdr->header; - atomic_inc(&data->header->refcnt); - pages = &data->pages; - down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - pages->npages, 0, 0, pages->pagevec, NULL); + npages, 0, 0, pagevec, NULL); up_read(&current->mm->mmap_sem); - if (result < 0) { - nfs_direct_writehdr_release(whdr); + if (result < 0) break; - } - if ((unsigned)result < pages->npages) { + + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { -
nfs_direct_release_pages(pages->pagevec, result); - nfs_direct_writehdr_release(whdr); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - pages->npages = result; + npages = result; } - get_dreq(dreq); - - list_move_tail(&whdr->header.pages, &dreq->rewrite_list); - - whdr->header.req = (struct nfs_page *) dreq; - whdr->header.inode = inode; - whdr->header.cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = ctx; - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = pages->pagevec; - data->args.count = bytes; - data->args.stable = sync; - data->res.fattr = &data->fattr; - data->res.count = bytes; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); - dprintk("NFS: %5u initiated direct write call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - task->tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); - rpc_put_task(task); - - started += bytes; - user_addr += bytes; - pos += bytes; - - /* FIXME: Remove this useless math from the final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); - - count -= bytes; + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + nfs_direct_release_pages(pagevec + i, + npages - i); + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + 
nfs_unlock_request(req); + nfs_release_request(req); + nfs_direct_release_pages(pagevec + i, + npages - i); + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } } while (count != 0); + kfree(pagevec); + if (started) return started; return result < 0 ? (ssize_t) result : -EFAULT; } +static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) +{ + struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_commit_info cinfo; + int bit = -1; + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + + spin_lock(&dreq->lock); + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + dreq->flags = 0; + dreq->error = hdr->error; + } + if (dreq->error != 0) + bit = NFS_IOHDR_ERROR; + else { + dreq->count += hdr->good_bytes; + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) + bit = NFS_IOHDR_NEED_RESCHED; + else if (dreq->flags == 0) { + memcpy(&dreq->verf, &req->wb_verf, + sizeof(dreq->verf)); + bit = NFS_IOHDR_NEED_COMMIT; + dreq->flags = NFS_ODIRECT_DO_COMMIT; + } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { + if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else + bit = NFS_IOHDR_NEED_COMMIT; + } + } + } + spin_unlock(&dreq->lock); + + while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); + nfs_list_remove_request(req); + switch (bit) { + case NFS_IOHDR_NEED_RESCHED: + case NFS_IOHDR_NEED_COMMIT: + nfs_mark_request_commit(req, hdr->lseg, &cinfo); + break; + default: + page_cache_release(req->wb_page); + nfs_release_request(req); + } + nfs_unlock_request(req); + } + +out_put: + if 
(put_dreq(dreq)) + nfs_direct_write_complete(dreq, hdr->inode); + hdr->release(hdr); +} + +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { + .error_cleanup = nfs_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_write_completion, +}; + static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos, int sync) + loff_t pos) { + struct nfs_pageio_descriptor desc; ssize_t result = 0; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; get_dreq(dreq); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(dreq, vec, - pos, sync); + result = nfs_direct_write_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -869,6 +802,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; pos += vec->iov_len; } + nfs_pageio_complete(&desc); /* * If no bytes were started, return the error, and let the @@ -891,16 +825,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; - size_t wsize = NFS_SERVER(inode)->wsize; - int sync = NFS_UNSTABLE; dreq = nfs_direct_req_alloc(); if (!dreq) goto out; - nfs_alloc_commit_data(dreq); - - if (dreq->commit_data == NULL || count <= wsize) - sync = NFS_FILE_SYNC; dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); @@ -910,7 +838,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); if 
(!result) result = nfs_direct_wait(dreq); out_release: @@ -1030,10 +958,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + if (retval > 0) { + struct inode *inode = mapping->host; - if (retval > 0) iocb->ki_pos = pos + retval; - + spin_lock(&inode->i_lock); + if (i_size_read(inode) < iocb->ki_pos) + i_size_write(inode, iocb->ki_pos); + spin_unlock(&inode->i_lock); + } out: return retval; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 137f5cd..d68810f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -320,10 +320,11 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); -extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, @@ -346,6 +347,15 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max); +int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo); +void nfs_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how, struct nfs_commit_info *cinfo); void 
nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); @@ -365,6 +375,10 @@ extern int nfs_migrate_page(struct address_space *, #define nfs_migrate_page NULL #endif +/* direct.c */ +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, + struct nfs_direct_req *dreq); + /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 26d1da4..806a55f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -996,12 +996,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) } static int -filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, - struct nfs_commit_info *cinfo, - int max) +transfer_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) { - struct list_head *src = &bucket->written; - struct list_head *dst = &bucket->committing; struct nfs_page *req, *tmp; int ret = 0; @@ -1014,9 +1011,22 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; - if (ret == max) + if ((ret == max) && !cinfo->dreq) break; } + return ret; +} + +static int +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + int ret; + + ret = transfer_commit_list(src, dst, cinfo, max); if (ret) { cinfo->ds->nwritten -= ret; cinfo->ds->ncommitting += ret; @@ -1046,6 +1056,27 @@ static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, return rv; } +/* Pull everything off the committing lists and dump into @dst */ +static void filelayout_recover_commit_reqs(struct list_head *dst, + struct nfs_commit_info *cinfo) +{ + struct 
pnfs_commit_bucket *b; + int i; + + /* NOTE cinfo->lock is NOT held, relying on fact that this is + * only called on single thread per dreq. + * Can't take the lock because need to do put_lseg + */ + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + if (transfer_commit_list(&b->written, dst, cinfo, 0)) { + BUG_ON(!list_empty(&b->written)); + put_lseg(b->wlseg); + b->wlseg = NULL; + } + } + cinfo->ds->nwritten = 0; +} + static unsigned int alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) { @@ -1170,6 +1201,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .mark_request_commit = filelayout_mark_request_commit, .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, + .recover_commit_reqs = filelayout_recover_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4cd8760..8efbee7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -102,6 +102,8 @@ struct pnfs_layoutdriver_type { struct nfs_commit_info *cinfo); int (*scan_commit_lists) (struct nfs_commit_info *cinfo, int max); + void (*recover_commit_reqs) (struct list_head *list, + struct nfs_commit_info *cinfo); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how, @@ -323,6 +325,15 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); } +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, + struct nfs_commit_info *cinfo) +{ + if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) + return; + NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); +} + /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -456,6 
+467,12 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return 0; } +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, + struct nfs_commit_info *cinfo) +{ +} + static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 56db9e7f..fec214b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -39,9 +39,6 @@ /* * Local function declarations */ -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops); static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; @@ -87,8 +84,8 @@ struct nfs_write_header *nfs_writehdr_alloc(void) return p; } -struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) { struct nfs_write_data *data, *prealloc; @@ -518,14 +515,17 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq) { - nfs_init_cinfo_from_inode(cinfo, inode); + if (dreq) + nfs_init_cinfo_from_dreq(cinfo, dreq); + else + nfs_init_cinfo_from_inode(cinfo, inode); } EXPORT_SYMBOL_GPL(nfs_init_cinfo); /* * Add a request to the inode's commit list. 
*/ -static void +void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) { @@ -567,7 +567,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) } #else -static void +void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) { @@ -632,7 +632,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo) } /* cinfo->lock held by caller */ -static int +int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) { @@ -647,7 +647,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; - if (ret == max) + if ((ret == max) && !cinfo->dreq) break; } return ret; @@ -662,7 +662,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. 
*/ -static int +int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo) { @@ -686,8 +686,8 @@ static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) +int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { return 0; } @@ -1202,9 +1202,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); @@ -1568,8 +1568,8 @@ static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .error_cleanup = nfs_commit_clear_lock, }; -static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, - int how, struct nfs_commit_info *cinfo) +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how, struct nfs_commit_info *cinfo) { int status; -- cgit v0.10.2 From df0117481cd94dbb8970f4be9d05b0568fa09ab1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 24 Apr 2012 14:50:34 -0400 Subject: NFS: Prevent garbage cinfo->ds from leaking out This is a bugfix that applies on top of the previous directio patches, that fixes a bug introduced in "NFS: create struct nfs_commit_info". 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 806a55f..80a63f6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1184,7 +1184,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) static struct pnfs_ds_commit_info * filelayout_get_ds_info(struct inode *inode) { - return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; + + if (layout == NULL) + return NULL; + else + return &FILELAYOUT_FROM_HDR(layout)->commit_info; } static struct pnfs_layoutdriver_type filelayout_type = { -- cgit v0.10.2 From 2671bfc3beb44e70636bd0208274426db57f73b5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:44 -0400 Subject: NFS: Remove secinfo knowledge out of the generic client And also remove the unneeded rpc_op. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d68810f..d699444 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -286,9 +286,6 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); -#ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); -#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d51868e..2a9591b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,35 +119,6 @@ Elong: } #ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) -{ - struct gss_api_mech *mech; - struct xdr_netobj oid; - int i; - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; - - for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo_flavor *flavor; - flavor = &flavors->flavors[i]; - - if 
(flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { - pseudoflavor = flavor->flavor; - break; - } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->gss.sec_oid4.len; - oid.data = flavor->gss.sec_oid4.data; - mech = gss_mech_get_by_OID(&oid); - if (!mech) - continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); - gss_mech_put(mech); - break; - } - } - - return pseudoflavor; -} - static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, struct qstr *name, struct nfs_fh *fh, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8d75021..53a487e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -206,6 +206,7 @@ extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; /* nfs4namespace.c */ +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); /* nfs4proc.c */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a7f3ded..a69ee39 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +{ + struct gss_api_mech *mech; + struct xdr_netobj oid; + int i; + rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; + + for (i = 0; i < flavors->num_flavors; i++) { + struct nfs4_secinfo_flavor *flavor; + flavor = &flavors->flavors[i]; + + if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { + pseudoflavor = flavor->flavor; + break; + } else if (flavor->flavor == RPC_AUTH_GSS) { + oid.len = flavor->gss.sec_oid4.len; + oid.data = flavor->gss.sec_oid4.data; + mech = gss_mech_get_by_OID(&oid); + if (!mech) + continue; + pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); + gss_mech_put(mech); + break; + } + } + + return 
pseudoflavor; +} + static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) { struct page *page; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87af80d..fa661b9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6610,7 +6610,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .init_client = nfs4_init_client, - .secinfo = nfs4_proc_secinfo, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5f563bd..eb1f143 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1397,7 +1397,6 @@ struct nfs_rpc_ops { struct iattr *iattr); int (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t, int); - int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); }; /* -- cgit v0.10.2 From 281cad46b34db4dbb1d1e603f7b9cfe25d1ae7c9 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:45 -0400 Subject: NFS: Create a submount rpc_op This simplifies the code for v2 and v3 and gives v4 a chance to decide on referrals without needing to modify the generic client. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d699444..0fd1efa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -185,17 +185,6 @@ static inline void nfs_fs_proc_exit(void) } #endif -/* nfs4namespace.c */ -#ifdef CONFIG_NFS_V4 -extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry); -#else -static inline -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) -{ - return ERR_PTR(-ENOENT); -} -#endif - /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; @@ -286,6 +275,10 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); +struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2a9591b..e36fd8a 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list); static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); int nfs_mountpoint_expiry_timeout = 500 * HZ; -static struct vfsmount *nfs_do_submount(struct dentry *dentry, - struct nfs_fh *fh, - struct nfs_fattr *fattr, - rpc_authflavor_t authflavor); - /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path @@ -118,35 +113,6 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } -#ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, - struct qstr *name, - struct nfs_fh *fh, - struct nfs_fattr *fattr) -{ 
- int err; - - if (NFS_PROTO(dir)->version == 4) - return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); - - err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); - if (err) - return ERR_PTR(err); - return rpc_clone_client(NFS_SERVER(dir)->client); -} -#else /* CONFIG_NFS_V4 */ -static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, - struct qstr *name, - struct nfs_fh *fh, - struct nfs_fattr *fattr) -{ - int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); - if (err) - return ERR_PTR(err); - return rpc_clone_client(NFS_SERVER(dir)->client); -} -#endif /* CONFIG_NFS_V4 */ - /* * nfs_d_automount - Handle crossing a mountpoint on the server * @path - The mountpoint @@ -162,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, struct vfsmount *nfs_d_automount(struct path *path) { struct vfsmount *mnt; - struct dentry *parent; + struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; - struct rpc_clnt *client; dprintk("--> nfs_d_automount()\n"); @@ -181,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path) dprintk("%s: enter\n", __func__); - /* Look it up again to get its attributes */ - parent = dget_parent(path->dentry); - client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr); - dput(parent); - if (IS_ERR(client)) { - mnt = ERR_CAST(client); - goto out; - } - - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(client, path->dentry); - else - mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor); - rpc_shutdown_client(client); - + mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); if (IS_ERR(mnt)) goto out; @@ -268,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, * @authflavor - security flavor to use when performing the mount * */ -static struct vfsmount 
*nfs_do_submount(struct dentry *dentry, - struct nfs_fh *fh, - struct nfs_fattr *fattr, - rpc_authflavor_t authflavor) +struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr, rpc_authflavor_t authflavor) { struct nfs_clone_mount mountdata = { .sb = dentry->d_sb, @@ -304,3 +253,19 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } + +struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + int err; + struct dentry *parent = dget_parent(dentry); + + /* Look it up again to get its attributes */ + err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, + &dentry->d_name, fh, fattr); + dput(parent); + if (err != 0) + return ERR_PTR(err); + + return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 56dcefc..c23214d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -885,6 +885,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_inode_ops = &nfs3_file_inode_operations, .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, + .submount = nfs_submount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 53a487e..97365b0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -208,6 +208,8 @@ extern const struct inode_operations nfs4_dir_inode_operations; /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); +struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); diff --git a/fs/nfs/nfs4namespace.c 
b/fs/nfs/nfs4namespace.c index a69ee39..80fc0fe 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -329,7 +329,7 @@ out: * @dentry - dentry of referral * */ -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) { struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; @@ -370,3 +370,25 @@ out: dprintk("%s: done\n", __func__); return mnt; } + +struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct dentry *parent = dget_parent(dentry); + struct rpc_clnt *client; + struct vfsmount *mnt; + + /* Look it up again to get its attributes and sec flavor */ + client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr); + dput(parent); + if (IS_ERR(client)) + return ERR_CAST(client); + + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + mnt = nfs_do_refmount(client, dentry); + else + mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor); + + rpc_shutdown_client(client); + return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fa661b9..2091af2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6571,6 +6571,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_inode_ops = &nfs4_file_inode_operations, .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, + .submount = nfs4_submount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 22ee705..76b3229 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -742,6 +742,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_inode_ops = &nfs_file_inode_operations, .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, + .submount = nfs_submount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git 
a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index eb1f143..4dada94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1341,6 +1341,8 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v0.10.2 From 80a16b21a81eb639f0b726549f4c46c0e9aff92e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:46 -0400 Subject: NFS: Remove extra rpc_clnt argument to proc_lookup Now that I'm doing secinfo automatically in the v4 code this extra argument isn't needed. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8789210..82b42e2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1143,7 +1143,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (fhandle == NULL || fattr == NULL) goto out_error; - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) goto out_bad; if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1299,7 +1299,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1646,7 +1646,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (dentry->d_inode) goto out; if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, 
&dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e36fd8a..08b9c93 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -261,8 +261,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct dentry *parent = dget_parent(dentry); /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, - &dentry->d_name, fh, fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); dput(parent); if (err != 0) return ERR_PTR(err); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c23214d..48bcad2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs3_diropargs arg = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2091af2..1780391 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2578,7 +2578,7 @@ out: return err; } -static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 76b3229..fea9163 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { diff --git a/include/linux/nfs_xdr.h 
b/include/linux/nfs_xdr.h index 4dada94..c940d46 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1347,7 +1347,7 @@ struct nfs_rpc_ops { struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct rpc_clnt *clnt, struct inode *, struct qstr *, + int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, -- cgit v0.10.2 From 9b5415b536cc3193e9608a7fced1372df8ce4dcf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 14:31:47 -0400 Subject: NFS: Fix a use-before-initialised warning in fs/nfs/write.c and fs/nfs/pnfs.c If the allocation of nfs_write_header fails, the list of nfs_pages that needs to be cleaned up is still on desc->pg_list... Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust Acked-by: Fred Isaman diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4da05e4..39cbac5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1308,7 +1308,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - desc->pg_completion_ops->error_cleanup(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fec214b..3636191 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1164,7 +1164,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - desc->pg_completion_ops->error_cleanup(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; -- cgit v0.10.2 From b58fee2189b17719c846f65ffe9483c2814e6605 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 13:06:53 -0400 Subject: NFS: pnfs_pageio_init_read() and init_write() need an 
extra argument This is only when CONFIG_NFS_V4_1 isn't enabled. Signed-off-by: Bryan Schumaker Acked-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8efbee7..f20054b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -424,12 +424,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { return false; } -static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { return false; } -- cgit v0.10.2 From 24fc9211f4d48c04882a52e42b21c9b4abc4f9bf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 13:27:11 -0400 Subject: NFS: Define nfs_direct_write_schedule_work() when v3 and v4 are disabled v2 doesn't have commits, so this function can be a no-op. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d44de2f..e83545c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -601,6 +601,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #else +static void nfs_direct_write_schedule_work(struct work_struct *work) +{ +} static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { -- cgit v0.10.2 From 68cd6fa4f3be07ba648e22617dfa16a40d671d19 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 14:30:22 -0400 Subject: NFS: Define dummy nfs_init_cinfo() and nfs_init_cinfo_from_inode() These are needed when v3 and v4 are not enabled. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3636191..2f80aa5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -567,6 +567,17 @@ int nfs_write_need_commit(struct nfs_write_data *data) } #else +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode) +{ +} + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq) +{ +} + void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) -- cgit v0.10.2 From 71e8cc00c63e8518ce86b4079355fc9086a4869d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:22:54 -0400 Subject: NFS: Ensure that we break out of read/write_schedule_segment on error Currently we do break out of the for() loop, but we also need to break out of the enclosing do {} while()... Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e83545c..f30d5c2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -396,7 +396,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pos += req_len; count -= req_len; } - } while (count != 0); + } while (count != 0 && result >= 0); kfree(pagevec); @@ -692,6 +692,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d nfs_release_request(req); nfs_direct_release_pages(pagevec + i, npages - i); + break; } pgbase = 0; bytes -= req_len; @@ -700,7 +701,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pos += req_len; count -= req_len; } - } while (count != 0); + } while (count != 0 && result >= 0); kfree(pagevec); -- cgit v0.10.2 From 3e9e0ca3f19e911ce13c2e6c9858fcb41a37496c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:40:06 -0400 Subject: NFS: O_DIRECT pgio_completion_ops error_cleanup must unlock the request Signed-off-by: Trond 
Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f30d5c2..af02bde 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -292,7 +292,7 @@ out_put: hdr->release(hdr); } -static void nfs_sync_pgio_error(struct list_head *head) +static void nfs_read_sync_pgio_error(struct list_head *head) { struct nfs_page *req; @@ -309,7 +309,7 @@ static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) } static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { - .error_cleanup = nfs_sync_pgio_error, + .error_cleanup = nfs_read_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_read_completion, }; @@ -775,8 +775,20 @@ out_put: hdr->release(hdr); } +static void nfs_write_sync_pgio_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_release_request(req); + nfs_unlock_request(req); + } +} + static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { - .error_cleanup = nfs_sync_pgio_error, + .error_cleanup = nfs_write_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_write_completion, }; -- cgit v0.10.2 From 6d74743b088d116e31fe1b73f47e782ee2016b94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:27:31 -0400 Subject: NFS: Simplify O_DIRECT page referencing The O_DIRECT code shouldn't need to hold 2 references to each page. The reference held by the struct nfs_page should suffice. 
Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index af02bde..78d1ead 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,10 +268,9 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) } bytes += req->wb_bytes; nfs_list_remove_request(req); - nfs_direct_readpage_release(req); if (!PageCompound(page)) set_page_dirty(page); - page_cache_release(page); + nfs_direct_readpage_release(req); } } else { while (!list_empty(&hdr->pages)) { @@ -281,7 +280,6 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) if (!PageCompound(req->wb_page)) set_page_dirty(req->wb_page); bytes += req->wb_bytes; - page_cache_release(req->wb_page); nfs_list_remove_request(req); nfs_direct_readpage_release(req); } @@ -375,8 +373,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pagevec[i], pgbase, req_len); if (IS_ERR(req)) { - nfs_direct_release_pages(pagevec + i, - npages - i); result = PTR_ERR(req); break; } @@ -385,8 +381,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de if (!nfs_pageio_add_request(desc, req)) { result = desc->pg_error; nfs_release_request(req); - nfs_direct_release_pages(pagevec + i, - npages - i); break; } pgbase = 0; @@ -396,6 +390,8 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pos += req_len; count -= req_len; } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); } while (count != 0 && result >= 0); kfree(pagevec); @@ -509,7 +505,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_pageio_complete(&desc); while (!list_empty(&failed)) { - page_cache_release(req->wb_page); nfs_release_request(req); nfs_unlock_request(req); } @@ -542,10 +537,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ 
nfs_mark_request_commit(req, NULL, &cinfo); - } else { - page_cache_release(req->wb_page); + } else nfs_release_request(req); - } nfs_unlock_request(req); } @@ -678,8 +671,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pagevec[i], pgbase, req_len); if (IS_ERR(req)) { - nfs_direct_release_pages(pagevec + i, - npages - i); result = PTR_ERR(req); break; } @@ -690,8 +681,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d result = desc->pg_error; nfs_unlock_request(req); nfs_release_request(req); - nfs_direct_release_pages(pagevec + i, - npages - i); break; } pgbase = 0; @@ -701,6 +690,8 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pos += req_len; count -= req_len; } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); } while (count != 0 && result >= 0); kfree(pagevec); @@ -763,7 +754,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_mark_request_commit(req, hdr->lseg, &cinfo); break; default: - page_cache_release(req->wb_page); nfs_release_request(req); } nfs_unlock_request(req); -- cgit v0.10.2 From 292f3eeef00a20fa0ef4feec62792ad0065760a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 18:31:49 -0400 Subject: NFS: Use kmem_cache_zalloc() in nfs_direct_req_alloc Simplify the initialisation of O_DIRECT requests. 
Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 78d1ead..f17e469 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -149,26 +149,16 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; - dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); + dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); if (!dreq) return NULL; kref_init(&dreq->kref); kref_get(&dreq->kref); init_completion(&dreq->completion); - dreq->mds_cinfo.ncommit = 0; - atomic_set(&dreq->mds_cinfo.rpcs_out, 0); INIT_LIST_HEAD(&dreq->mds_cinfo.list); INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); - memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo)); - dreq->iocb = NULL; - dreq->ctx = NULL; - dreq->l_ctx = NULL; spin_lock_init(&dreq->lock); - atomic_set(&dreq->io_count, 0); - dreq->count = 0; - dreq->error = 0; - dreq->flags = 0; return dreq; } -- cgit v0.10.2 From 4f97615d19c370d1d907ef37f8bcd9c3672851ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 18:39:20 -0400 Subject: NFS: Fix a compile issue when CONFIG_NFS_V4_1 is undefined struct nfs_direct_req can't compile when struct pnfs_ds_commit_info is undefined. Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c940d46..6deb8f0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1176,6 +1176,11 @@ struct nfs41_free_stateid_res { struct nfs4_sequence_res seq_res; }; +#else + +struct pnfs_ds_commit_info { +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; -- cgit v0.10.2 From 9146ab5055152bbacb5690c384df2fd610fb3c68 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 11:21:43 -0400 Subject: NFS: Read cleanups Remove unused variables, and reformat some code. 
Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 35e2dce..20a0293 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -341,8 +341,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; - int requests = 0; - int ret = 0; nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); @@ -358,12 +356,11 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, data->pages.pagevec[0] = page; nfs_read_rpcsetup(data, len, offset); list_add(&data->list, &hdr->rpc_list); - requests++; nbytes -= len; offset += len; - } while(nbytes != 0); + } while (nbytes != 0); desc->pg_rpc_callops = &nfs_read_common_ops; - return ret; + return 0; out_bad: while (!list_empty(&hdr->rpc_list)) { data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); @@ -387,8 +384,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, desc->pg_count)); if (!data) { desc->pg_completion_ops->error_cleanup(head); - ret = -ENOMEM; - goto out; + return -ENOMEM; } pages = data->pages.pagevec; @@ -402,8 +398,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, nfs_read_rpcsetup(data, desc->pg_count, 0); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_read_common_ops; -out: - return ret; + return 0; } int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, -- cgit v0.10.2 From 25b11dcdbfcad69a5ec03265e2dce19e5eca936b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 12:07:22 -0400 Subject: NFS: Clean up nfs read and write error paths Move the error handling for nfs_generic_pagein() into a single function. Ditto for nfs_generic_flush(). 
Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 39cbac5..6fdeca2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1321,7 +1321,6 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) if (ret != 0) { put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - set_bit(NFS_IOHDR_REDO, &hdr->flags); } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) @@ -1476,7 +1475,6 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (ret != 0) { put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - set_bit(NFS_IOHDR_REDO, &hdr->flags); } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); if (atomic_dec_and_test(&hdr->refcnt)) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a0293..1961a19 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -320,6 +320,19 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; +static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + set_bit(NFS_IOHDR_REDO, &hdr->flags); + while (!list_empty(&hdr->rpc_list)) { + struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_read_data, list); + list_del(&data->list); + nfs_readdata_release(data); + } + desc->pg_completion_ops->error_cleanup(&desc->pg_list); +} + /* * Generate multiple requests to fill a single page. 
* @@ -342,33 +355,27 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); data = nfs_readdata_alloc(hdr, 1); - if (!data) - goto out_bad; + if (!data) { + nfs_pagein_error(desc, hdr); + return -ENOMEM; + } data->pages.pagevec[0] = page; nfs_read_rpcsetup(data, len, offset); list_add(&data->list, &hdr->rpc_list); nbytes -= len; offset += len; } while (nbytes != 0); + + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); desc->pg_rpc_callops = &nfs_read_common_ops; return 0; -out_bad: - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); - list_del(&data->list); - nfs_readdata_release(data); - } - desc->pg_completion_ops->error_cleanup(&hdr->pages); - return -ENOMEM; } static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, @@ -378,12 +385,11 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - int ret = 0; data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - desc->pg_completion_ops->error_cleanup(head); + nfs_pagein_error(desc, hdr); return -ENOMEM; } @@ -427,8 +433,6 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (ret == 0) ret = nfs_do_multiple_reads(&hdr->rpc_list, desc->pg_rpc_callops); - else - set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2f80aa5..d1e4f81 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1058,6 +1058,19 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; +static 
void nfs_flush_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + set_bit(NFS_IOHDR_REDO, &hdr->flags); + while (!list_empty(&hdr->rpc_list)) { + struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_write_data, list); + list_del(&data->list); + nfs_writedata_release(data); + } + desc->pg_completion_ops->error_cleanup(&desc->pg_list); +} + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -1071,12 +1084,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; - int ret = 0; struct nfs_commit_info cinfo; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || @@ -1090,8 +1100,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t len = min(nbytes, wsize); data = nfs_writedata_alloc(hdr, 1); - if (!data) - goto out_bad; + if (!data) { + nfs_flush_error(desc, hdr); + return -ENOMEM; + } data->pages.pagevec[0] = page; nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); @@ -1099,17 +1111,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, nbytes -= len; offset += len; } while (nbytes != 0); + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); desc->pg_rpc_callops = &nfs_write_common_ops; - return ret; - -out_bad: - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list); - list_del(&data->list); - nfs_writedata_release(data); - } - desc->pg_completion_ops->error_cleanup(&hdr->pages); - return -ENOMEM; + return 0; } /* @@ -1127,15 +1132,13 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct page **pages; struct nfs_write_data *data; struct 
list_head *head = &desc->pg_list; - int ret = 0; struct nfs_commit_info cinfo; data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - desc->pg_completion_ops->error_cleanup(head); - ret = -ENOMEM; - goto out; + nfs_flush_error(desc, hdr); + return -ENOMEM; } nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); @@ -1155,8 +1158,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_write_common_ops; -out: - return ret; + return 0; } int nfs_generic_flush(struct nfs_pageio_descriptor *desc, @@ -1186,8 +1188,6 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_do_multiple_writes(&hdr->rpc_list, desc->pg_rpc_callops, desc->pg_ioflags); - else - set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; -- cgit v0.10.2 From 4bd8b010136afa0df9122a08bad361686bda0a1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 12:49:58 -0400 Subject: NFS: Simplify the nfs_read_completion functions Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f17e469..aab3016 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -243,36 +243,28 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) dreq->count += hdr->good_bytes; spin_unlock(&dreq->lock); - if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } - bytes += req->wb_bytes; - nfs_list_remove_request(req); 
- if (!PageCompound(page)) - set_page_dirty(page); - nfs_direct_readpage_release(req); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); } - } else { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - if (bytes < hdr->good_bytes) - if (!PageCompound(req->wb_page)) - set_page_dirty(req->wb_page); - bytes += req->wb_bytes; - nfs_list_remove_request(req); - nfs_direct_readpage_release(req); + if (!PageCompound(page)) { + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + if (bytes < hdr->good_bytes) + set_page_dirty(page); + } else + set_page_dirty(page); } + bytes += req->wb_bytes; + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); } out_put: if (put_dreq(dreq)) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 1961a19..37c9eb2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -179,34 +179,26 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; - if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } - SetPageUptodate(page); - nfs_list_remove_request(req); - nfs_readpage_release(req); - bytes += PAGE_SIZE; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + 
if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); } - } else { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - bytes += req->wb_bytes; + bytes += req->wb_bytes; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) - SetPageUptodate(req->wb_page); - nfs_list_remove_request(req); - nfs_readpage_release(req); - } + SetPageUptodate(page); + } else + SetPageUptodate(page); + nfs_list_remove_request(req); + nfs_readpage_release(req); } out: hdr->release(hdr); -- cgit v0.10.2 From a9f6991b6cd3f55aa8482633337cd811d84d0dd8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFSv4: Fix a typo in NFS4_enc_link_sz Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c3cc0e..fe61424 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -626,9 +626,9 @@ static int nfs4_stat_to_errno(int); encode_savefh_maxsz + \ encode_putfh_maxsz + \ encode_link_maxsz + \ - decode_getattr_maxsz + \ + encode_getattr_maxsz + \ encode_restorefh_maxsz + \ - decode_getattr_maxsz) + encode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ -- cgit v0.10.2 From 9e907fec6ef7705ba07e22f034dacf102d29a538 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFSv4: Delegreturn only needs the cache consistency bitmask In order to do close-to-open cache consistency checking after a delegreturn, we don't need to retrieve the full set of attributes. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1780391..111a3cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4117,7 +4117,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; -- cgit v0.10.2 From e144cbcc251f16c1a14b9256cda73ab4aebe933a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 28 Apr 2012 16:05:03 -0400 Subject: NFSv4: Retrieve attributes _before_ calling delegreturn In order to retrieve cache consistency attributes before anyone else has a chance to change the inode, we need to put the GETATTR op _before_ the DELEGRETURN op. We can then use that as part of a 'nfs_post_op_update_inode_force_wcc()' call, to ensure that we update the attributes without clearing our cached data. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 111a3cc..2e0fbff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4138,9 +4138,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status != 0) goto out; status = data->rpc_status; - if (status != 0) - goto out; - nfs_refresh_inode(inode, &data->fattr); + if (status == 0) + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + else + nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fe61424..ac7a3b0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2602,8 +2602,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); - encode_delegreturn(xdr, args->stateid, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); + encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -6527,10 +6527,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_delegreturn(xdr); + status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server); + status = decode_delegreturn(xdr); out: return status; } -- cgit v0.10.2 From b4b1eadf7c5f00636500ad47f68edc0666e63ea5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 11:23:50 -0400 Subject: NFS: Don't force page cache revalidations when holding a delegation If we're holding a delegation, then we already know that our page cache is valid. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 59a12c6a..fed27c0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -870,6 +870,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map return 0; } +static bool nfs_mapping_need_revalidate_inode(struct inode *inode) +{ + if (nfs_have_delegated_attributes(inode)) + return false; + return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) + || NFS_STALE(inode); +} + /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode @@ -880,9 +889,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_cache_expired(inode) - || NFS_STALE(inode)) { + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) goto out; -- cgit v0.10.2 From 01da47bde78ff2149f6546a0f17e25983aaddd7b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 12:30:19 -0400 Subject: NFS: Optimise away nfs_check_inode_attributes() when holding a delegation We already know that the attribute cache is valid. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fed27c0..81946e7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -955,6 +955,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; + if (nfs_have_delegated_attributes(inode)) + return 0; /* Has the inode gone and changed behind our back? 
*/ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) return -EIO; -- cgit v0.10.2 From 8d197a568fc337c66729b289c7fa0f28c14ba5ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 12:50:01 -0400 Subject: NFS: Always trust the PageUptodate flag when we have a delegation We can always use the optimal full page write if we know that we hold a delegation. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d1e4f81..6f263da 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -850,10 +850,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * the PageUptodate() flag. In this case, we will need to turn off * write optimisations that depend on the page contents being correct. */ -static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { - return PageUptodate(page) && - !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); + if (nfs_have_delegated_attributes(inode)) + goto out; + if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + return false; +out: + return PageUptodate(page) != 0; } /* -- cgit v0.10.2 From 4124bbc52118e7da6f7ad41cc247fa16f4b3f051 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFS: Simplify nfs_fhget() If the inode is being initialised, there is no point in setting flags such as NFS_INO_INVALID_ACCESS, NFS_INO_INVALID_ACL or NFS_INO_INVALID_DATA since there are no cached access calls, acls or data caches to invalidate. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 81946e7..8d67e5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_mode = fattr->mode; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); @@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - 
nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { -- cgit v0.10.2 From 6a4506c0b56889aaa15bcf50b3c75f46a8d0a3bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFS: Change attribute updates should set NFS_INO_REVAL_PAGECACHE Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8d67e5e..9d76c0b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1312,7 +1312,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (inode->i_version != fattr->change_attr) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; -- cgit v0.10.2 From 3a1556e8662cc425c433b463fcdae138908ca467 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv2/v3: Simulate the change attribute Use the ctime to simulate a change attribute for NFSv2 and NFSv3. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 60f7e4e..a8f8de6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -880,7 +880,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0fd1efa..1855e8f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -493,3 +493,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } +/* + * Convert a struct timespec into a 64-bit change attribute + * + * This does approximately the same thing as timespec_to_ns(), + * but for calculation efficiency, we multiply the seconds by + * 1024*1024*1024. 
+ */ +static inline +u64 nfs_timespec_to_change_attr(const struct timespec *ts) +{ + return ((u64)ts->tv_sec << 30) + ts->tv_nsec; +} diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1f56000..c99008e 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -313,6 +313,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); xdr_decode_time(p, &fattr->ctime); + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); + return 0; out_overflow: print_overflow_msg(__func__, xdr); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 01e53e9..ee284c2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -675,6 +675,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) p = xdr_decode_nfstime3(p, &fattr->atime); p = xdr_decode_nfstime3(p, &fattr->mtime); xdr_decode_nfstime3(p, &fattr->ctime); + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); fattr->valid |= NFS_ATTR_FATTR_V3; return 0; @@ -725,12 +726,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) goto out_overflow; fattr->valid |= NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PREMTIME | NFS_ATTR_FATTR_PRECTIME; p = xdr_decode_size3(p, &fattr->pre_size); p = xdr_decode_nfstime3(p, &fattr->pre_mtime); xdr_decode_nfstime3(p, &fattr->pre_ctime); + fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime); return 0; out_overflow: diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6deb8f0..bc36808 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -106,14 +106,14 @@ struct nfs_fattr { | NFS_ATTR_FATTR_FILEID \ | NFS_ATTR_FATTR_ATIME \ | NFS_ATTR_FATTR_MTIME \ - | NFS_ATTR_FATTR_CTIME) + | NFS_ATTR_FATTR_CTIME \ + | NFS_ATTR_FATTR_CHANGE) #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_BLOCKS_USED) #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ | 
NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ - | NFS_ATTR_FATTR_SPACE_USED \ - | NFS_ATTR_FATTR_CHANGE) + | NFS_ATTR_FATTR_SPACE_USED) /* * Info on the file system -- cgit v0.10.2 From fee7fe196c41847c135cde41b0ec790f53ee6fcf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFS: Simplify the cache invalidation code Now that NFSv2 and NFSv3 have simulated change attributes, instead of using all three of mtime, ctime and change attribute to manage data cache consistency, we can simplify the code to just use the change attribute. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9d76c0b..0d53113 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -958,7 +958,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); @@ -1325,38 +1325,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - /* NFSv2/v3: Check if the mtime agrees */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - dprintk("NFS: mtime change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) - nfs_force_lookup_revalidate(inode); - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); if (fattr->valid & 
NFS_ATTR_FATTR_CTIME) { - /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - /* and probably clear data for a directory too as utimes can cause - * havoc with our cache. - */ - if (S_ISDIR(inode->i_mode)) { - invalid |= NFS_INO_INVALID_DATA; - nfs_force_lookup_revalidate(inode); - } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ -- cgit v0.10.2 From 90ff0c548d1220d31f80e498b587393895705e6c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv4: Simplify the NFSv4 OPEN compound Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. The cost is that if we later need to stat() the directory, then we know that the ctime and mtime are likely to be invalid. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2e0fbff..f01c3d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -788,7 +788,6 @@ struct nfs4_opendata { struct nfs4_string owner_name; struct nfs4_string group_name; struct nfs_fattr f_attr; - struct nfs_fattr dir_attr; struct dentry *dir; struct dentry *dentry; struct nfs4_state_owner *owner; @@ -804,12 +803,10 @@ struct nfs4_opendata { static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; - p->o_res.dir_attr = &p->dir_attr; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); - nfs_fattr_init(&p->dir_attr); nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); } @@ -843,7 +840,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; - p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1611,8 +1607,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); - nfs_refresh_inode(dir, o_res->dir_attr); - if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(data); if (status != 0) @@ -1645,11 +1639,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) { + if (o_arg->open_flags & O_CREAT) update_changeattr(dir, &o_res->cinfo); - nfs_post_op_update_inode(dir, o_res->dir_attr); - } else - nfs_refresh_inode(dir, o_res->dir_attr); if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 
ac7a3b0..6e878dc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -431,20 +431,14 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_savefh_maxsz + \ encode_open_maxsz + \ encode_getfh_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_savefh_maxsz + \ decode_open_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ @@ -2191,12 +2185,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_savefh(xdr, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->dir_bitmask, &hdr); encode_nops(&hdr); } @@ -6075,19 +6066,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_savefh(xdr); - if (status) - goto out; status = decode_open(xdr, res); if (status) goto out; if (decode_getfh(xdr, &res->fh) != 0) goto out; - if (decode_getfattr(xdr, res->f_attr, res->server) != 0) - goto out; - if (decode_restorefh(xdr) != 0) - goto out; - decode_getfattr(xdr, res->dir_attr, res->server); + decode_getfattr(xdr, res->f_attr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bc36808..92a929f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -338,7 +338,6 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; 
- const u32 * dir_bitmask; __u32 claim; struct nfs4_sequence_args seq_args; }; @@ -349,7 +348,6 @@ struct nfs_openres { struct nfs4_change_info cinfo; __u32 rflags; struct nfs_fattr * f_attr; - struct nfs_fattr * dir_attr; struct nfs_seqid * seqid; const struct nfs_server *server; fmode_t delegation_type; -- cgit v0.10.2 From 7c317fcfbae773e493ecee1c53738db774b1d0ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv4: Simplify the NFSv4 CREATE compound Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f01c3d1..619bc1e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2994,7 +2994,6 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; - struct nfs_fattr dir_fattr; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -3018,9 +3017,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; - data->res.dir_fattr = &data->dir_fattr; nfs_fattr_init(data->res.fattr); - nfs_fattr_init(data->res.dir_fattr); } return data; } @@ -3031,7 +3028,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { update_changeattr(dir, &data->res.dir_cinfo); - nfs_post_op_update_inode(dir, data->res.dir_fattr); status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); } return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6e878dc..1a70097 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -647,20 +647,14 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_savefh_maxsz + \ encode_create_maxsz + \ 
encode_getfh_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_savefh_maxsz + \ decode_create_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2119,12 +2113,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->dir_fh, &hdr); - encode_savefh(xdr, &hdr); encode_create(xdr, args, &hdr); encode_getfh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -5895,21 +5886,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_savefh(xdr); - if (status) - goto out; status = decode_create(xdr, &res->dir_cinfo); if (status) goto out; status = decode_getfh(xdr, res->fh); if (status) goto out; - if (decode_getfattr(xdr, res->fattr, res->server)) - goto out; - status = decode_restorefh(xdr); - if (status) - goto out; - decode_getfattr(xdr, res->dir_fattr, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 92a929f..696a17e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -855,7 +855,6 @@ struct nfs4_create_res { struct nfs_fh * fh; struct nfs_fattr * fattr; struct nfs4_change_info dir_cinfo; - struct nfs_fattr * dir_fattr; struct nfs4_sequence_res seq_res; }; -- cgit v0.10.2 From 778d28172f710184855bcfeadcdd6b46997c4de2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:19 -0400 Subject: NFSv4: Simplify 
the NFSv4 REMOVE, LINK and RENAME compounds Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 619bc1e..c746b0c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2775,7 +2775,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) .fh = NFS_FH(dir), .name.len = name->len, .name.name = name->name, - .bitmask = server->attr_bitmask, }; struct nfs_removeres res = { .server = server, @@ -2785,19 +2784,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; - int status = -ENOMEM; - - res.dir_attr = nfs_alloc_fattr(); - if (res.dir_attr == NULL) - goto out; + int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - if (status == 0) { + if (status == 0) update_changeattr(dir, &res.cinfo); - nfs_post_op_update_inode(dir, res.dir_attr); - } - nfs_free_fattr(res.dir_attr); -out: return status; } @@ -2819,7 +2810,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - args->bitmask = server->cache_consistency_bitmask; res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); @@ -2844,7 +2834,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; update_changeattr(dir, &res->cinfo); - nfs_post_op_update_inode(dir, res->dir_attr); return 1; } @@ -2855,7 +2844,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) struct nfs_renameres *res = msg->rpc_resp; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; - arg->bitmask = server->attr_bitmask; res->server = server; 
nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); } @@ -2881,9 +2869,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 0; update_changeattr(old_dir, &res->old_cinfo); - nfs_post_op_update_inode(old_dir, res->old_fattr); update_changeattr(new_dir, &res->new_cinfo); - nfs_post_op_update_inode(new_dir, res->new_fattr); return 1; } @@ -2896,7 +2882,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, .new_dir = NFS_FH(new_dir), .old_name = old_name, .new_name = new_name, - .bitmask = server->attr_bitmask, }; struct nfs_renameres res = { .server = server, @@ -2908,21 +2893,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, }; int status = -ENOMEM; - res.old_fattr = nfs_alloc_fattr(); - res.new_fattr = nfs_alloc_fattr(); - if (res.old_fattr == NULL || res.new_fattr == NULL) - goto out; - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(old_dir, &res.old_cinfo); - nfs_post_op_update_inode(old_dir, res.old_fattr); update_changeattr(new_dir, &res.new_cinfo); - nfs_post_op_update_inode(new_dir, res.new_fattr); } -out: - nfs_free_fattr(res.new_fattr); - nfs_free_fattr(res.old_fattr); return status; } @@ -2960,18 +2935,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * int status = -ENOMEM; res.fattr = nfs_alloc_fattr(); - res.dir_attr = nfs_alloc_fattr(); - if (res.fattr == NULL || res.dir_attr == NULL) + if (res.fattr == NULL) goto out; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(dir, &res.cinfo); - nfs_post_op_update_inode(dir, res.dir_attr); nfs_post_op_update_inode(inode, res.fattr); } out: - nfs_free_fattr(res.dir_attr); nfs_free_fattr(res.fattr); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1a70097..49483f1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ 
-589,38 +589,29 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_remove_maxsz + \ - encode_getattr_maxsz) + encode_remove_maxsz) #define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_remove_maxsz + \ - decode_getattr_maxsz) + decode_remove_maxsz) #define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ - encode_rename_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ - encode_getattr_maxsz) + encode_rename_maxsz) #define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ - decode_rename_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ - decode_getattr_maxsz) + decode_rename_maxsz) #define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ encode_link_maxsz + \ - encode_getattr_maxsz + \ encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ @@ -629,7 +620,6 @@ static int nfs4_stat_to_errno(int); decode_savefh_maxsz + \ decode_putfh_maxsz + \ decode_link_maxsz + \ - decode_getattr_maxsz + \ decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ @@ -2052,7 +2042,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_remove(xdr, &args->name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2072,9 +2061,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr, encode_savefh(xdr, &hdr); encode_putfh(xdr, args->new_dir, &hdr); 
encode_rename(xdr, args->old_name, args->new_name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2094,7 +2080,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr, encode_savefh(xdr, &hdr); encode_putfh(xdr, args->dir_fh, &hdr); encode_link(xdr, args->name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_restorefh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -5782,9 +5767,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_remove(xdr, &res->cinfo); - if (status) - goto out; - decode_getfattr(xdr, res->dir_attr, res->server); out: return status; } @@ -5814,15 +5796,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); - if (status) - goto out; - /* Current FH is target directory */ - if (decode_getfattr(xdr, res->new_fattr, res->server)) - goto out; - status = decode_restorefh(xdr); - if (status) - goto out; - decode_getfattr(xdr, res->old_fattr, res->server); out: return status; } @@ -5858,8 +5831,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, * Note order: OP_LINK leaves the directory as the current * filehandle. 
*/ - if (decode_getfattr(xdr, res->dir_attr, res->server)) - goto out; status = decode_restorefh(xdr); if (status) goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 696a17e..2e53a3f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -540,7 +540,6 @@ struct nfs_commitres { struct nfs_removeargs { const struct nfs_fh *fh; struct qstr name; - const u32 * bitmask; struct nfs4_sequence_args seq_args; }; @@ -559,7 +558,6 @@ struct nfs_renameargs { const struct nfs_fh *new_dir; const struct qstr *old_name; const struct qstr *new_name; - const u32 *bitmask; struct nfs4_sequence_args seq_args; }; -- cgit v0.10.2 From 5a37f85131c526ed7a3991d4dc2845498f81c1de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 28 Apr 2012 14:55:16 -0400 Subject: NFSv4: Don't request cache consistency attributes on some writes We don't need cache consistency information when we're doing O_DIRECT writes. Ditto for the case of delegated writes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c746b0c..64b67f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3370,7 +3370,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); } return 0; } @@ -3401,15 +3401,30 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(nfs4_reset_write); +static +bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +{ + const struct nfs_pgio_header *hdr = data->header; + + /* Don't request attributes for pNFS or O_DIRECT writes */ + if (data->ds_clp != NULL || hdr->dreq != NULL) + return false; + /* Otherwise, request attributes if and only if we don't hold + * a delegation + */ + return nfs_have_delegation(hdr->inode, 
FMODE_READ) == 0; +} + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->ds_clp) { + if (!nfs4_write_need_cache_consistency_data(data)) { data->args.bitmask = NULL; data->res.fattr = NULL; } else data->args.bitmask = server->cache_consistency_bitmask; + if (!data->write_done_cb) data->write_done_cb = nfs4_write_done_cb; data->res.server = server; -- cgit v0.10.2 From 8582715e733d08bc98fe629db0601360d70de4dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 10:44:42 -0400 Subject: NFSv4: COMMIT does not need post-op attributes No attributes are supposed to change during a COMMIT call, so there is no need to request post-op attributes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 64b67f3..98eb48d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3462,7 +3462,6 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da rpc_restart_call_prepare(task); return -EAGAIN; } - nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -3477,11 +3476,6 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess { struct nfs_server *server = NFS_SERVER(data->inode); - if (data->lseg) { - data->args.bitmask = NULL; - data->res.fattr = NULL; - } else - data->args.bitmask = server->cache_consistency_bitmask; if (data->commit_done_cb == NULL) data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 49483f1..db040e9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -421,13 +421,11 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_commit_maxsz + \ - encode_getattr_maxsz) + encode_commit_maxsz) #define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ 
decode_putfh_maxsz + \ - decode_commit_maxsz + \ - decode_getattr_maxsz) + decode_commit_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -2425,8 +2423,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_commit(xdr, args, &hdr); - if (args->bitmask) - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -6306,10 +6302,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_commit(xdr, res); - if (status) - goto out; - if (res->fattr) - decode_getfattr(xdr, res->fattr, res->server); out: return status; } -- cgit v0.10.2 From d69ee9b85541a69a1092f5da675bd23256dc62af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 17:37:59 -0400 Subject: NFS: Adapt readdirplus to application usage patterns While the use of READDIRPLUS is significantly more efficient than READDIR followed by many LOOKUP calls, it is still less efficient than just READDIR if the attributes are not required. This patch tracks when lookups are attempted on the directory, and uses that information to selectively disable READDIRPLUS on that directory. The first 'readdir' call is always served using READDIRPLUS. Subsequent calls only use READDIRPLUS if there was a successful lookup or revalidation on a child in the mean time. Credit for the original idea should go to Neil Brown. See: http://www.spinics.net/lists/linux-nfs/msg19996.html However, the implementation in this patch differs from Neil's in that it focuses on tracking lookups rather than calls to stat(). 
Signed-off-by: Trond Myklebust Cc: Neil Brown diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 82b42e2..d0884c0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -475,6 +475,29 @@ different: } static +bool nfs_use_readdirplus(struct inode *dir, struct file *filp) +{ + if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) + return false; + if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) + return true; + if (filp->f_pos == 0) + return true; + return false; +} + +/* + * This function is called by the lookup code to request the use of + * readdirplus to accelerate any future lookups in the same + * directory. + */ +static +void nfs_advise_use_readdirplus(struct inode *dir) +{ + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); +} + +static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { struct qstr filename = { @@ -874,7 +897,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->file = filp; desc->dir_cookie = &dir_ctx->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; - desc->plus = NFS_USE_READDIRPLUS(inode); + desc->plus = nfs_use_readdirplus(inode, filp) ? 
1 : 0; nfs_block_sillyrename(dentry); res = nfs_revalidate_mapping(inode, filp->f_mapping); @@ -1114,7 +1137,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; - goto out_valid; + goto out_valid_noent; } if (is_bad_inode(inode)) { @@ -1156,6 +1179,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: + /* Success: notify readdir to use READDIRPLUS */ + nfs_advise_use_readdirplus(dir); + out_valid_noent: dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", __func__, dentry->d_parent->d_name.name, @@ -1311,6 +1337,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (IS_ERR(res)) goto out_unblock_sillyrename; + /* Success: notify readdir to use READDIRPLUS */ + nfs_advise_use_readdirplus(dir); + no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0d53113..9f17cd1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -298,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; - if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8a88c16..6cc7dba 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -314,11 +314,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap) return NFS_SERVER(inode)->caps & cap; } -static inline int NFS_USE_READDIRPLUS(struct inode *inode) -{ - return 
test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); -} - static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) { dentry->d_time = verf; -- cgit v0.10.2 From bf5fc4028ef751904a114ffc4b5d2cd9f0233142 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 May 2012 13:47:16 -0400 Subject: NFS: Fix O_DIRECT compile warnings Fix the following compile warnings: fs/nfs/direct.c: In function 'nfs_direct_read_schedule_segment': fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:352:27: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c: In function 'nfs_direct_write_schedule_segment': fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:650:27: warning: comparison of distinct pointer types lacks a cast [enabled by default] Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust Cc: Fred Isaman Cc: Stephen Rothwell diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aab3016..dca9c81 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -322,7 +322,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(max(rsize, PAGE_SIZE), count); + bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); result = -ENOMEM; npages = nfs_page_array_len(pgbase, bytes); @@ -349,7 +349,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de for (i = 0; i < 
npages; i++) { struct nfs_page *req; - unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ req = nfs_create_request(dreq->ctx, dreq->inode, pagevec[i], @@ -619,7 +619,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(max(wsize, PAGE_SIZE), count); + bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); result = -ENOMEM; npages = nfs_page_array_len(pgbase, bytes); @@ -647,7 +647,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d for (i = 0; i < npages; i++) { struct nfs_page *req; - unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); req = nfs_create_request(dreq->ctx, dreq->inode, pagevec[i], -- cgit v0.10.2 From 1385b8117325e79f74c1e7d1cbf45c789deb85c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 May 2012 13:54:24 -0400 Subject: NFS: Fix sparse warnings Fix the following sparse warnings: fs/nfs/direct.c:221:6: warning: symbol 'nfs_direct_readpage_release' was not declared. Should it be static? fs/nfs/read.c:38:43: warning: non-ANSI function declaration of function 'nfs_readhdr_alloc' fs/nfs/objlayout/objio_osd.c:214:5: warning: symbol '__alloc_objio_seg' was not declared. Should it be static? 
Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust Cc: Fred Isaman Cc: Boaz Harrosh diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index dca9c81..257d009 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -218,7 +218,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -void nfs_direct_readpage_release(struct nfs_page *req) +static void nfs_direct_readpage_release(struct nfs_page *req) { dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index fbf4874..b47277ba 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); } -int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, +static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, struct objio_segment **pseg) { /* This is the in memory structure of the objio_segment diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 37c9eb2..f23cf25 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,7 +35,7 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc() +struct nfs_read_header *nfs_readhdr_alloc(void) { struct nfs_read_header *rhdr; -- cgit v0.10.2 From 14546c337588370dced50dcaf43398939be9829e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:10:59 -0400 Subject: NFS: Don't do a full flush to disk on close() if we hold a delegation If we hold a delegation then we know that it should be safe to continue to cache the data beyond the close(). However since the process that wrote the data may die after close(), we may still want to send the data to server before those RPCSEC_GSS credentials expire. 
We therefore compromise by starting writeback to the server, but don't wait for completion. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index aa9b709..8eda8a6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + /* + * If we're holding a write delegation, then just start the i/o + * but don't wait for completion (or send a commit). + */ + if (nfs_have_delegation(inode, FMODE_WRITE)) + return filemap_fdatawrite(file->f_mapping); + /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } -- cgit v0.10.2 From c57d1bc5e043dbb5ba82ded07003d71a8033d899 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:34:17 -0400 Subject: NFS: nfs_inode_return_delegation() should always flush dirty data The assumption is that if you are in a situation where you need to return the delegation, then you should probably stop caching the data anyway. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 89af1d2..a19cb5a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -380,6 +380,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) * nfs_inode_return_delegation - synchronously return a delegation * @inode: inode to process * + * This routine will always flush any dirty data to disk on the + * assumption that if we need to return the delegation, then + * we should stop caching. + * * Returns zero on success, or a negative errno value. 
*/ int nfs_inode_return_delegation(struct inode *inode) @@ -389,10 +393,10 @@ int nfs_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; int err = 0; + nfs_wb_all(inode); if (rcu_access_pointer(nfsi->delegation) != NULL) { delegation = nfs_detach_delegation(nfsi, server); if (delegation != NULL) { - nfs_wb_all(inode); err = __nfs_inode_return_delegation(inode, delegation, 1); } } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index cd6a7a8..72709c4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) static inline int nfs_inode_return_delegation(struct inode *inode) { + nfs_wb_all(inode); return 0; } #endif -- cgit v0.10.2 From dc327ed4cd320be689596365372a3683208c3ba0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:46:30 -0400 Subject: NFSv4: nfs_client_return_marked_delegations can't flush data Since even filemap_flush() needs to lock pages that are dirty, we cannot risk calling it from the state manager context. Therefore, we need to move the call to filemap_flush() to nfs_async_inode_return_delegation(). Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a19cb5a..bd3a960 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -316,6 +316,10 @@ out: * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process * + * Note that this function is designed to be called by the state + * manager thread. For this reason, it cannot flush the dirty data, + * since that could deadlock in case of a state recovery error. + * * Returns zero on success, or a negative errno value. 
*/ int nfs_client_return_marked_delegations(struct nfs_client *clp) @@ -340,11 +344,9 @@ restart: server); rcu_read_unlock(); - if (delegation != NULL) { - filemap_flush(inode->i_mapping); + if (delegation != NULL) err = __nfs_inode_return_delegation(inode, delegation, 0); - } iput(inode); if (!err) goto restart; @@ -542,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; + filemap_flush(inode->i_mapping); + rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); -- cgit v0.10.2 From 3aff4ebb95b20ad8db2c1447e8c52097d89af5a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2012 14:30:35 -0400 Subject: NFS: Prevent a deadlock in the new writeback code We have to unlock the nfs_page before we call nfs_end_page_writeback to avoid races with functions that expect the page to be unlocked when PG_locked and PG_writeback are not set. The problem is that nfs_unlock_request also releases the nfs_page, causing a deadlock if the release of the nfs_open_context triggers an iput() while the PG_writeback flag is still set... The solution is to separate the unlocking and release of the nfs_page, so that we can do the former before nfs_end_page_writeback and the latter after. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 33a21ca..69146f3 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -128,10 +128,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, } /** - * nfs_unlock_request - Unlock request and wake up sleepers. + * nfs_unlock_request_dont_release - Unlock request and wake up sleepers. 
* @req: */ -void nfs_unlock_request(struct nfs_page *req) +void nfs_unlock_request_dont_release(struct nfs_page *req) { if (!NFS_WBACK_BUSY(req)) { printk(KERN_ERR "NFS: Invalid unlock attempted\n"); @@ -141,6 +141,14 @@ void nfs_unlock_request(struct nfs_page *req) clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); wake_up_bit(&req->wb_flags, PG_BUSY); +} + +/** + * nfs_unlock_request - Unlock request and release the nfs_page + */ +void nfs_unlock_request(struct nfs_page *req) +{ + nfs_unlock_request_dont_release(req); nfs_release_request(req); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6f263da..fd36b31 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -628,8 +628,9 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) remove_req: nfs_inode_remove_request(req); next: - nfs_unlock_request(req); + nfs_unlock_request_dont_release(req); nfs_end_page_writeback(page); + nfs_release_request(req); } out: hdr->release(hdr); @@ -1042,8 +1043,9 @@ static void nfs_redirty_request(struct nfs_page *req) struct page *page = req->wb_page; nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_unlock_request_dont_release(req); nfs_end_page_writeback(page); + nfs_release_request(req); } static void nfs_async_write_error(struct list_head *head) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f9ee9eb..ef75042 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -96,6 +96,7 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); +extern void nfs_unlock_request_dont_release(struct nfs_page *req); /* * Lock the page of an asynchronous request without getting a new reference -- cgit v0.10.2 From d1182b33ed9b62470cb6ab892a8a301542120086 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2012 13:37:43 -0400 Subject: NFS: nfs_set_page_writeback no longer 
needs to reference the page We now hold a reference to the nfs_page across the calls to nfs_set_page_writeback and nfs_end_page_writeback, and that means we already have a reference to the struct page. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fd36b31..8382329 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -230,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page) struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); - page_cache_get(page); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) { set_bdi_congested(&nfss->backing_dev_info, @@ -246,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); - page_cache_release(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -607,13 +605,12 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_init_cinfo_from_inode(&cinfo, hdr->inode); while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; bytes += req->wb_bytes; nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(page); + nfs_set_pageerror(req->wb_page); nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } @@ -629,7 +626,7 @@ remove_req: nfs_inode_remove_request(req); next: nfs_unlock_request_dont_release(req); - nfs_end_page_writeback(page); + nfs_end_page_writeback(req->wb_page); nfs_release_request(req); } out: @@ -1040,11 +1037,9 @@ static int nfs_do_multiple_writes(struct list_head *head, */ static void nfs_redirty_request(struct nfs_page *req) { - struct page *page = req->wb_page; - nfs_mark_request_dirty(req); nfs_unlock_request_dont_release(req); - nfs_end_page_writeback(page); + 
nfs_end_page_writeback(req->wb_page); nfs_release_request(req); } -- cgit v0.10.2 From 7ad84aa9448571678c243f0c5ef383fbe5b50f4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2012 13:19:15 -0400 Subject: NFS: Clean up - simplify nfs_lock_request() We only have two places where we need to grab a reference when trying to lock the nfs_page. We're better off making that explicit. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 257d009..465ea84 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -657,6 +657,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d break; } nfs_lock_request(req); + kref_get(&req->wb_kref); req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; if (!nfs_pageio_add_request(desc, req)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8382329..553f7ef 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -260,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo req = nfs_page_find_request_locked(page); if (req == NULL) break; - if (nfs_lock_request_dontget(req)) + if (nfs_lock_request(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request_dontget() will always + * then the call to nfs_lock_request() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ @@ -406,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); /* Lock the request! 
*/ - nfs_lock_request_dontget(req); + nfs_lock_request(req); spin_lock(&inode->i_lock); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) @@ -651,6 +651,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; + kref_get(&req->wb_kref); if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req, cinfo); @@ -741,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, || end < req->wb_offset) goto out_flushme; - if (nfs_lock_request_dontget(req)) + if (nfs_lock_request(req)) break; /* The request is locked, so wait and then retry */ @@ -1717,7 +1718,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) req = nfs_page_find_request(page); if (req == NULL) break; - if (nfs_lock_request_dontget(req)) { + if (nfs_lock_request(req)) { nfs_clear_request_commit(req); nfs_inode_remove_request(req); /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ef75042..263f30a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -99,24 +99,14 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_request_dont_release(struct nfs_page *req); /* - * Lock the page of an asynchronous request without getting a new reference + * Lock the page of an asynchronous request */ static inline int -nfs_lock_request_dontget(struct nfs_page *req) -{ - return !test_and_set_bit(PG_BUSY, &req->wb_flags); -} - -static inline int nfs_lock_request(struct nfs_page *req) { - if (test_and_set_bit(PG_BUSY, &req->wb_flags)) - return 0; - kref_get(&req->wb_kref); - return 1; + return !test_and_set_bit(PG_BUSY, &req->wb_flags); } - /** * nfs_list_add_request - Insert a request into a list * @req: request -- cgit v0.10.2 From 1d1afcbc294cc7c788eb5c7b6b98e8d63caf002c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2012 14:04:55 
-0400 Subject: NFS: Clean up - Rename nfs_unlock_request and nfs_unlock_request_dont_release Function rename to ensure that the functionality of nfs_unlock_request() mirrors that of nfs_lock_request(). Then let nfs_unlock_and_release_request() do the work of what used to be called nfs_unlock_request()... Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 465ea84..845e201 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -488,7 +488,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) while (!list_empty(&failed)) { nfs_release_request(req); - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } if (put_dreq(dreq)) @@ -521,7 +521,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_mark_request_commit(req, NULL, &cinfo); } else nfs_release_request(req); - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) @@ -662,7 +662,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d req->wb_offset = pos & ~PAGE_MASK; if (!nfs_pageio_add_request(desc, req)) { result = desc->pg_error; - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); nfs_release_request(req); break; } @@ -739,7 +739,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) default: nfs_release_request(req); } - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } out_put: @@ -756,7 +756,7 @@ static void nfs_write_sync_pgio_error(struct list_head *head) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_release_request(req); - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 69146f3..aed913c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -128,10 +128,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, } /** - * 
nfs_unlock_request_dont_release - Unlock request and wake up sleepers. + * nfs_unlock_request - Unlock request and wake up sleepers. * @req: */ -void nfs_unlock_request_dont_release(struct nfs_page *req) +void nfs_unlock_request(struct nfs_page *req) { if (!NFS_WBACK_BUSY(req)) { printk(KERN_ERR "NFS: Invalid unlock attempted\n"); @@ -144,11 +144,12 @@ void nfs_unlock_request_dont_release(struct nfs_page *req) } /** - * nfs_unlock_request - Unlock request and release the nfs_page + * nfs_unlock_and_release_request - Unlock request and release the nfs_page + * @req: */ -void nfs_unlock_request(struct nfs_page *req) +void nfs_unlock_and_release_request(struct nfs_page *req) { - nfs_unlock_request_dont_release(req); + nfs_unlock_request(req); nfs_release_request(req); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 553f7ef..8ffd7d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -625,7 +625,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) remove_req: nfs_inode_remove_request(req); next: - nfs_unlock_request_dont_release(req); + nfs_unlock_request(req); nfs_end_page_writeback(req->wb_page); nfs_release_request(req); } @@ -812,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); return 0; } @@ -1039,7 +1039,7 @@ static int nfs_do_multiple_writes(struct list_head *head, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); - nfs_unlock_request_dont_release(req); + nfs_unlock_request(req); nfs_end_page_writeback(req->wb_page); nfs_release_request(req); } @@ -1477,7 +1477,7 @@ void nfs_retry_commit(struct list_head *page_list, dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); } - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } } 
EXPORT_SYMBOL_GPL(nfs_retry_commit); @@ -1555,7 +1555,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); } nfs_init_cinfo(&cinfo, data->inode, data->dreq); if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) @@ -1726,7 +1726,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) * page as being dirty */ cancel_dirty_page(page, PAGE_CACHE_SIZE); - nfs_unlock_request(req); + nfs_unlock_and_release_request(req); break; } ret = nfs_wait_on_request(req); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 263f30a..88d166b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -96,7 +96,7 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern void nfs_unlock_request_dont_release(struct nfs_page *req); +extern void nfs_unlock_and_release_request(struct nfs_page *req); /* * Lock the page of an asynchronous request -- cgit v0.10.2 From 0427708657750bdc03af3491a0297cab5e7efabf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2012 13:54:53 -0400 Subject: NFS: Clean up - Simplify reference counting in fs/nfs/direct.c Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 845e201..c47a46e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -486,10 +486,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) { - nfs_release_request(req); + while (!list_empty(&failed)) nfs_unlock_and_release_request(req); - } if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); @@ -518,9 +516,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags 
== NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ + kref_get(&req->wb_kref); nfs_mark_request_commit(req, NULL, &cinfo); - } else - nfs_release_request(req); + } nfs_unlock_and_release_request(req); } @@ -657,13 +655,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d break; } nfs_lock_request(req); - kref_get(&req->wb_kref); req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; if (!nfs_pageio_add_request(desc, req)) { result = desc->pg_error; nfs_unlock_and_release_request(req); - nfs_release_request(req); break; } pgbase = 0; @@ -734,10 +730,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) switch (bit) { case NFS_IOHDR_NEED_RESCHED: case NFS_IOHDR_NEED_COMMIT: + kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); - break; - default: - nfs_release_request(req); } nfs_unlock_and_release_request(req); } @@ -755,7 +749,6 @@ static void nfs_write_sync_pgio_error(struct list_head *head) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_release_request(req); nfs_unlock_and_release_request(req); } } -- cgit v0.10.2 From 3028eb2b324c517da1e9e589743c4a5154f70dd1 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:30 -0400 Subject: NFS: Rename nfs4_proc_get_root() This function is really getting the root filehandle and not the root dentry of the filesystem. I also removed the rpc_ops lookup from nfs4_get_rootfh() under the assumption that if we reach this function then we already know we are using NFS v4. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 4ca6f5c..8a0f33e 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) goto out; /* Start by getting the root filehandle from the server */ - ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); if (ret < 0) { dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); goto out; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 97365b0..edeef71 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -214,6 +214,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 98eb48d..69212d2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2345,8 +2345,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, /* * get the file handle for the "/" directory on the server */ -static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) { int minor_version = server->nfs_client->cl_minorversion; int status = nfs4_lookup_root(server, fhandle, info); @@ -6539,7 +6539,7 @@ const struct 
nfs_rpc_ops nfs_v4_clientops = { .dir_inode_ops = &nfs4_dir_inode_operations, .file_inode_ops = &nfs4_file_inode_operations, .file_ops = &nfs4_file_operations, - .getroot = nfs4_proc_get_root, + .getroot = nfs4_proc_get_rootfh, .submount = nfs4_submount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, -- cgit v0.10.2 From bae36241be7fab16b2e987d31b6e6bd4456ac188 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:31 -0400 Subject: NFS: Create a single nfs_get_root() This patch splits out the NFS v4 specific functionality of nfs4_get_root() into its own rpc_op called by the generic client, and leaves nfs4_proc_get_rootfh() as its own stand alone function. This also allows me to change nfs4_remote_mount(), nfs4_xdev_mount() and nfs4_remote_referral_mount() to use the generic client's nfs_get_root() function. Later patches in this series will collapse these functions into one common function, so using the same get_root() function everywhere simplifies future changes. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 8a0f33e..8abfb19 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -178,87 +178,4 @@ out: return ret; } -/* - * get an NFS4 root dentry from the root filehandle - */ -struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh, - const char *devname) -{ - struct nfs_server *server = NFS_SB(sb); - struct nfs_fattr *fattr = NULL; - struct dentry *ret; - struct inode *inode; - void *name = kstrdup(devname, GFP_KERNEL); - int error; - - dprintk("--> nfs4_get_root()\n"); - - if (!name) - return ERR_PTR(-ENOMEM); - - /* get the info about the server and filesystem */ - error = nfs4_server_capabilities(server, mntfh); - if (error < 0) { - dprintk("nfs_get_root: getcaps error = %d\n", - -error); - kfree(name); - return ERR_PTR(error); - } - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) { - kfree(name); - return ERR_PTR(-ENOMEM); - } - - /* get the actual root for this mount */ - error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - ret = ERR_PTR(error); - goto out; - } - - if (fattr->valid & NFS_ATTR_FATTR_FSID && - !nfs_fsid_equal(&server->fsid, &fattr->fsid)) - memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); - - inode = nfs_fhget(sb, mntfh, fattr); - if (IS_ERR(inode)) { - dprintk("nfs_get_root: get root inode failed\n"); - ret = ERR_CAST(inode); - goto out; - } - - error = nfs_superblock_set_dummy_root(sb, inode); - if (error != 0) { - ret = ERR_PTR(error); - goto out; - } - - /* root dentries normally start off anonymous and get spliced in later - * if the dentry tree reaches them; however if the dentry already - * exists, we'll pick it up at this point and use it as the root - */ - ret = d_obtain_alias(inode); - if (IS_ERR(ret)) { - dprintk("nfs_get_root: get root dentry failed\n"); - goto out; - } - - 
security_d_instantiate(ret, inode); - spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { - ret->d_fsdata = name; - name = NULL; - } - spin_unlock(&ret->d_lock); -out: - if (name) - kfree(name); - nfs_free_fattr(fattr); - dprintk("<-- nfs4_get_root()\n"); - return ret; -} - #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69212d2..e6ab15f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -80,6 +80,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); +static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, @@ -2363,6 +2364,31 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } +static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, + struct nfs_fsinfo *info) +{ + int error; + struct nfs_fattr *fattr = info->fattr; + + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs4_get_root: getcaps error = %d\n", -error); + return error; + } + + error = nfs4_proc_getattr(server, mntfh, fattr); + if (error < 0) { + dprintk("nfs4_get_root: getattr error = %d\n", -error); + return error; + } + + if (fattr->valid & NFS_ATTR_FATTR_FSID && + !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); + + return error; +} + /* * Get locations and (maybe) other attributes of a referral. 
* Note that we'll actually follow the referral later when @@ -6539,7 +6565,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .dir_inode_ops = &nfs4_dir_inode_operations, .file_inode_ops = &nfs4_file_inode_operations, .file_ops = &nfs4_file_operations, - .getroot = nfs4_proc_get_rootfh, + .getroot = nfs4_proc_get_root, .submount = nfs4_submount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4ac7fca..75b1717 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2727,7 +2727,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); } - mntroot = nfs4_get_root(s, mntfh, dev_name); + mntroot = nfs_get_root(s, mntfh, dev_name); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; @@ -2991,7 +2991,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, nfs_fscache_get_super_cookie(s, NULL, data); } - mntroot = nfs4_get_root(s, data->fh, dev_name); + mntroot = nfs_get_root(s, data->fh, dev_name); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; @@ -3082,7 +3082,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, nfs_fscache_get_super_cookie(s, NULL, data); } - mntroot = nfs4_get_root(s, mntfh, dev_name); + mntroot = nfs_get_root(s, mntfh, dev_name); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; -- cgit v0.10.2 From 2311b9439ce8c525f3f8f821fc2ca9a541f673a5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:32 -0400 Subject: NFS: Don't pass mount data to nfs_fscache_get_super_cookie() I intend on creating a single nfs_fs_mount() function used by all our mount paths. To avoid checking between new mounts and clone mounts, I instead pass both structures to a new function in super.c that finds the cache key and then looks up the super cookie. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ae65c16..c817787 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp) * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent * superblock across an automount point of some nature. */ -void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, - struct nfs_clone_mount *mntdata) +void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen) { struct nfs_fscache_key *key, *xkey; struct nfs_server *nfss = NFS_SB(sb); struct rb_node **p, *parent; - int diff, ulen; - - if (uniq) { - ulen = strlen(uniq); - } else if (mntdata) { - struct nfs_server *mnt_s = NFS_SB(mntdata->sb); - if (mnt_s->fscache_key) { - uniq = mnt_s->fscache_key->key.uniquifier; - ulen = mnt_s->fscache_key->key.uniq_len; - } - } + int diff; if (!uniq) { uniq = ""; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index b9c572d..2a08b91 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void); extern void nfs_fscache_get_client_cookie(struct nfs_client *); extern void nfs_fscache_release_client_cookie(struct nfs_client *); -extern void nfs_fscache_get_super_cookie(struct super_block *, - const char *, - struct nfs_clone_mount *); +extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int); extern void nfs_fscache_release_super_cookie(struct super_block *); extern void nfs_fscache_init_inode_cookie(struct inode *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 75b1717..f56fb35 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2278,6 +2278,27 @@ static int nfs_compare_super(struct super_block *sb, void *data) return nfs_compare_mount_options(sb, server, mntflags); } +static void nfs_get_cache_cookie(struct super_block *sb, + struct nfs_parsed_mount_data *parsed, 
+ struct nfs_clone_mount *cloned) +{ + char *uniq = NULL; + int ulen = 0; + + if (parsed && parsed->fscache_uniq) { + uniq = parsed->fscache_uniq; + ulen = strlen(parsed->fscache_uniq); + } else if (cloned) { + struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (mnt_s->fscache_key) { + uniq = mnt_s->fscache_key->key.uniquifier; + ulen = mnt_s->fscache_key->key.uniq_len; + }; + } + + nfs_fscache_get_super_cookie(sb, uniq, ulen); +} + static int nfs_bdi_register(struct nfs_server *server) { return bdi_register_dev(&server->backing_dev_info, server->s_dev); @@ -2352,7 +2373,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs_fill_super(s, data); - nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); + nfs_get_cache_cookie(s, data, NULL); } mntroot = nfs_get_root(s, mntfh, dev_name); @@ -2461,7 +2482,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs_clone_super(s, data->sb); - nfs_fscache_get_super_cookie(s, NULL, data); + nfs_get_cache_cookie(s, NULL, data); } mntroot = nfs_get_root(s, data->fh, dev_name); @@ -2724,7 +2745,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); + nfs_get_cache_cookie(s, data, NULL); } mntroot = nfs_get_root(s, mntfh, dev_name); @@ -2988,7 +3009,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_clone_super(s, data->sb); - nfs_fscache_get_super_cookie(s, NULL, data); + nfs_get_cache_cookie(s, NULL, data); } mntroot = nfs_get_root(s, data->fh, dev_name); @@ -3079,7 +3100,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - 
nfs_fscache_get_super_cookie(s, NULL, data); + nfs_get_cache_cookie(s, NULL, data); } mntroot = nfs_get_root(s, mntfh, dev_name); -- cgit v0.10.2 From 586f95cd4ffda7aa120327ec09865b181c809cdf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:33 -0400 Subject: NFS: Remove NFS4_MOUNT_UNSHARED This flag is numerically equivalent to NFS_MOUNT_UNSHARED, so I can remove it to make collapsing functions more straightforward. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f56fb35..40d43e0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2719,7 +2719,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, } sb_mntdata.server = server; - if (server->flags & NFS4_MOUNT_UNSHARED) + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ @@ -2983,7 +2983,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, } sb_mntdata.server = server; - if (server->flags & NFS4_MOUNT_UNSHARED) + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ @@ -3074,7 +3074,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, } sb_mntdata.server = server; - if (server->flags & NFS4_MOUNT_UNSHARED) + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ -- cgit v0.10.2 From c40f8d1d35a27d81b4af9d5d2f7286fd978ae9b2 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:34 -0400 Subject: NFS: Create a common fs_mount() function The nfs4_remote_mount() function was only slightly different from the nfs_fs_mount() function used by the generic client. I created a new nfs_mount_info structure to set different parameters to help combine these functions. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 40d43e0..64a62da 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -277,6 +277,11 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; +struct nfs_mount_info { + void (*fill_super)(struct super_block *, struct nfs_mount_info *); + struct nfs_parsed_mount_data *parsed; +}; + static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct dentry *); @@ -2129,8 +2134,9 @@ static inline void nfs_initialise_sb(struct super_block *sb) * Finish setting up an NFS2/3 superblock */ static void nfs_fill_super(struct super_block *sb, - struct nfs_parsed_mount_data *data) + struct nfs_mount_info *mount_info) { + struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; @@ -2304,47 +2310,21 @@ static int nfs_bdi_register(struct nfs_server *server) return bdi_register_dev(&server->backing_dev_info, server->s_dev); } -static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, + struct nfs_server *server, + int flags, const char *dev_name, + struct nfs_fh *mntfh, + struct nfs_mount_info *mount_info) { - struct nfs_server *server = NULL; struct super_block *s; - struct nfs_parsed_mount_data *data; - struct nfs_fh *mntfh; struct dentry *mntroot = ERR_PTR(-ENOMEM); int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, + .server = server, }; int error; - data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); - mntfh = nfs_alloc_fhandle(); - if (data == NULL || mntfh == NULL) - goto out; - - /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, data, mntfh, 
dev_name); - if (error < 0) { - mntroot = ERR_PTR(error); - goto out; - } - -#ifdef CONFIG_NFS_V4 - if (data->version == 4) { - mntroot = nfs4_try_mount(flags, dev_name, data); - goto out; - } -#endif /* CONFIG_NFS_V4 */ - - /* Get a volume representation */ - server = nfs_create_server(data, mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - sb_mntdata.server = server; - if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; @@ -2372,23 +2352,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, data); - nfs_get_cache_cookie(s, data, NULL); + mount_info->fill_super(s, mount_info); + nfs_get_cache_cookie(s, mount_info->parsed, NULL); } mntroot = nfs_get_root(s, mntfh, dev_name); if (IS_ERR(mntroot)) goto error_splat_super; - error = security_sb_set_mnt_opts(s, &data->lsm_opts); + error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); if (error) goto error_splat_root; s->s_flags |= MS_ACTIVE; out: - nfs_free_parsed_mount_data(data); - nfs_free_fhandle(mntfh); return mntroot; out_err_nosb: @@ -2406,6 +2384,52 @@ error_splat_bdi: goto out; } +static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_server *server; + struct nfs_parsed_mount_data *data = NULL; + struct nfs_mount_info mount_info = { + .fill_super = nfs_fill_super, + }; + struct nfs_fh *mntfh; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + int error; + + data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); + mntfh = nfs_alloc_fhandle(); + if (data == NULL || mntfh == NULL) + goto out; + + /* Validate the mount data */ + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); + if (error < 0) { + mntroot = ERR_PTR(error); + goto out; + } + mount_info.parsed = data; + +#ifdef CONFIG_NFS_V4 + if (data->version == 4) { + mntroot = nfs4_try_mount(flags, dev_name, data); + 
goto out; + } +#endif /* CONFIG_NFS_V4 */ + + /* Get a volume representation */ + server = nfs_create_server(data, mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mntfh, &mount_info); +out: + nfs_free_parsed_mount_data(data); + nfs_free_fhandle(mntfh); + return mntroot; +} + /* * Ensure that we unregister the bdi before kill_anon_super * releases the device name @@ -2544,7 +2568,8 @@ static void nfs4_clone_super(struct super_block *sb, /* * Set up an NFS4 superblock */ -static void nfs4_fill_super(struct super_block *sb) +static void nfs4_fill_super(struct super_block *sb, + struct nfs_mount_info *mount_info) { sb->s_time_gran = 1; sb->s_op = &nfs4_sops; @@ -2696,89 +2721,31 @@ static struct dentry * nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_parsed_mount_data *data = raw_data; - struct super_block *s; + struct nfs_mount_info mount_info = { + .fill_super = nfs4_fill_super, + .parsed = raw_data, + }; struct nfs_server *server; struct nfs_fh *mntfh; - struct dentry *mntroot; - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - }; - int error = -ENOMEM; + struct dentry *mntroot = ERR_PTR(-ENOMEM); mntfh = nfs_alloc_fhandle(); - if (data == NULL || mntfh == NULL) + if (mount_info.parsed == NULL || mntfh == NULL) goto out; /* Get a volume representation */ - server = nfs4_create_server(data, mntfh); + server = nfs4_create_server(mount_info.parsed, mntfh); if (IS_ERR(server)) { - error = PTR_ERR(server); + mntroot = ERR_CAST(server); goto out; } - sb_mntdata.server = server; - if (server->flags & NFS_MOUNT_UNSHARED) - compare_super = NULL; - - /* -o noac implies -o sync */ - if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; - - /* Get a superblock - note that we may end up sharing one that 
already exists */ - s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); - if (IS_ERR(s)) { - error = PTR_ERR(s); - goto out_free; - } - - if (s->s_fs_info != server) { - nfs_free_server(server); - server = NULL; - } else { - error = nfs_bdi_register(server); - if (error) - goto error_splat_bdi; - } - - if (!s->s_root) { - /* initial superblock/root creation */ - nfs4_fill_super(s); - nfs_get_cache_cookie(s, data, NULL); - } - - mntroot = nfs_get_root(s, mntfh, dev_name); - if (IS_ERR(mntroot)) { - error = PTR_ERR(mntroot); - goto error_splat_super; - } - - error = security_sb_set_mnt_opts(s, &data->lsm_opts); - if (error) - goto error_splat_root; - - s->s_flags |= MS_ACTIVE; - - nfs_free_fhandle(mntfh); - return mntroot; + mntroot = nfs_fs_mount_common(fs_type, server, flags, + dev_name, mntfh, &mount_info); out: nfs_free_fhandle(mntfh); - return ERR_PTR(error); - -out_free: - nfs_free_server(server); - goto out; - -error_splat_root: - dput(mntroot); -error_splat_super: - if (server && !s->s_root) - bdi_unregister(&server->backing_dev_info); -error_splat_bdi: - deactivate_locked_super(s); - goto out; + return mntroot; } static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, @@ -3099,7 +3066,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ - nfs4_fill_super(s); + nfs4_fill_super(s, NULL); nfs_get_cache_cookie(s, NULL, data); } -- cgit v0.10.2 From 8c958e0c4c52d600bd2ea677eb920fceda8aee49 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:35 -0400 Subject: NFS: Create a common xdev_mount() function The only difference between nfs_xdev_mount() and nfs4_xdev_mount() is the clone_super() function called to clone the super block. I can combine these two functions by using the fill_super field in the mount_info structure. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 64a62da..707d1f6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -280,6 +280,7 @@ static match_table_t nfs_vers_tokens = { struct nfs_mount_info { void (*fill_super)(struct super_block *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; + struct nfs_clone_mount *cloned; }; static void nfs_umount_begin(struct super_block *); @@ -2160,8 +2161,9 @@ static void nfs_fill_super(struct super_block *sb, * Finish setting up a cloned NFS2/3 superblock */ static void nfs_clone_super(struct super_block *sb, - const struct super_block *old_sb) + struct nfs_mount_info *mount_info) { + const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = old_sb->s_blocksize_bits; @@ -2454,13 +2456,13 @@ static void nfs_kill_super(struct super_block *s) } /* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) + * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, + const char *dev_name, struct nfs_mount_info *mount_info) { - struct nfs_clone_mount *data = raw_data; + struct nfs_clone_mount *data = mount_info->cloned; struct super_block *s; struct nfs_server *server; struct dentry *mntroot; @@ -2470,7 +2472,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, }; int error; - dprintk("--> nfs_xdev_mount()\n"); + dprintk("--> nfs_xdev_mount_common()\n"); /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); @@ -2505,7 +2507,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ - nfs_clone_super(s, data->sb); + mount_info->fill_super(s, 
mount_info); nfs_get_cache_cookie(s, NULL, data); } @@ -2525,13 +2527,13 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, /* clone any lsm security options from the parent to the new sb */ security_sb_clone_mnt_opts(data->sb, s); - dprintk("<-- nfs_xdev_mount() = 0\n"); + dprintk("<-- nfs_xdev_mount_common() = 0\n"); return mntroot; out_err_nosb: nfs_free_server(server); out_err_noserver: - dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); + dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); return ERR_PTR(error); error_splat_super: @@ -2539,18 +2541,33 @@ error_splat_super: bdi_unregister(&server->backing_dev_info); error_splat_bdi: deactivate_locked_super(s); - dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); + dprintk("<-- nfs_xdev_mount_common() = %d [splat]\n", error); return ERR_PTR(error); } +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static struct dentry * +nfs_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .cloned = raw_data, + }; + return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); +} + #ifdef CONFIG_NFS_V4 /* * Finish setting up a cloned NFS4 superblock */ static void nfs4_clone_super(struct super_block *sb, - const struct super_block *old_sb) + struct nfs_mount_info *mount_info) { + const struct super_block *old_sb = mount_info->cloned->sb; sb->s_blocksize_bits = old_sb->s_blocksize_bits; sb->s_blocksize = old_sb->s_blocksize; sb->s_maxbytes = old_sb->s_maxbytes; @@ -2930,86 +2947,11 @@ static struct dentry * nfs4_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = raw_data; - struct super_block *s; - struct nfs_server *server; - struct dentry *mntroot; - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = 
{ - .mntflags = flags, + struct nfs_mount_info mount_info = { + .fill_super = nfs4_clone_super, + .cloned = raw_data, }; - int error; - - dprintk("--> nfs4_xdev_mount()\n"); - - /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err_noserver; - } - sb_mntdata.server = server; - - if (server->flags & NFS_MOUNT_UNSHARED) - compare_super = NULL; - - /* -o noac implies -o sync */ - if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; - - /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); - if (IS_ERR(s)) { - error = PTR_ERR(s); - goto out_err_nosb; - } - - if (s->s_fs_info != server) { - nfs_free_server(server); - server = NULL; - } else { - error = nfs_bdi_register(server); - if (error) - goto error_splat_bdi; - } - - if (!s->s_root) { - /* initial superblock/root creation */ - nfs4_clone_super(s, data->sb); - nfs_get_cache_cookie(s, NULL, data); - } - - mntroot = nfs_get_root(s, data->fh, dev_name); - if (IS_ERR(mntroot)) { - error = PTR_ERR(mntroot); - goto error_splat_super; - } - if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { - dput(mntroot); - error = -ESTALE; - goto error_splat_super; - } - - s->s_flags |= MS_ACTIVE; - - security_sb_clone_mnt_opts(data->sb, s); - - dprintk("<-- nfs4_xdev_mount() = 0\n"); - return mntroot; - -out_err_nosb: - nfs_free_server(server); -out_err_noserver: - dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error); - return ERR_PTR(error); - -error_splat_super: - if (server && !s->s_root) - bdi_unregister(&server->backing_dev_info); -error_splat_bdi: - deactivate_locked_super(s); - dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error); - return ERR_PTR(error); + return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); } static 
struct dentry * -- cgit v0.10.2 From 3d176e3fe4f6dc379b252bf43e2e146a8f7caf01 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:36 -0400 Subject: NFS: Use nfs_fs_mount_common() for xdev mounts At this point, there are only a few small differences between these two functions. I can set a few function pointers in the nfs_mount_info struct to get around these differences. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 707d1f6..e93a6e9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -279,6 +279,7 @@ static match_table_t nfs_vers_tokens = { struct nfs_mount_info { void (*fill_super)(struct super_block *, struct nfs_mount_info *); + int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; }; @@ -2312,6 +2313,22 @@ static int nfs_bdi_register(struct nfs_server *server) return bdi_register_dev(&server->backing_dev_info, server->s_dev); } +static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) +{ + return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); +} + +static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) +{ + /* clone any lsm security options from the parent to the new sb */ + security_sb_clone_mnt_opts(mount_info->cloned->sb, s); + if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) + return -ESTALE; + return 0; +} + static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, struct nfs_server *server, int flags, const char *dev_name, @@ -2355,14 +2372,14 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ mount_info->fill_super(s, mount_info); - nfs_get_cache_cookie(s, mount_info->parsed, NULL); + 
nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); } mntroot = nfs_get_root(s, mntfh, dev_name); if (IS_ERR(mntroot)) goto error_splat_super; - error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); + error = mount_info->set_security(s, mntroot, mount_info); if (error) goto error_splat_root; @@ -2393,6 +2410,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, struct nfs_parsed_mount_data *data = NULL; struct nfs_mount_info mount_info = { .fill_super = nfs_fill_super, + .set_security = nfs_set_sb_security, }; struct nfs_fh *mntfh; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2463,13 +2481,8 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, const char *dev_name, struct nfs_mount_info *mount_info) { struct nfs_clone_mount *data = mount_info->cloned; - struct super_block *s; struct nfs_server *server; - struct dentry *mntroot; - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - }; + struct dentry *mntroot = ERR_PTR(-ENOMEM); int error; dprintk("--> nfs_xdev_mount_common()\n"); @@ -2478,71 +2491,17 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; - } - sb_mntdata.server = server; - - if (server->flags & NFS_MOUNT_UNSHARED) - compare_super = NULL; - - /* -o noac implies -o sync */ - if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; - - /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); - if (IS_ERR(s)) { - error = PTR_ERR(s); - goto out_err_nosb; - } - - if (s->s_fs_info != server) { - nfs_free_server(server); - server = NULL; - } else { - error = nfs_bdi_register(server); - if (error) - goto error_splat_bdi; - } - - 
if (!s->s_root) { - /* initial superblock/root creation */ - mount_info->fill_super(s, mount_info); - nfs_get_cache_cookie(s, NULL, data); - } - - mntroot = nfs_get_root(s, data->fh, dev_name); - if (IS_ERR(mntroot)) { - error = PTR_ERR(mntroot); - goto error_splat_super; - } - if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { - dput(mntroot); - error = -ESTALE; - goto error_splat_super; + goto out_err; } - s->s_flags |= MS_ACTIVE; - - /* clone any lsm security options from the parent to the new sb */ - security_sb_clone_mnt_opts(data->sb, s); - + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, data->fh, mount_info); dprintk("<-- nfs_xdev_mount_common() = 0\n"); +out: return mntroot; -out_err_nosb: - nfs_free_server(server); -out_err_noserver: +out_err: dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); - return ERR_PTR(error); - -error_splat_super: - if (server && !s->s_root) - bdi_unregister(&server->backing_dev_info); -error_splat_bdi: - deactivate_locked_super(s); - dprintk("<-- nfs_xdev_mount_common() = %d [splat]\n", error); - return ERR_PTR(error); + goto out; } /* @@ -2554,6 +2513,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, { struct nfs_mount_info mount_info = { .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, .cloned = raw_data, }; return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); @@ -2740,6 +2700,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, { struct nfs_mount_info mount_info = { .fill_super = nfs4_fill_super, + .set_security = nfs_set_sb_security, .parsed = raw_data, }; struct nfs_server *server; @@ -2949,6 +2910,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, { struct nfs_mount_info mount_info = { .fill_super = nfs4_clone_super, + .set_security = nfs_clone_sb_security, .cloned = raw_data, }; return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -- cgit v0.10.2 From 
21e4b82e13c038457b4fa4d54d988c9f1865bcf6 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:37 -0400 Subject: NFS: Use nfs_fs_mount_common() for remote referral mounts Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e93a6e9..1157189 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2920,95 +2920,32 @@ static struct dentry * nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = raw_data; - struct super_block *s; + struct nfs_mount_info mount_info = { + .fill_super = nfs4_fill_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; struct nfs_server *server; - struct dentry *mntroot; + struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_fh *mntfh; - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - }; - int error = -ENOMEM; dprintk("--> nfs4_referral_get_sb()\n"); mntfh = nfs_alloc_fhandle(); - if (mntfh == NULL) - goto out_err_nofh; + if (mount_info.cloned == NULL || mntfh == NULL) + goto out; /* create a new volume representation */ - server = nfs4_create_referral_server(data, mntfh); + server = nfs4_create_referral_server(mount_info.cloned, mntfh); if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err_noserver; - } - sb_mntdata.server = server; - - if (server->flags & NFS_MOUNT_UNSHARED) - compare_super = NULL; - - /* -o noac implies -o sync */ - if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= MS_SYNCHRONOUS; - - /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); - if (IS_ERR(s)) { - error = PTR_ERR(s); - goto out_err_nosb; - } - - if (s->s_fs_info != server) { - nfs_free_server(server); - server = NULL; - } else { - error = nfs_bdi_register(server); - if 
(error) - goto error_splat_bdi; - } - - if (!s->s_root) { - /* initial superblock/root creation */ - nfs4_fill_super(s, NULL); - nfs_get_cache_cookie(s, NULL, data); - } - - mntroot = nfs_get_root(s, mntfh, dev_name); - if (IS_ERR(mntroot)) { - error = PTR_ERR(mntroot); - goto error_splat_super; - } - if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { - dput(mntroot); - error = -ESTALE; - goto error_splat_super; + mntroot = ERR_CAST(server); + goto out; } - s->s_flags |= MS_ACTIVE; - - security_sb_clone_mnt_opts(data->sb, s); - + mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, mntfh, &mount_info); +out: nfs_free_fhandle(mntfh); - dprintk("<-- nfs4_referral_get_sb() = 0\n"); return mntroot; - -out_err_nosb: - nfs_free_server(server); -out_err_noserver: - nfs_free_fhandle(mntfh); -out_err_nofh: - dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); - return ERR_PTR(error); - -error_splat_super: - if (server && !s->s_root) - bdi_unregister(&server->backing_dev_info); -error_splat_bdi: - deactivate_locked_super(s); - nfs_free_fhandle(mntfh); - dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); - return ERR_PTR(error); } /* -- cgit v0.10.2 From db8333519187d5974cf2ff33910c893bf8727d9f Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:38 -0400 Subject: NFS: Let mount data parsing set the NFS version This field is unconditionally set while parsing mount data, so there is no need to fill it in here. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1157189..db0952d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -945,7 +945,7 @@ static void nfs_umount_begin(struct super_block *sb) rpc_killall_tasks(rpc); } -static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) +static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) { struct nfs_parsed_mount_data *data; @@ -960,7 +960,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->nfs_server.protocol = XPRT_TRANSPORT_TCP; data->auth_flavors[0] = RPC_AUTH_UNIX; data->auth_flavor_len = 1; - data->version = version; data->minorversion = 0; data->net = current->nsproxy->net_ns; security_init_mnt_opts(&data->lsm_opts); @@ -2416,7 +2415,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, struct dentry *mntroot = ERR_PTR(-ENOMEM); int error; - data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); + data = nfs_alloc_parsed_mount_data(); mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) goto out; @@ -2867,7 +2866,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, int error = -ENOMEM; struct dentry *res = ERR_PTR(-ENOMEM); - data = nfs_alloc_parsed_mount_data(4); + data = nfs_alloc_parsed_mount_data(); if (data == NULL) goto out; -- cgit v0.10.2 From 486aa699ffb6ec28adbc147326d62ac9294de8dc Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:39 -0400 Subject: NFS: Create a new nfs_try_mount() This function returns the same return type as nfs4_try_mount() so the two can be more easily substituted. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index db0952d..c69c806 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -290,6 +290,9 @@ static int nfs_show_options(struct seq_file *, struct dentry *); static int nfs_show_devname(struct seq_file *, struct dentry *); static int nfs_show_path(struct seq_file *, struct dentry *); static int nfs_show_stats(struct seq_file *, struct dentry *); +static struct dentry *nfs_fs_mount_common(struct file_system_type *, + struct nfs_server *, int, const char *, struct nfs_fh *, + struct nfs_mount_info *); static struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, @@ -1680,8 +1683,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. */ -static int nfs_try_mount(struct nfs_parsed_mount_data *args, - struct nfs_fh *root_fh) +static int nfs_request_mount(struct nfs_parsed_mount_data *args, + struct nfs_fh *root_fh) { rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); @@ -1744,6 +1747,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } +static struct dentry *nfs_try_mount(int flags, const char *dev_name, + struct nfs_fh *mntfh, + struct nfs_mount_info *mount_info) +{ + int status; + struct nfs_server *server; + + status = nfs_request_mount(mount_info->parsed, mntfh); + if (status) + return ERR_PTR(status); + + /* Get a volume representation */ + server = nfs_create_server(mount_info->parsed, mntfh); + if (IS_ERR(server)) + return ERR_CAST(server); + + return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mntfh, mount_info); +} + /* * Split "dev_name" into "hostname:export_path". 
* @@ -1966,11 +1988,6 @@ static int nfs_validate_mount_data(void *options, PAGE_SIZE, &args->nfs_server.export_path, NFS_MAXPATHLEN); - if (!status) - status = nfs_try_mount(args, mntfh); - - kfree(args->nfs_server.export_path); - args->nfs_server.export_path = NULL; if (status) return status; @@ -2405,7 +2422,6 @@ error_splat_bdi: static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_server *server; struct nfs_parsed_mount_data *data = NULL; struct nfs_mount_info mount_info = { .fill_super = nfs_fill_super, @@ -2429,20 +2445,12 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, mount_info.parsed = data; #ifdef CONFIG_NFS_V4 - if (data->version == 4) { + if (data->version == 4) mntroot = nfs4_try_mount(flags, dev_name, data); - goto out; - } + else #endif /* CONFIG_NFS_V4 */ + mntroot = nfs_try_mount(flags, dev_name, mntfh, &mount_info); - /* Get a volume representation */ - server = nfs_create_server(data, mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mntfh, &mount_info); out: nfs_free_parsed_mount_data(data); nfs_free_fhandle(mntfh); -- cgit v0.10.2 From b72e4f42a33137acc037546277a08f407d3c1016 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:40 -0400 Subject: NFS: Create a single function for text mount data The v2/3 and v4 cases were very similar, with just a few parameters changed. This makes it easy to share code. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1855e8f..4e9b0ff 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -103,6 +103,7 @@ struct nfs_parsed_mount_data { unsigned int version; unsigned int minorversion; char *fscache_uniq; + bool need_mount; struct { struct sockaddr_storage address; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c69c806..db636d7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -66,6 +66,7 @@ #include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS +#define NFS_TEXT_DATA 1 #ifdef CONFIG_NFS_V3 #define NFS_DEFAULT_VERSION 3 @@ -333,8 +334,7 @@ static const struct super_operations nfs_sops = { }; #ifdef CONFIG_NFS_V4 -static int nfs4_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, const char *dev_name); +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct nfs_parsed_mount_data *data); static struct dentry *nfs4_mount(struct file_system_type *fs_type, @@ -964,6 +964,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->auth_flavors[0] = RPC_AUTH_UNIX; data->auth_flavor_len = 1; data->minorversion = 0; + data->need_mount = true; data->net = current->nsproxy->net_ns; security_init_mnt_opts(&data->lsm_opts); } @@ -1754,9 +1755,11 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name, int status; struct nfs_server *server; - status = nfs_request_mount(mount_info->parsed, mntfh); - if (status) - return ERR_PTR(status); + if (mount_info->parsed->need_mount) { + status = nfs_request_mount(mount_info->parsed, mntfh); + if (status) + return ERR_PTR(status); + } /* Get a volume representation */ server = nfs_create_server(mount_info->parsed, mntfh); @@ -1911,6 +1914,7 @@ static int nfs_validate_mount_data(void *options, args->acregmax = data->acregmax; args->acdirmin = data->acdirmin; args->acdirmax = 
data->acdirmax; + args->need_mount = false; memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); @@ -1962,38 +1966,8 @@ static int nfs_validate_mount_data(void *options, } break; - default: { - int status; - - if (nfs_parse_mount_options((char *)options, args) == 0) - return -EINVAL; - - if (!nfs_verify_server_address(sap)) - goto out_no_address; - - if (args->version == 4) -#ifdef CONFIG_NFS_V4 - return nfs4_validate_text_mount_data(options, - args, dev_name); -#else - goto out_v4_not_compiled; -#endif - - nfs_set_port(sap, &args->nfs_server.port, 0); - - nfs_set_mount_transport_protocol(args); - - status = nfs_parse_devname(dev_name, - &args->nfs_server.hostname, - PAGE_SIZE, - &args->nfs_server.export_path, - NFS_MAXPATHLEN); - - if (status) - return status; - - break; - } + default: + return NFS_TEXT_DATA; } #ifndef CONFIG_NFS_V3 @@ -2022,12 +1996,6 @@ out_v3_not_compiled: return -EPROTONOSUPPORT; #endif /* !CONFIG_NFS_V3 */ -#ifndef CONFIG_NFS_V4 -out_v4_not_compiled: - dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); - return -EPROTONOSUPPORT; -#endif /* !CONFIG_NFS_V4 */ - out_nomem: dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); return -ENOMEM; @@ -2041,6 +2009,60 @@ out_invalid_fh: return -EINVAL; } +static int nfs_validate_text_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) +{ + int port = 0; + int max_namelen = PAGE_SIZE; + int max_pathlen = NFS_MAXPATHLEN; + struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + + if (nfs_parse_mount_options((char *)options, args) == 0) + return -EINVAL; + + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (args->version == 4) { +#ifdef CONFIG_NFS_V4 + port = NFS_PORT; + max_namelen = NFS4_MAXNAMLEN; + max_pathlen = NFS4_MAXPATHLEN; + nfs_validate_transport_protocol(args); + nfs4_validate_mount_flags(args); +#else + goto out_v4_not_compiled; +#endif /* CONFIG_NFS_V4 */ + } 
else + nfs_set_mount_transport_protocol(args); + + nfs_set_port(sap, &args->nfs_server.port, port); + + if (args->auth_flavor_len > 1) + goto out_bad_auth; + + return nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + max_namelen, + &args->nfs_server.export_path, + max_pathlen); + +#ifndef CONFIG_NFS_V4 +out_v4_not_compiled: + dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V4 */ + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_bad_auth: + dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n"); + return -EINVAL; +} + static int nfs_compare_remount_data(struct nfs_server *nfss, struct nfs_parsed_mount_data *data) @@ -2438,6 +2460,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, /* Validate the mount data */ error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); + if (error == NFS_TEXT_DATA) + error = nfs_validate_text_mount_data(raw_data, data, dev_name); if (error < 0) { mntroot = ERR_PTR(error); goto out; @@ -2572,37 +2596,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); } -static int nfs4_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - - nfs_set_port(sap, &args->nfs_server.port, NFS_PORT); - - nfs_validate_transport_protocol(args); - - nfs4_validate_mount_flags(args); - - if (args->version != 4) { - dfprintk(MOUNT, - "NFS4: Illegal mount version\n"); - return -EINVAL; - } - - if (args->auth_flavor_len > 1) { - dfprintk(MOUNT, - "NFS4: Too many RPC auth flavours specified\n"); - return -EINVAL; - } - - return nfs_parse_devname(dev_name, - &args->nfs_server.hostname, - NFS4_MAXNAMLEN, - &args->nfs_server.export_path, - NFS4_MAXPATHLEN); -} - /* * Validate NFSv4 mount options */ @@ 
-2673,13 +2666,7 @@ static int nfs4_validate_mount_data(void *options, break; default: - if (nfs_parse_mount_options((char *)options, args) == 0) - return -EINVAL; - - if (!nfs_verify_server_address(sap)) - return -EINVAL; - - return nfs4_validate_text_mount_data(options, args, dev_name); + return NFS_TEXT_DATA; } return 0; @@ -2880,6 +2867,8 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, /* Validate the mount data */ error = nfs4_validate_mount_data(raw_data, data, dev_name); + if (error == NFS_TEXT_DATA) + error = nfs_validate_text_mount_data(raw_data, data, dev_name); if (error < 0) { res = ERR_PTR(error); goto out; -- cgit v0.10.2 From d72c727cd9de490f936a41634e34cd4a61ba6dd6 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:41 -0400 Subject: NFS: Create a single nfs_validate_mount_data() function This new function chooses between the v2/3 parser and the v4 parser by filesystem type. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index db636d7..5b025b0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -335,6 +335,8 @@ static const struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); +static int nfs4_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, const char *dev_name); static struct dentry *nfs4_try_mount(int flags, const char *dev_name, struct nfs_parsed_mount_data *data); static struct dentry *nfs4_mount(struct file_system_type *fs_type, @@ -1857,10 +1859,10 @@ out_path: * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on */ -static int nfs_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) +static int nfs23_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char 
*dev_name) { struct nfs_mount_data *data = (struct nfs_mount_data *)options; struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; @@ -2009,6 +2011,28 @@ out_invalid_fh: return -EINVAL; } +#ifdef CONFIG_NFS_V4 +static int nfs_validate_mount_data(struct file_system_type *fs_type, + void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + if (fs_type == &nfs_fs_type) + return nfs23_validate_mount_data(options, args, mntfh, dev_name); + return nfs4_validate_mount_data(options, args, dev_name); +} +#else +static int nfs_validate_mount_data(struct file_system_type *fs_type, + void *options, + struct nfs_parsed_mount_data *args, + struct nfs_fh *mntfh, + const char *dev_name) +{ + return nfs23_validate_mount_data(options, args, mntfh, dev_name); +} +#endif + static int nfs_validate_text_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name) @@ -2459,7 +2483,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); + error = nfs_validate_mount_data(fs_type, raw_data, data, mntfh, dev_name); if (error == NFS_TEXT_DATA) error = nfs_validate_text_mount_data(raw_data, data, dev_name); if (error < 0) { @@ -2866,7 +2890,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, goto out; /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, data, dev_name); + error = nfs_validate_mount_data(fs_type, raw_data, data, NULL, dev_name); if (error == NFS_TEXT_DATA) error = nfs_validate_text_mount_data(raw_data, data, dev_name); if (error < 0) { -- cgit v0.10.2 From 46058d46d3fcf2900f18d9bd5585c8f89d59e1c4 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:42 -0400 Subject: NFS: Allocate parsed mount data directly to the nfs_mount_info structure Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff 
--git a/fs/nfs/super.c b/fs/nfs/super.c index 5b025b0..fc62701 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2468,7 +2468,6 @@ error_splat_bdi: static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { - struct nfs_parsed_mount_data *data = NULL; struct nfs_mount_info mount_info = { .fill_super = nfs_fill_super, .set_security = nfs_set_sb_security, @@ -2477,30 +2476,29 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, struct dentry *mntroot = ERR_PTR(-ENOMEM); int error; - data = nfs_alloc_parsed_mount_data(); + mount_info.parsed = nfs_alloc_parsed_mount_data(); mntfh = nfs_alloc_fhandle(); - if (data == NULL || mntfh == NULL) + if (mount_info.parsed == NULL || mntfh == NULL) goto out; /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, data, mntfh, dev_name); + error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mntfh, dev_name); if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, data, dev_name); + error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); if (error < 0) { mntroot = ERR_PTR(error); goto out; } - mount_info.parsed = data; #ifdef CONFIG_NFS_V4 - if (data->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, data); + if (mount_info.parsed->version == 4) + mntroot = nfs4_try_mount(flags, dev_name, mount_info.parsed); else #endif /* CONFIG_NFS_V4 */ mntroot = nfs_try_mount(flags, dev_name, mntfh, &mount_info); out: - nfs_free_parsed_mount_data(data); + nfs_free_parsed_mount_data(mount_info.parsed); nfs_free_fhandle(mntfh); return mntroot; } -- cgit v0.10.2 From 87c7083dc3eba802d6e9f312ec520a4814f59871 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 15:07:43 -0400 Subject: NFS: Pass mntfh as part of the nfs_mount_info structure This allows me to use the filehandle allocated in nfs_fs_mount() for nfs v4 mounts instead of allocating a new one. 
Rather than change nfs4_mount() to look almost exactly like nfs_fs_mount(), I instead remove the function. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index fc62701..c3ae819 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -283,6 +283,7 @@ struct nfs_mount_info { int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; + struct nfs_fh *mntfh; }; static void nfs_umount_begin(struct super_block *); @@ -292,8 +293,7 @@ static int nfs_show_devname(struct seq_file *, struct dentry *); static int nfs_show_path(struct seq_file *, struct dentry *); static int nfs_show_stats(struct seq_file *, struct dentry *); static struct dentry *nfs_fs_mount_common(struct file_system_type *, - struct nfs_server *, int, const char *, struct nfs_fh *, - struct nfs_mount_info *); + struct nfs_server *, int, const char *, struct nfs_mount_info *); static struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, @@ -338,9 +338,7 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_parsed_mount_data *data); -static struct dentry *nfs4_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); + struct nfs_mount_info *mount_info); static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, @@ -354,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .mount = 
nfs4_mount, + .mount = nfs_fs_mount, .kill_sb = nfs4_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -1751,24 +1749,23 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, } static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_fh *mntfh, struct nfs_mount_info *mount_info) { int status; struct nfs_server *server; if (mount_info->parsed->need_mount) { - status = nfs_request_mount(mount_info->parsed, mntfh); + status = nfs_request_mount(mount_info->parsed, mount_info->mntfh); if (status) return ERR_PTR(status); } /* Get a volume representation */ - server = nfs_create_server(mount_info->parsed, mntfh); + server = nfs_create_server(mount_info->parsed, mount_info->mntfh); if (IS_ERR(server)) return ERR_CAST(server); - return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mntfh, mount_info); + return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); } /* @@ -2394,7 +2391,6 @@ static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, struct nfs_server *server, int flags, const char *dev_name, - struct nfs_fh *mntfh, struct nfs_mount_info *mount_info) { struct super_block *s; @@ -2437,7 +2433,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); } - mntroot = nfs_get_root(s, mntfh, dev_name); + mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); if (IS_ERR(mntroot)) goto error_splat_super; @@ -2472,17 +2468,16 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, .fill_super = nfs_fill_super, .set_security = nfs_set_sb_security, }; - struct nfs_fh *mntfh; struct dentry *mntroot = ERR_PTR(-ENOMEM); int error; mount_info.parsed = nfs_alloc_parsed_mount_data(); - mntfh = nfs_alloc_fhandle(); - if (mount_info.parsed == NULL || mntfh == NULL) + 
mount_info.mntfh = nfs_alloc_fhandle(); + if (mount_info.parsed == NULL || mount_info.mntfh == NULL) goto out; /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mntfh, dev_name); + error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); if (error == NFS_TEXT_DATA) error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); if (error < 0) { @@ -2492,14 +2487,14 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, #ifdef CONFIG_NFS_V4 if (mount_info.parsed->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, mount_info.parsed); + mntroot = nfs4_try_mount(flags, dev_name, &mount_info); else #endif /* CONFIG_NFS_V4 */ - mntroot = nfs_try_mount(flags, dev_name, mntfh, &mount_info); + mntroot = nfs_try_mount(flags, dev_name, &mount_info); out: nfs_free_parsed_mount_data(mount_info.parsed); - nfs_free_fhandle(mntfh); + nfs_free_fhandle(mount_info.mntfh); return mntroot; } @@ -2540,6 +2535,8 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, dprintk("--> nfs_xdev_mount_common()\n"); + mount_info->mntfh = data->fh; + /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { @@ -2547,7 +2544,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, goto out_err; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, data->fh, mount_info); + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; @@ -2712,33 +2709,25 @@ out_no_address: */ static struct dentry * nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) + const char *dev_name, void *info) { - struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, - .set_security = nfs_set_sb_security, - .parsed = 
raw_data, - }; + struct nfs_mount_info *mount_info = info; struct nfs_server *server; - struct nfs_fh *mntfh; struct dentry *mntroot = ERR_PTR(-ENOMEM); - mntfh = nfs_alloc_fhandle(); - if (mount_info.parsed == NULL || mntfh == NULL) - goto out; + mount_info->fill_super = nfs4_fill_super; + mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ - server = nfs4_create_server(mount_info.parsed, mntfh); + server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); if (IS_ERR(server)) { mntroot = ERR_CAST(server); goto out; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, - dev_name, mntfh, &mount_info); + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); out: - nfs_free_fhandle(mntfh); return mntroot; } @@ -2851,17 +2840,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, } static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_parsed_mount_data *data) + struct nfs_mount_info *mount_info) { char *export_path; struct vfsmount *root_mnt; struct dentry *res; + struct nfs_parsed_mount_data *data = mount_info->parsed; dfprintk(MOUNT, "--> nfs4_try_mount()\n"); export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, data->nfs_server.hostname); data->nfs_server.export_path = export_path; @@ -2873,40 +2863,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name, return res; } -/* - * Get the superblock for an NFS4 mountpoint - */ -static struct dentry *nfs4_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_parsed_mount_data *data; - int error = -ENOMEM; - struct dentry *res = ERR_PTR(-ENOMEM); - - data = nfs_alloc_parsed_mount_data(); - if (data == NULL) - goto out; - - /* Validate the mount data */ - error = 
nfs_validate_mount_data(fs_type, raw_data, data, NULL, dev_name); - if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, data, dev_name); - if (error < 0) { - res = ERR_PTR(error); - goto out; - } - - res = nfs4_try_mount(flags, dev_name, data); - if (IS_ERR(res)) - error = PTR_ERR(res); - -out: - nfs_free_parsed_mount_data(data); - dprintk("<-- nfs4_mount() = %d%s\n", error, - error != 0 ? " [error]" : ""); - return res; -} - static void nfs4_kill_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2945,24 +2901,23 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, }; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_fh *mntfh; dprintk("--> nfs4_referral_get_sb()\n"); - mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mntfh == NULL) + mount_info.mntfh = nfs_alloc_fhandle(); + if (mount_info.cloned == NULL || mount_info.mntfh == NULL) goto out; /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mntfh); + server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); if (IS_ERR(server)) { mntroot = ERR_CAST(server); goto out; } - mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, mntfh, &mount_info); + mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); out: - nfs_free_fhandle(mntfh); + nfs_free_fhandle(mount_info.mntfh); return mntroot; } -- cgit v0.10.2 From 5e7e5a0da28216fb9d0a49e93ee27668ef4f04f7 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 16:47:18 -0400 Subject: NFS: Create an NFS v3 stat_to_errno() In theory, NFS v3 can have different error versions than NFS v2. v4 is already using its own nfs4_stat_to_errno() to map error codes, so rather than create something in the generic client for v2 and v3 to share I instead give v3 its own function. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4e9b0ff..bf64095 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -208,7 +208,6 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); /* nfs2xdr.c */ -extern int nfs_stat_to_errno(enum nfs_stat); extern struct rpc_procinfo nfs_procedures[]; extern int nfs2_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c99008e..baf759b 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -61,6 +61,7 @@ #define NFS_readdirres_sz (1) #define NFS_statfsres_sz (1+NFS_info_sz) +static int nfs_stat_to_errno(enum nfs_stat); /* * While encoding arguments, set up the reply buffer in advance to @@ -1111,7 +1112,7 @@ static const struct { * Returns a local errno value, or -EIO if the NFS status code is * not recognized. This function is used jointly by NFSv2 and NFSv3. 
*/ -int nfs_stat_to_errno(enum nfs_stat status) +static int nfs_stat_to_errno(enum nfs_stat status) { int i; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ee284c2..902de48 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -86,6 +86,8 @@ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) +static int nfs3_stat_to_errno(enum nfs_stat); + /* * Map file type to S_IFMT bits */ @@ -1388,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, out: return error; out_default: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1427,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1475,7 +1477,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr); if (unlikely(error)) goto out; - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1516,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, out: return error; out_default: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1557,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, out: return error; out_default: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1639,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1709,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1773,7 +1775,7 @@ out_default: error = decode_wcc_data(xdr, result->dir_attr); if (unlikely(error)) goto out; - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ 
-1812,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1856,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -1899,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /** @@ -2091,7 +2093,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr); if (unlikely(error)) goto out; - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -2159,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -2235,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -2298,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } /* @@ -2339,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, out: return error; out_status: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } #ifdef CONFIG_NFS_V3_ACL @@ -2404,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, @@ -2423,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs_stat_to_errno(status); + return nfs3_stat_to_errno(status); } #endif /* 
CONFIG_NFS_V3_ACL */ + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static const struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, -EPERM }, + { NFSERR_NOENT, -ENOENT }, + { NFSERR_IO, -errno_NFSERR_IO}, + { NFSERR_NXIO, -ENXIO }, +/* { NFSERR_EAGAIN, -EAGAIN }, */ + { NFSERR_ACCES, -EACCES }, + { NFSERR_EXIST, -EEXIST }, + { NFSERR_XDEV, -EXDEV }, + { NFSERR_NODEV, -ENODEV }, + { NFSERR_NOTDIR, -ENOTDIR }, + { NFSERR_ISDIR, -EISDIR }, + { NFSERR_INVAL, -EINVAL }, + { NFSERR_FBIG, -EFBIG }, + { NFSERR_NOSPC, -ENOSPC }, + { NFSERR_ROFS, -EROFS }, + { NFSERR_MLINK, -EMLINK }, + { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFSERR_NOTEMPTY, -ENOTEMPTY }, + { NFSERR_DQUOT, -EDQUOT }, + { NFSERR_STALE, -ESTALE }, + { NFSERR_REMOTE, -EREMOTE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, -EWFLUSH }, +#endif + { NFSERR_BADHANDLE, -EBADHANDLE }, + { NFSERR_NOT_SYNC, -ENOTSYNC }, + { NFSERR_BAD_COOKIE, -EBADCOOKIE }, + { NFSERR_NOTSUPP, -ENOTSUPP }, + { NFSERR_TOOSMALL, -ETOOSMALL }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, + { NFSERR_BADTYPE, -EBADTYPE }, + { NFSERR_JUKEBOX, -EJUKEBOX }, + { -1, -EIO } +}; + +/** + * nfs3_stat_to_errno - convert an NFS status code to a local errno + * @status: NFS status code to convert + * + * Returns a local errno value, or -EIO if the NFS status code is + * not recognized. This function is used only by the NFSv3 XDR decoders.
+ */ +static int nfs3_stat_to_errno(enum nfs_stat status) +{ + int i; + + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == (int)status) + return nfs_errtbl[i].errno; + } + dprintk("NFS: Unrecognized nfs status value: %u\n", status); + return nfs_errtbl[i].errno; +} + + #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ -- cgit v0.10.2 From 2ba68002a74fb167b68844077d36e5ccfc87f323 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 16:47:19 -0400 Subject: NFS: Make v2 configurable With this patch NFS v2 can be disabled during Kconfig. I default the option to "y" to match the current behavior. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 2a0e6c5..66b0f4c 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -29,6 +29,16 @@ config NFS_FS If unsure, say N. +config NFS_V2 + bool "NFS client support for NFS version 2" + depends on NFS_FS + default y + help + This option enables support for version 2 of the NFS protocol + (RFC 1094) in the kernel's NFS client. + + If unsure, say Y. 
+ config NFS_V3 bool "NFS client support for NFS version 3" depends on NFS_FS diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b58613d..7ddd45d 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,11 +4,12 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ - direct.o pagelist.o proc.o read.o symlink.o unlink.o \ +nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ + direct.o pagelist.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o +nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a8f8de6..8f1c652 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true; * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { +#ifdef CONFIG_NFS_V2 [2] = &nfs_version2, +#endif #ifdef CONFIG_NFS_V3 [3] = &nfs_version3, #endif @@ -847,7 +849,7 @@ static int nfs_init_server(struct nfs_server *server, .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, - .rpc_ops = &nfs_v2_clientops, + .rpc_ops = NULL, .proto = data->nfs_server.protocol, .net = data->net, }; @@ -857,10 +859,20 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); + switch (data->version) { +#ifdef CONFIG_NFS_V2 + case 2: + cl_init.rpc_ops = &nfs_v2_clientops; + break; +#endif #ifdef CONFIG_NFS_V3 - if (data->version == 3) + case 3: cl_init.rpc_ops = &nfs_v3_clientops; + break; #endif + default: + return -EPROTONOSUPPORT; + } nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); -- cgit v0.10.2 From 981f9face8fc21bede7b7a56a7cf6c176da5ab05 Mon Sep 17 
00:00:00 2001 From: Bryan Schumaker Date: Thu, 10 May 2012 16:47:20 -0400 Subject: NFS: Turn v3 on by default Most users will use NFS v3 or possibly v4 so this makes it easier for them. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 66b0f4c..f90f4f5 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -42,6 +42,7 @@ config NFS_V2 config NFS_V3 bool "NFS client support for NFS version 3" depends on NFS_FS + default y help This option enables support for version 3 of the NFS protocol (RFC 1813) in the kernel's NFS client. -- cgit v0.10.2 From 5abc03cd919535c61b813f2319cb38326a41e810 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 May 2012 22:45:28 +0300 Subject: NFS: kmalloc() doesn't return an ERR_PTR() Obviously we should check for NULL here instead of IS_ERR(). Signed-off-by: Dan Carpenter Cc: stable@vger.kernel.org [3.4] Signed-off-by: Trond Myklebust diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ba3019f..3e8edbe 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -640,20 +640,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; struct key *key = cons->key; - int ret; + int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (IS_ERR(msg)) { - ret = PTR_ERR(msg); + if (!msg) goto out0; - } im = kmalloc(sizeof(*im), GFP_KERNEL); - if (IS_ERR(im)) { - ret = PTR_ERR(im); + if (!im) goto out1; - } ret = nfs_idmap_prepare_message(key->description, im, msg); if (ret < 0) -- cgit v0.10.2 From bda14606a3c055dbbccd998fa91eb87c4c7b2027 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 13 May 2012 10:35:40 -0700 Subject: sunrpc: fix kernel-doc warnings Fix kernel-doc warnings in sunrpc/rpc_pipe.c and sunrpc/rpcb_clnt.c: Warning(net/sunrpc/rpcb_clnt.c:428): No description found for parameter 'net' Warning(net/sunrpc/rpcb_clnt.c:567): No description 
found for parameter 'net' Warning(net/sunrpc/rpc_pipe.c:133): No description found for parameter 'pipe' Warning(net/sunrpc/rpc_pipe.c:133): Excess function parameter 'inode' description in 'rpc_queue_upcall' Warning(net/sunrpc/rpc_pipe.c:839): No description found for parameter 'pipe' Warning(net/sunrpc/rpc_pipe.c:839): Excess function parameter 'ops' description in 'rpc_mkpipe_dentry' Warning(net/sunrpc/rpc_pipe.c:839): Excess function parameter 'flags' description in 'rpc_mkpipe_dentry' Warning(net/sunrpc/rpc_pipe.c:949): No description found for parameter 'dentry' Warning(net/sunrpc/rpc_pipe.c:949): Excess function parameter 'clnt' description in 'rpc_remove_client_dir' Signed-off-by: Randy Dunlap Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 3b62cf2..7fb49c5 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -120,7 +120,7 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); /** * rpc_queue_upcall - queue an upcall message to userspace - * @inode: inode of upcall pipe on which to queue given message + * @pipe: upcall pipe on which to queue given message * @msg: message to queue * * Call with an @inode created by rpc_mkpipe() to queue an upcall. @@ -819,9 +819,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * @parent: dentry of directory to create new "pipe" in * @name: name of pipe * @private: private data to associate with the pipe, for the caller's use - * @ops: operations defining the behavior of the pipe: upcall, downcall, - * release_pipe, open_pipe, and destroy_msg. - * @flags: rpc_pipe flags + * @pipe: &rpc_pipe containing input parameters * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). 
The actual reads will result in calls to @@ -943,7 +941,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @clnt: rpc client + * @dentry: dentry for the pipe */ int rpc_remove_client_dir(struct dentry *dentry) { diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 78ac39f..3c06534 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -394,6 +394,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) /** * rpcb_register - set or unset a port registration with the local rpcbind svc + * @net: target network namespace * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to register @@ -521,6 +522,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, /** * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @net: target network namespace * @program: RPC program number of service to (un)register * @version: RPC version number of service to (un)register * @address: address family, IP address, and port to (un)register -- cgit v0.10.2 From 7e450b4e47d14429d0cc17cf4ce389fc027937be Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 May 2012 13:04:26 -0400 Subject: rpc_pipefs: clear write bit from top level rpc_pipefs directory We can't create new files or directories here from userspace, so let's not pretend that this directory is writable. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 3b62cf2..f955d72 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1118,7 +1118,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &s_ops; sb->s_time_gran = 1; - inode = rpc_get_inode(sb, S_IFDIR | 0755); + inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); sb->s_root = root = d_make_root(inode); if (!root) return -ENOMEM; -- cgit v0.10.2 From 39ffb9218e41b1ef4920432776791f5e9ed2eff3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 May 2012 10:21:30 -0700 Subject: NFS: Fix a compile issue when CONFIG_NFS_FSCACHE was undefined Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 2a08b91..c5b11b5 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -170,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {} static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} -static inline void nfs_fscache_get_super_cookie( - struct super_block *sb, - const char *uniq, - struct nfs_clone_mount *mntdata) -{ -} static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c3ae819..a973eb1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2346,6 +2346,7 @@ static int nfs_compare_super(struct super_block *sb, void *data) return nfs_compare_mount_options(sb, server, mntflags); } +#ifdef CONFIG_NFS_FSCACHE static void nfs_get_cache_cookie(struct super_block *sb, struct nfs_parsed_mount_data *parsed, struct nfs_clone_mount *cloned) @@ -2366,6 +2367,13 @@ static void nfs_get_cache_cookie(struct super_block *sb, nfs_fscache_get_super_cookie(sb, uniq, ulen); } +#else +static void 
nfs_get_cache_cookie(struct super_block *sb, + struct nfs_parsed_mount_data *parsed, + struct nfs_clone_mount *cloned) +{ +} +#endif static int nfs_bdi_register(struct nfs_server *server) { -- cgit v0.10.2 From 5f23eff3814e9d255464e7a03dba47c27069ac78 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 16 May 2012 11:35:36 +0300 Subject: NFS: fix unsigned comparison in nfs4_create_sec_client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/nfs/nfs4namespace.c: In function ‘nfs4_create_sec_client’: fs/nfs/nfs4namespace.c:171:2: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] Introduced by commit 72de53ec4bca39c26709122a8f78bfefe7b6bca4 "NFS: Do secinfo as part of lookup" Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a7f3ded..3f5519b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -168,7 +168,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino rpc_authflavor_t flavor; flavor = nfs4_negotiate_security(inode, name); - if (flavor < 0) + if ((int)flavor < 0) return ERR_PTR(flavor); clone = rpc_clone_client(clnt); -- cgit v0.10.2 From 6b34309936ed5c85cbe5868655814065f42c2f38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 May 2012 13:30:35 -0400 Subject: sunrpc: suppress page allocation warnings in xprt_alloc_slot() It's easily possible for these allocations to fail since we're using GFP_NOWAIT here. We don't want to spam the logs with warnings about that though. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0cbcd1a..b239e75 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -979,7 +979,7 @@ static void xprt_alloc_slot(struct rpc_task *task) list_del(&req->rq_list); goto out_init_req; } - req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN); if (!IS_ERR(req)) goto out_init_req; switch (PTR_ERR(req)) { -- cgit v0.10.2 From 1afeaf5c29aa07db25760d2fbed5c08a3aec3498 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 May 2012 12:12:53 -0400 Subject: sunrpc: fix loss of task->tk_status after rpc_delay call in xprt_alloc_slot xprt_alloc_slot will call rpc_delay() to make the task wait a bit before retrying when it gets back an -ENOMEM error from xprt_dynamic_alloc_slot. The problem is that rpc_delay will clear the task->tk_status, causing call_reserveresult to abort the task. The solution is simply to let call_reserveresult handle the ENOMEM error directly. Reported-by: Jeff Layton Cc: stable@vger.kernel.org [>= 3.1] Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index adf2990..25302c8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1288,6 +1288,8 @@ call_reserveresult(struct rpc_task *task) } switch (status) { + case -ENOMEM: + rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ task->tk_action = call_reserve; return; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b239e75..d7ccd79 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -984,15 +984,16 @@ static void xprt_alloc_slot(struct rpc_task *task) goto out_init_req; switch (PTR_ERR(req)) { case -ENOMEM: - rpc_delay(task, HZ >> 2); dprintk("RPC: dynamic allocation of request slot " "failed! 
Retrying\n"); + task->tk_status = -ENOMEM; break; case -EAGAIN: rpc_sleep_on(&xprt->backlog, task, NULL); dprintk("RPC: waiting for request slot\n"); + default: + task->tk_status = -EAGAIN; } - task->tk_status = -EAGAIN; return; out_init_req: task->tk_status = 0; -- cgit v0.10.2 From e73e6c9e85ed91187c1d21cb9238e86a116bf3db Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Mon, 30 Apr 2012 11:32:57 -0400 Subject: Fixed goto readability in nfs_update_inode. Simplified error gotos to make it slightly easier to read, it doesn't affect the functionality of the routine. Signed-off-by: Matthew Treinish Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9f17cd1..9ad81ce 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1275,14 +1275,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) - goto out_fileid; + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) { + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + inode->i_sb->s_id, (long long)nfsi->fileid, + (long long)fattr->fileid); + goto out_err; + } /* * Make sure the inode's type hasn't changed. */ - if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) - goto out_changed; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + /* + * Big trouble! The inode has become a different object. + */ + printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", + __func__, inode->i_ino, inode->i_mode, fattr->mode); + goto out_err; + } server = NFS_SERVER(inode); /* Update the fsid? 
*/ @@ -1443,12 +1455,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= invalid; return 0; - out_changed: - /* - * Big trouble! The inode has become a different object. - */ - printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", - __func__, inode->i_ino, inode->i_mode, fattr->mode); out_err: /* * No need to worry about unhashing the dentry, as the @@ -1457,13 +1463,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfs_invalidate_inode(inode); return -ESTALE; - - out_fileid: - printk(KERN_ERR "NFS: server %s error: fileid changed\n" - "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, - (long long)nfsi->fileid, (long long)fattr->fileid); - goto out_err; } -- cgit v0.10.2 From 554d458d79fa34acc73bc5128ba7bbf6b3007dfd Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:42 -0400 Subject: NFSv4.1: cleanup filelayout invalid deviceid handling Move the invalid deviceid test into nfs4_fl_prepare_ds, called by the filelayout read, write, and commit routines. NFS4_DEVICE_ID_NEG_ENTRY is no longer needed. Remove redundant printk's - filelayout_mark_devid_invalid prints a KERN_WARNING. An invalid device prevents pNFS io. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 80a63f6..eebec9a 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -389,9 +389,6 @@ filelayout_read_pagelist(struct nfs_read_data *data) __func__, hdr->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); - if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) - return PNFS_NOT_ATTEMPTED; - /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -432,16 +429,11 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_fh *fh; int status; - if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) - return PNFS_NOT_ATTEMPTED; - /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", - __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; @@ -977,8 +969,6 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", - __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 96b89bb..2f6330c 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -62,12 +62,8 @@ struct nfs4_pnfs_ds { atomic_t ds_count; }; -/* nfs4_file_layout_dsaddr flags */ -#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 - struct 
nfs4_file_layout_dsaddr { struct nfs4_deviceid_node id_node; - unsigned long flags; u32 stripe_count; u8 *stripe_indices; u32 ds_num; @@ -111,6 +107,23 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; } +static inline void +filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) +{ + u32 *p = (u32 *)&node->deviceid; + + printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", + p[0], p[1], p[2], p[3]); + + set_bit(NFS_DEVICEID_INVALID, &node->flags); +} + +static inline bool +filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) +{ + return test_bit(NFS_DEVICEID_INVALID, &node->flags); +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index c9cff9a..7e0be3e 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -791,48 +791,33 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) return flseg->fh_array[i]; } -static void -filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, - int err, const char *ds_remotestr) -{ - u32 *p = (u32 *)&dsaddr->id_node.deviceid; - - printk(KERN_ERR "NFS: data server %s connection error %d." 
- " Deviceid [%x%x%x%x] marked out of use.\n", - ds_remotestr, err, p[0], p[1], p[2], p[3]); - - spin_lock(&nfs4_ds_cache_lock); - dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; - spin_unlock(&nfs4_ds_cache_lock); -} - struct nfs4_pnfs_ds * nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) { struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); + + if (filelayout_test_devid_invalid(devid)) + return NULL; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); - return NULL; + goto mark_dev_invalid; } if (!ds->ds_clp) { struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; - if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) { - /* Already tried to connect, don't try again */ - dprintk("%s Deviceid marked out of use\n", __func__); - return NULL; - } err = nfs4_ds_connect(s, ds); - if (err) { - filelayout_mark_devid_negative(dsaddr, err, - ds->ds_remotestr); - return NULL; - } + if (err) + goto mark_dev_invalid; } return ds; + +mark_dev_invalid: + filelayout_mark_devid_invalid(devid); + return NULL; } -- cgit v0.10.2 From 90fecfcb3437dfc9bec4ee3306584dcd6843701b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:43 -0400 Subject: NFSv4.1 cleanup filelayout invalid layout handling The invalid layout bits should only be used to block LAYOUTGETs. Do not invalidate a layout on deviceid invalidation. Do not invalidate a layout on un-handled READ, WRITE, COMMIT errors.
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index eebec9a..b9edc88 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -148,7 +148,6 @@ wait_on_recovery: static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_pgio_header *hdr = data->header; int reset = 0; dprintk("%s DS read\n", __func__); @@ -157,10 +156,8 @@ static int filelayout_read_done_cb(struct rpc_task *task, data->ds_clp, &reset) == -EAGAIN) { dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) { - pnfs_set_lo_fail(hdr->lseg); + if (reset) nfs4_reset_read(task, data); - } rpc_restart_call_prepare(task); return -EAGAIN; } @@ -233,17 +230,14 @@ static void filelayout_read_release(void *data) static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_pgio_header *hdr = data->header; int reset = 0; if (filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, &reset) == -EAGAIN) { dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) { - pnfs_set_lo_fail(hdr->lseg); + if (reset) nfs4_reset_write(task, data); - } rpc_restart_call_prepare(task); return -EAGAIN; } @@ -272,10 +266,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task, data->ds_clp, &reset) == -EAGAIN) { dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) { + if (reset) prepare_to_resend_writes(data); - pnfs_set_lo_fail(data->lseg); - } else + else rpc_restart_call_prepare(task); return -EAGAIN; } @@ -393,12 +386,8 @@ filelayout_read_pagelist(struct nfs_read_data *data) j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) { - /* Either layout 
fh index faulty, or ds connect failed */ - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + if (!ds) return PNFS_NOT_ATTEMPTED; - } dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); /* No multipath support. Use first DS */ @@ -433,11 +422,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) { - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + if (!ds) return PNFS_NOT_ATTEMPTED; - } dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr); @@ -969,8 +955,6 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); filelayout_commit_release(data); return -EAGAIN; -- cgit v0.10.2 From 9f0ec176b3071e0472582c07ae1e68055b28184d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:44 -0400 Subject: NFSv4.1 set RPC_TASK_SOFTCONN for filelayout DS RPC calls RPC_TASK_SOFTCONN returns connection errors to the caller which allows the pNFS file layout to quickly try the MDS or perhaps another DS. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bf64095..6ed96c7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -299,7 +299,7 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops); + const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); @@ -326,13 +326,13 @@ extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct rpc_clnt *clnt, struct nfs_write_data *data, const struct rpc_call_ops *call_ops, - int how); + int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, - int how); + int how, int flags); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b9edc88..0db8c07 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -401,7 +401,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) /* Perform an asynchronous read to ds */ status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, - &filelayout_read_call_ops); + &filelayout_read_call_ops, RPC_TASK_SOFTCONN); BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -441,7 +441,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) /* Perform an asynchronous write */ status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, - &filelayout_write_call_ops, sync); + 
&filelayout_write_call_ops, sync, + RPC_TASK_SOFTCONN); BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -966,7 +967,8 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) if (fh) data->args.fh = fh; return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, - &filelayout_commit_call_ops, how); + &filelayout_commit_call_ops, how, + RPC_TASK_SOFTCONN); } static int @@ -1120,7 +1122,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, - data->mds_ops, how); + data->mds_ops, how, 0); } else { struct pnfs_commit_bucket *buckets; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f23cf25..2cfdd77 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -206,7 +206,7 @@ out: int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops) + const struct rpc_call_ops *call_ops, int flags) { struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; @@ -223,7 +223,7 @@ int nfs_initiate_read(struct rpc_clnt *clnt, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | swap_flags, + .flags = RPC_TASK_ASYNC | swap_flags | flags, }; /* Set up the initial task struct. 
*/ @@ -272,7 +272,7 @@ static int nfs_do_read(struct nfs_read_data *data, { struct inode *inode = data->header->inode; - return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops); + return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); } static int diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8ffd7d5..e6fe3d6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -916,7 +916,7 @@ static int flush_task_priority(int how) int nfs_initiate_write(struct rpc_clnt *clnt, struct nfs_write_data *data, const struct rpc_call_ops *call_ops, - int how) + int how, int flags) { struct inode *inode = data->header->inode; int priority = flush_task_priority(how); @@ -933,7 +933,7 @@ int nfs_initiate_write(struct rpc_clnt *clnt, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | flags, .priority = priority, }; int ret = 0; @@ -1009,7 +1009,7 @@ static int nfs_do_write(struct nfs_write_data *data, { struct inode *inode = data->header->inode; - return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how); + return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); } static int nfs_do_multiple_writes(struct list_head *head, @@ -1394,7 +1394,7 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, - int how) + int how, int flags) { struct rpc_task *task; int priority = flush_task_priority(how); @@ -1410,7 +1410,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | flags, .priority = priority, }; /* Set up the initial task struct. 
*/ @@ -1499,7 +1499,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); - return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); + return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, + how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo); cinfo->completion_ops->error_cleanup(NFS_I(inode)); -- cgit v0.10.2 From 98fc685ae2aa24eae98526e9196b3229d519083a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:45 -0400 Subject: NFSv4.1 data server timeo and retrans module parameters Set the recovery parameters for data servers. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8f1c652..b4e2199 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1477,8 +1477,8 @@ error: * the MDS. */ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, - const struct sockaddr *ds_addr, - int ds_addrlen, int ds_proto) + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct nfs_client_initdata cl_init = { .addr = ds_addr, @@ -1488,12 +1488,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, .minorversion = mds_clp->cl_minorversion, .net = mds_clp->net, }; - struct rpc_timeout ds_timeout = { - .to_initval = 15 * HZ, - .to_maxval = 15 * HZ, - .to_retries = 1, - .to_exponential = 1, - }; + struct rpc_timeout ds_timeout; struct nfs_client *clp; /* @@ -1501,6 +1496,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS * (section 13.1 RFC 5661). 
*/ + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6ed96c7..1466c5d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -172,7 +172,9 @@ extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern int nfs4_check_client_ready(struct nfs_client *clp); extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr *ds_addr, - int ds_addrlen, int ds_proto); + int ds_addrlen, int ds_proto, + unsigned int ds_timeo, + unsigned int ds_retrans); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2f6330c..6fb1901 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -33,6 +33,13 @@ #include "pnfs.h" /* + * Default data server connection timeout and retrans values. + * Set by module parameters dataserver_timeo and dataserver_retrans. + */ +#define NFS4_DEF_DS_TIMEO 60 +#define NFS4_DEF_DS_RETRANS 5 + +/* * Field testing shows we need to support up to 4096 stripe indices. * We store each index as a u8 (u32 on the wire) to keep the memory footprint * reasonable. 
This in turn means we support a maximum of 256 diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 7e0be3e..d4d2032 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -30,12 +30,16 @@ #include #include +#include #include "internal.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD +static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; +static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; + /* * Data server cache * @@ -165,8 +169,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) __func__, ds->ds_remotestr, da->da_remotestr); clp = nfs4_set_ds_client(mds_srv->nfs_client, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP); + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + dataserver_timeo, dataserver_retrans); if (!IS_ERR(clp)) break; } @@ -821,3 +826,12 @@ mark_dev_invalid: filelayout_mark_devid_invalid(devid); return NULL; } + +module_param(dataserver_retrans, uint, 0644); +MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " + "retries a request before it attempts further " + " recovery action."); +module_param(dataserver_timeo, uint, 0644); +MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " + "NFSv4.1 client waits for a response from a " + " data server before it retries an NFS request."); -- cgit v0.10.2 From e7dd79af01e7ca932c5168a708e77750659f7a9e Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:46 -0400 Subject: NFSv4.1: mark deviceid invalid on filelayout DS connection errors This prevents the use of any layout for i/o that references the deviceid. I/O is redirected through the MDS. Redirect the unhandled failed I/O to the MDS without marking either the layout or the deviceid invalid. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0db8c07..f503cbe5 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -82,29 +82,77 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +static void filelayout_reset_write(struct nfs_write_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + struct rpc_task *task = &data->task; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + + task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + &hdr->pages, + hdr->completion_ops); + } +} + +static void filelayout_reset_read(struct nfs_read_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + struct rpc_task *task = &data->task; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + + task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + &hdr->pages, + hdr->completion_ops); + } +} + static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, - int *reset) + struct pnfs_layout_segment *lseg) { - struct nfs_server *mds_server = NFS_SERVER(state->inode); + struct inode *inode = lseg->pls_layout->plh_inode; + struct nfs_server *mds_server = NFS_SERVER(inode); + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs_client *mds_client = mds_server->nfs_client; if (task->tk_status >= 0) 
return 0; - *reset = 0; switch (task->tk_status) { /* MDS state errors */ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (state == NULL) + break; nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: + if (state == NULL) + break; nfs4_schedule_stateid_recovery(mds_server, state); goto wait_on_recovery; case -NFS4ERR_EXPIRED: - nfs4_schedule_stateid_recovery(mds_server, state); + if (state != NULL) + nfs4_schedule_stateid_recovery(mds_server, state); nfs4_schedule_lease_recovery(mds_client); goto wait_on_recovery; /* DS session errors */ @@ -127,11 +175,22 @@ static int filelayout_async_handle_error(struct rpc_task *task, break; case -NFS4ERR_RETRY_UNCACHED_REP: break; + /* RPC connection errors */ + case -ECONNREFUSED: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EIO: + case -ETIMEDOUT: + case -EPIPE: + dprintk("%s DS connection error %d\n", __func__, + task->tk_status); + filelayout_mark_devid_invalid(devid); + /* fall through */ default: - dprintk("%s DS error. Retry through MDS %d\n", __func__, + dprintk("%s Retry through MDS. 
Error %d\n", __func__, task->tk_status); - *reset = 1; - break; + return -NFS4ERR_RESET_TO_MDS; } out: task->tk_status = 0; @@ -148,16 +207,17 @@ wait_on_recovery: static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { - int reset = 0; + struct nfs_pgio_header *hdr = data->header; + int err; - dprintk("%s DS read\n", __func__); + err = filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, hdr->lseg); - if (filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, &reset) == -EAGAIN) { - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", - __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) - nfs4_reset_read(task, data); + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + filelayout_reset_read(data); + return task->tk_status; + case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } @@ -230,14 +290,17 @@ static void filelayout_read_release(void *data) static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { - int reset = 0; - - if (filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, &reset) == -EAGAIN) { - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", - __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) - nfs4_reset_write(task, data); + struct nfs_pgio_header *hdr = data->header; + int err; + + err = filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, hdr->lseg); + + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + filelayout_reset_write(data); + return task->tk_status; + case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } @@ -260,16 +323,17 @@ static void prepare_to_resend_writes(struct nfs_commit_data *data) static int filelayout_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) { - int reset = 0; - - if (filelayout_async_handle_error(task, data->context->state, - data->ds_clp, &reset) == -EAGAIN) { - dprintk("%s 
calling restart ds_clp %p ds_clp->cl_session %p\n", - __func__, data->ds_clp, data->ds_clp->cl_session); - if (reset) - prepare_to_resend_writes(data); - else - rpc_restart_call_prepare(task); + int err; + + err = filelayout_async_handle_error(task, NULL, data->ds_clp, + data->lseg); + + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + prepare_to_resend_writes(data); + return -EAGAIN; + case -EAGAIN: + rpc_restart_call_prepare(task); return -EAGAIN; } diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 6fb1901..3259be6 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -48,6 +48,9 @@ #define NFS4_PNFS_MAX_STRIPE_CNT 4096 #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS 12001 + enum stripetype4 { STRIPE_SPARSE = 1, STRIPE_DENSE = 2 diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6fdeca2..16cc194 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1175,7 +1175,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -static int pnfs_write_done_resend_to_mds(struct inode *inode, +int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1203,6 +1203,7 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, } return 0; } +EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); static void pnfs_ld_handle_write_error(struct nfs_write_data *data) { @@ -1329,7 +1330,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static int pnfs_read_done_resend_to_mds(struct inode *inode, +int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1353,6 +1354,7 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, } return 0; } 
+EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); static void pnfs_ld_handle_read_error(struct nfs_read_data *data) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f20054b..9cf9ede 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -222,6 +222,10 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, gfp_t gfp_flags); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); +int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops); +int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops); /* nfs4_deviceid_flags */ enum { -- cgit v0.10.2 From a033a09189c0125d56f2ac17ffb4bec5a3d3323b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:47 -0400 Subject: NFSv4.1 remove nfs4_reset_write and nfs4_reset_read Replaced by filelayout_reset_write and filelayout_reset_read Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1466c5d..989959a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -373,13 +373,11 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); -extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour, int noresvport); -extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); extern int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ab15f..49eecd5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3369,23 +3369,6 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da rpc_call_start(task); } -/* Reset 
the the nfs_read_data to send the read to the MDS. */ -void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - struct inode *inode = hdr->inode; - - dprintk("%s Reset task for i/o through\n", __func__); - data->ds_clp = NULL; - /* offsets will differ in the dense stripe case */ - data->args.offset = data->mds_offset; - data->args.fh = NFS_FH(inode); - data->read_done_cb = nfs4_read_done_cb; - task->tk_ops = hdr->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(inode)); -} -EXPORT_SYMBOL_GPL(nfs4_reset_read); - static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->header->inode; @@ -3409,24 +3392,6 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) nfs4_write_done_cb(task, data); } -/* Reset the the nfs_write_data to send the write to the MDS. */ -void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - struct inode *inode = hdr->inode; - - dprintk("%s Reset task for i/o through\n", __func__); - data->ds_clp = NULL; - data->write_done_cb = nfs4_write_done_cb; - data->args.fh = NFS_FH(inode); - data->args.bitmask = data->res.server->cache_consistency_bitmask; - data->args.offset = data->mds_offset; - data->res.fattr = &data->fattr; - task->tk_ops = hdr->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(inode)); -} -EXPORT_SYMBOL_GPL(nfs4_reset_write); - static bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) { -- cgit v0.10.2 From 0ad2f378e1af7996d6f8355c02181ff3cc7ab260 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:48 -0400 Subject: NFSv4.1 Check invalid deviceid upon slot table waitq wakeup Tasks sleeping on the slot table waitq wake to the rpc_prepare_task state. Reset the task for io through the MDS if the deviceid is invalid. 
The reset functions put the io pages through the pageio layer which has the advantage of re-coalescing which allows for the MDS and DS having different r/wsizes. Exit the awakened task without executing the rpc_call_done routine. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f503cbe5..1b9bedb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -252,7 +252,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; + struct pnfs_layout_segment *lseg = rdata->header->lseg; + if (filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg))) { + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); + filelayout_reset_read(rdata); + rpc_exit(task, 0); + return; + } rdata->read_done_cb = filelayout_read_done_cb; if (nfs41_setup_sequence(rdata->ds_clp->cl_session, @@ -269,6 +276,9 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags)) + return; + /* Note this may cause RPC to be resent */ rdata->header->mds_ops->rpc_call_done(task, data); } @@ -343,7 +353,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; + struct pnfs_layout_segment *lseg = wdata->header->lseg; + if (filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg))) { + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); + filelayout_reset_write(wdata); + rpc_exit(task, 0); + return; + } if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, task)) @@ -356,6 +373,9 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = 
data; + if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags)) + return; + /* Note this may cause RPC to be resent */ wdata->header->mds_ops->rpc_call_done(task, data); } -- cgit v0.10.2 From 671fb89695fee0c70a969371efd38ed30be76a8a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:49 -0400 Subject: NFSv4.1 wake up all tasks on un-connected DS slot table waitq The DS has a connection error (invalid deviceid). Drain the fore channel slot table waitq. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 1b9bedb..a63062d 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -133,6 +133,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, struct nfs_server *mds_server = NFS_SERVER(inode); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs_client *mds_client = mds_server->nfs_client; + struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; if (task->tk_status >= 0) return 0; @@ -186,6 +187,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); filelayout_mark_devid_invalid(devid); + rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: dprintk("%s Retry through MDS. Error %d\n", __func__, -- cgit v0.10.2 From 0a57cdac3fb9d249f4fbbc745c01b9292ef8c1b7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:50 -0400 Subject: NFSv4.1 send layoutreturn to fence disconnected data server Let the MDS know that you are redirecting I/O from pNFS to MDS. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index a63062d..c6b7c18 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -186,6 +186,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); + if (!filelayout_test_devid_invalid(devid)) + _pnfs_return_layout(state->inode); filelayout_mark_devid_invalid(devid); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 16cc194..e48017f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -692,6 +692,7 @@ out: dprintk("<-- %s status: %d\n", __func__, status); return status; } +EXPORT_SYMBOL_GPL(_pnfs_return_layout); bool pnfs_roc(struct inode *ino) { -- cgit v0.10.2 From 3a7936c3fc469c196d9163abfea6b7aa9572d443 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:51 -0400 Subject: NFSv4.1 ref count nfs_client across filelayout data server io Prepare to put a dis-connected DS client record. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index c6b7c18..eb8eb00 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -101,6 +101,9 @@ static void filelayout_reset_write(struct nfs_write_data *data) &hdr->pages, hdr->completion_ops); } + /* balance nfs_get_client in filelayout_write_pagelist */ + nfs_put_client(data->ds_clp); + data->ds_clp = NULL; } static void filelayout_reset_read(struct nfs_read_data *data) @@ -122,6 +125,9 @@ static void filelayout_reset_read(struct nfs_read_data *data) &hdr->pages, hdr->completion_ops); } + /* balance nfs_get_client in filelayout_read_pagelist */ + nfs_put_client(data->ds_clp); + data->ds_clp = NULL; } static int filelayout_async_handle_error(struct rpc_task *task, @@ -298,6 +304,8 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; + if (!test_bit(NFS_IOHDR_REDO, &rdata->header->flags)) + nfs_put_client(rdata->ds_clp); rdata->header->mds_ops->rpc_release(data); } @@ -395,6 +403,8 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; + if (!test_bit(NFS_IOHDR_REDO, &wdata->header->flags)) + nfs_put_client(wdata->ds_clp); wdata->header->mds_ops->rpc_release(data); } @@ -431,6 +441,7 @@ static void filelayout_commit_release(void *calldata) data->completion_ops->completion(data); put_lseg(data->lseg); + nfs_put_client(data->ds_clp); nfs_commitdata_release(data); } @@ -476,9 +487,11 @@ filelayout_read_pagelist(struct nfs_read_data *data) ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) return PNFS_NOT_ATTEMPTED; - dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); + dprintk("%s USE DS: %s cl_count %d\n", __func__, + ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); /* No multipath support. 
Use first DS */ + atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) @@ -512,11 +525,12 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) return PNFS_NOT_ATTEMPTED; - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, - hdr->inode->i_ino, sync, (size_t) data->args.count, offset, - ds->ds_remotestr); + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", + __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); data->write_done_cb = filelayout_write_done_cb; + atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) @@ -1048,8 +1062,10 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) filelayout_commit_release(data); return -EAGAIN; } - dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); + dprintk("%s ino %lu, how %d cl_count %d\n", __func__, + data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; + atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) -- cgit v0.10.2 From b4a2967e52523dbf0281b52c042f9042c6082f99 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:52 -0400 Subject: NFSv4.1 dereference a disconnected data server client record When the last DS io is processed, the data server client record will be freed. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index eb8eb00..eaaca89 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -196,6 +196,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, _pnfs_return_layout(state->inode); filelayout_mark_devid_invalid(devid); rpc_wake_up(&tbl->slot_tbl_waitq); + nfs4_ds_disconnect(clp); /* fall through */ default: dprintk("%s Retry through MDS. Error %d\n", __func__, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 3259be6..95562ad 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -146,5 +146,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); +void nfs4_ds_disconnect(struct nfs_client *clp); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index d4d2032..bf49b78 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -149,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) } /* + * Lookup DS by nfs_client pointer. 
Zero data server client pointer + */ +void nfs4_ds_disconnect(struct nfs_client *clp) +{ + struct nfs4_pnfs_ds *ds; + struct nfs_client *found = NULL; + + dprintk("%s clp %p\n", __func__, clp); + spin_lock(&nfs4_ds_cache_lock); + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + if (ds->ds_clp && ds->ds_clp == clp) { + found = ds->ds_clp; + ds->ds_clp = NULL; + } + spin_unlock(&nfs4_ds_cache_lock); + if (found) { + set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); + nfs_put_client(clp); + } +} + +/* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only supports IPv4 and IPv6 addresses */ -- cgit v0.10.2 From 041245c88a29273788e8eff1353bc6e1f56c61df Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 27 Apr 2012 17:53:53 -0400 Subject: NFSv4.1 resend LAYOUTGET on data server invalid layout errors The "invalid layout" class of errors is handled by destroying the layout and getting a new layout from the server. Currently, the layout must be destroyed before a new layout can be obtained. This means that all references (e.g. lsegs) to the "to be destroyed" layout header must be dropped before it can be destroyed. This in turn means waiting for all in-flight RPCs using the old layout as well as draining the data server session slot table wait queue. Set the NFS_LAYOUT_INVALID flag to redirect I/O to the MDS while waiting for the old layout to be destroyed.
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index eaaca89..474c630 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -182,6 +182,27 @@ static int filelayout_async_handle_error(struct rpc_task *task, break; case -NFS4ERR_RETRY_UNCACHED_REP: break; + /* Invalidate Layout errors */ + case -NFS4ERR_PNFS_NO_LAYOUT: + case -ESTALE: /* mapped NFS4ERR_STALE */ + case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ + case -EISDIR: /* mapped NFS4ERR_ISDIR */ + case -NFS4ERR_FHEXPIRED: + case -NFS4ERR_WRONG_TYPE: + dprintk("%s Invalid layout error %d\n", __func__, + task->tk_status); + /* + * Destroy layout so new i/o will get a new layout. + * Layout will not be destroyed until all current lseg + * references are put. Mark layout as invalid to resend failed + * i/o and all i/o waiting on the slot table to the MDS until + * layout is destroyed and a new valid layout is obtained. + */ + set_bit(NFS_LAYOUT_INVALID, + &NFS_I(state->inode)->layout->plh_flags); + pnfs_destroy_layout(NFS_I(state->inode)); + rpc_wake_up(&tbl->slot_tbl_waitq); + goto reset; /* RPC connection errors */ case -ECONNREFUSED: case -EHOSTDOWN: @@ -199,6 +220,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, nfs4_ds_disconnect(clp); /* fall through */ default: +reset: dprintk("%s Retry through MDS. 
Error %d\n", __func__, task->tk_status); return -NFS4ERR_RESET_TO_MDS; @@ -263,9 +285,8 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; - struct pnfs_layout_segment *lseg = rdata->header->lseg; - if (filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg))) { + if (filelayout_reset_to_mds(rdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_read(rdata); rpc_exit(task, 0); @@ -366,9 +387,8 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; - struct pnfs_layout_segment *lseg = wdata->header->lseg; - if (filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg))) { + if (filelayout_reset_to_mds(wdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_write(wdata); rpc_exit(task, 0); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 95562ad..43fe802 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -129,11 +129,24 @@ filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) } static inline bool +filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags); +} + +static inline bool filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) { return test_bit(NFS_DEVICEID_INVALID, &node->flags); } +static inline bool +filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) +{ + return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) || + filelayout_test_layout_invalid(lseg->pls_layout); +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e48017f..5d09a36 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -455,6 +455,7 @@ 
pnfs_destroy_layout(struct nfs_inode *nfsi) spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); } +EXPORT_SYMBOL_GPL(pnfs_destroy_layout); /* * Called by the state manger to remove all layouts established under an diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9cf9ede..7980756 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -63,6 +63,7 @@ enum { NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ + NFS_LAYOUT_INVALID, /* layout is being destroyed */ }; enum layoutdriver_policy_flags { -- cgit v0.10.2 From 53b8ee346463946f88b3e1639d688c384df1166c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 May 2012 16:36:27 -0400 Subject: NFSv4.1: Fix a bad reference count issue in the pNFS commit code filelayout_scan_commit_lists needs to bump the reference count on the struct nfs_page just like nfs_scan_commit_list(). Reported-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 474c630..33849d3 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1106,6 +1106,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; + kref_get(&req->wb_kref); if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); nfs_request_remove_commit_list(req, cinfo); -- cgit v0.10.2 From d42e78737c31f08893ed4916fc03104790867a71 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 22 May 2012 08:09:28 -0400 Subject: NFSv4.1 fix null state reference in filelayout_async_handle_error Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 33849d3..f8edb14 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -199,8 +199,8 @@ static int filelayout_async_handle_error(struct 
rpc_task *task, * layout is destroyed and a new valid layout is obtained. */ set_bit(NFS_LAYOUT_INVALID, - &NFS_I(state->inode)->layout->plh_flags); - pnfs_destroy_layout(NFS_I(state->inode)); + &NFS_I(inode)->layout->plh_flags); + pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; /* RPC connection errors */ @@ -214,7 +214,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); if (!filelayout_test_devid_invalid(devid)) - _pnfs_return_layout(state->inode); + _pnfs_return_layout(inode); filelayout_mark_devid_invalid(devid); rpc_wake_up(&tbl->slot_tbl_waitq); nfs4_ds_disconnect(clp); -- cgit v0.10.2 From 996074cb8c355bf3d87d066ba1e1189ba3f648f5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 22 May 2012 08:09:26 -0400 Subject: NFSv4.1 Just use nfs_put_client in filelayout release Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f8edb14..4662847 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -101,9 +101,6 @@ static void filelayout_reset_write(struct nfs_write_data *data) &hdr->pages, hdr->completion_ops); } - /* balance nfs_get_client in filelayout_write_pagelist */ - nfs_put_client(data->ds_clp); - data->ds_clp = NULL; } static void filelayout_reset_read(struct nfs_read_data *data) @@ -125,9 +122,6 @@ static void filelayout_reset_read(struct nfs_read_data *data) &hdr->pages, hdr->completion_ops); } - /* balance nfs_get_client in filelayout_read_pagelist */ - nfs_put_client(data->ds_clp); - data->ds_clp = NULL; } static int filelayout_async_handle_error(struct rpc_task *task, @@ -326,8 +320,7 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; - if (!test_bit(NFS_IOHDR_REDO, &rdata->header->flags)) - nfs_put_client(rdata->ds_clp); + nfs_put_client(rdata->ds_clp); rdata->header->mds_ops->rpc_release(data); } @@ 
-424,8 +417,7 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; - if (!test_bit(NFS_IOHDR_REDO, &wdata->header->flags)) - nfs_put_client(wdata->ds_clp); + nfs_put_client(wdata->ds_clp); wdata->header->mds_ops->rpc_release(data); } -- cgit v0.10.2 From bd4aeffb5b89070ae93c579f1d5a0758f7123e8b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 22 May 2012 08:09:27 -0400 Subject: NFSv4.1 skip rpc_call_done only on disconnected DS slot_table_waitq tasks We reset all I/O on a disconnected data server through the pgio layer indicated by the NFS_IOHDR_REDO flag. Differentiate between on-the-wire tasks returning with an error which must call rpc_call_done and tasks woken from the data server slot_table_waitq waiting for a session slot with a status of zero which call rpc_exit in rpc_prepare and need to skip rpc_call_done. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4662847..56aa0ec 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -302,7 +302,8 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags)) + if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + task->tk_status == 0) return; /* Note this may cause RPC to be resent */ @@ -399,7 +400,8 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags)) + if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + task->tk_status == 0) return; /* Note this may cause RPC to be resent */ -- cgit v0.10.2 From 497826af60f812240ed5b6ba80541f7c9f2154d9 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 22 May 2012 10:10:03 -0400 Subject: NFS: Fix compiler warnings The "struct inode *inode" was only used in a dprintk, so compiling 
with CONFIG_SUNRPC_DEBUG off triggers a warning. To get around this, I remove the "struct inode *inode" variable and instead change the dprintk()s to use hdr->inode instead. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 56aa0ec..ddea4d3 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -85,15 +85,14 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) static void filelayout_reset_write(struct nfs_write_data *data) { struct nfs_pgio_header *hdr = data->header; - struct inode *inode = hdr->inode; struct rpc_task *task = &data->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + hdr->inode->i_sb->s_id, + (long long)NFS_FILEID(hdr->inode), data->args.count, (unsigned long long)data->args.offset); @@ -106,15 +105,14 @@ static void filelayout_reset_write(struct nfs_write_data *data) static void filelayout_reset_read(struct nfs_read_data *data) { struct nfs_pgio_header *hdr = data->header; - struct inode *inode = hdr->inode; struct rpc_task *task = &data->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + hdr->inode->i_sb->s_id, + (long long)NFS_FILEID(hdr->inode), data->args.count, (unsigned long long)data->args.offset); -- cgit v0.10.2 From c3607282b4d8787a530eb4a9a452b4e823508b9f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:16 -0400 Subject: NFS: Don't swap bytes in nfs4_construct_boot_verifier() The SETCLIENTID boot verifier is opaque to NFSv4 servers, thus there is no requirement for byte swapping before the client puts the verifier on the wire. 
This treatment is similar to other timestamp-based verifiers. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 78784e5..0f4e540 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3908,8 +3908,8 @@ static void nfs4_construct_boot_verifier(struct nfs_client *clp, { __be32 verf[2]; - verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); - verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); + verf[0] = (__be32)clp->cl_boot_time.tv_sec; + verf[1] = (__be32)clp->cl_boot_time.tv_nsec; memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -- cgit v0.10.2 From e3c0fb7ef515852619932b0da993baa2d107684d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:24 -0400 Subject: NFS: Add NFSDBG_STATE fs/nfs/nfs4state.c does not yet have any dprintk() call sites, and I'm about to introduce some. We will need a new flag for enabling them. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index dc484c0..6930bec 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -49,7 +49,7 @@ #include "nfs4_fs.h" #include "delegation.h" -#define NFSDBG_FACILITY NFSDBG_PROC +#define NFSDBG_FACILITY NFSDBG_STATE void nfs4_renew_state(struct work_struct *work) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7f0fcfc..f8c06de 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -57,6 +57,8 @@ #include "internal.h" #include "pnfs.h" +#define NFSDBG_FACILITY NFSDBG_STATE + #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6cc7dba..80a9385 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -634,6 +634,7 @@ nfs_fileid_to_ino_t(u64 fileid) #define NFSDBG_FSCACHE 0x0800 #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 +#define NFSDBG_STATE 0x4000 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ -- cgit v0.10.2 From 
722baafc9e638714a69aa66e9ed24ef961ff350c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:44:22 -0400 Subject: NFS: Fix comment misspelling in struct nfs_client definition Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7073fc7..5498e9d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -79,7 +79,7 @@ struct nfs_client { u32 cl_seqid; /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; - struct nfs4_session *cl_session; /* sharred session */ + struct nfs4_session *cl_session; /* shared session */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v0.10.2 From 79d4e1f0d8910f0214a57832ca6d589640d572c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:44:31 -0400 Subject: NFS: Use proper naming conventions for NFSv4.1 server scope fields Clean up: When naming fields and data types, follow established conventions to facilitate accurate grep/cscope searches. Additionally, for consistency, move the scope field into the NFSv4- specific part of the nfs_client, and free that memory in the logic that shuts down NFSv4 nfs_clients. Introduced by commit 99fe60d0 "nfs41: exchange_id operation", April 1 2009. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b4e2199..471fc9b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -237,6 +237,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) nfs_idmap_delete(clp); rpc_destroy_wait_queue(&clp->cl_rpcwaitq); + kfree(clp->cl_serverscope); } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ @@ -305,7 +306,6 @@ static void nfs_free_client(struct nfs_client *clp) put_net(clp->net); kfree(clp->cl_hostname); - kfree(clp->server_scope); kfree(clp->impl_id); kfree(clp); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index edeef71..b14bcc3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -338,7 +338,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs41_handle_server_scope(struct nfs_client *, - struct server_scope **); + struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0f4e540..94494f2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5052,7 +5052,8 @@ out_inval: } static bool -nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +nfs41_same_server_scope(struct nfs41_server_scope *a, + struct nfs41_server_scope *b) { if (a->server_scope_sz == b->server_scope_sz && memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) @@ -5099,7 +5100,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_nodename, clp->cl_rpcclient->cl_auth->au_flavor); - res.server_scope = kzalloc(sizeof(struct server_scope), 
GFP_KERNEL); + res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), + GFP_KERNEL); if (unlikely(!res.server_scope)) { status = -ENOMEM; goto out; @@ -5123,18 +5125,18 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) kfree(res.impl_id); if (!status) { - if (clp->server_scope && - !nfs41_same_server_scope(clp->server_scope, + if (clp->cl_serverscope && + !nfs41_same_server_scope(clp->cl_serverscope, res.server_scope)) { dprintk("%s: server_scope mismatch detected\n", __func__); set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); - kfree(clp->server_scope); - clp->server_scope = NULL; + kfree(clp->cl_serverscope); + clp->cl_serverscope = NULL; } - if (!clp->server_scope) { - clp->server_scope = res.server_scope; + if (!clp->cl_serverscope) { + clp->cl_serverscope = res.server_scope; goto out; } } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5498e9d..900d733 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -17,7 +17,7 @@ struct nfs4_sequence_args; struct nfs4_sequence_res; struct nfs_server; struct nfs4_minor_version_ops; -struct server_scope; +struct nfs41_server_scope; struct nfs41_impl_id; /* @@ -80,13 +80,13 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* shared session */ + struct nfs41_server_scope *cl_serverscope; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif - struct server_scope *server_scope; /* from exchange_id */ struct nfs41_impl_id *impl_id; /* from exchange_id */ struct net *net; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2e53a3f..c420b8d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1104,7 +1104,7 @@ struct server_owner { char major_id[NFS4_OPAQUE_LIMIT]; }; -struct server_scope { +struct nfs41_server_scope { uint32_t 
server_scope_sz; char server_scope[NFS4_OPAQUE_LIMIT]; }; @@ -1118,7 +1118,7 @@ struct nfs41_impl_id { struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; - struct server_scope *server_scope; + struct nfs41_server_scope *server_scope; struct nfs41_impl_id *impl_id; }; -- cgit v0.10.2 From 591555465ec513c42416392d392fd56866cb220c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:44:41 -0400 Subject: NFS: Use proper naming conventions for nfs_client.impl_id field Clean up: When naming fields and data types, follow established conventions to facilitate accurate grep/cscope searches. Additionally, for consistency, move the impl_id field into the NFSv4- specific part of the nfs_client, and free that memory in the logic that shuts down NFSv4 nfs_clients. Introduced by commit 7d2ed9ac "NFSv4: parse and display server implementation ids," Fri Feb 17, 2012. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 471fc9b..39db1be 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -238,6 +238,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) rpc_destroy_wait_queue(&clp->cl_rpcwaitq); kfree(clp->cl_serverscope); + kfree(clp->cl_implid); } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ @@ -306,7 +307,6 @@ static void nfs_free_client(struct nfs_client *clp) put_net(clp->net); kfree(clp->cl_hostname); - kfree(clp->impl_id); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 94494f2..daa4e1b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5119,8 +5119,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) if (!status) { /* use the most recent implementation id */ - kfree(clp->impl_id); - clp->impl_id = res.impl_id; + kfree(clp->cl_implid); + clp->cl_implid = res.impl_id; } else kfree(res.impl_id); @@ -5144,12 +5144,12 @@ int nfs4_proc_exchange_id(struct 
nfs_client *clp, struct rpc_cred *cred) out_server_scope: kfree(res.server_scope); out: - if (clp->impl_id) + if (clp->cl_implid) dprintk("%s: Server Implementation ID: " "domain: %s, name: %s, date: %llu,%u\n", - __func__, clp->impl_id->domain, clp->impl_id->name, - clp->impl_id->date.seconds, - clp->impl_id->date.nseconds); + __func__, clp->cl_implid->domain, clp->cl_implid->name, + clp->cl_implid->date.seconds, + clp->cl_implid->date.nseconds); dprintk("<-- %s status= %d\n", __func__, status); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a973eb1..ff656c0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -796,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) { - if (nfss->nfs_client && nfss->nfs_client->impl_id) { - struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; + if (nfss->nfs_client && nfss->nfs_client->cl_implid) { + struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid; seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," "date='%llu,%u'", impl_id->name, impl_id->domain, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 900d733..773e021 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -81,13 +81,13 @@ struct nfs_client { u32 cl_exchange_flags; struct nfs4_session *cl_session; /* shared session */ struct nfs41_server_scope *cl_serverscope; + struct nfs41_impl_id *cl_implid; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif - struct nfs41_impl_id *impl_id; /* from exchange_id */ struct net *net; }; -- cgit v0.10.2 From 73ea666c2bb536f2862cefdb3e014ed62b262ba5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:44:50 -0400 Subject: NFS: Use proper naming conventions for the nfs_client.net field Clean up: When naming fields and data types, follow established conventions to 
facilitate accurate grep/cscope searches. Introduced by commit e50a7a1a "NFS: make NFS client allocated per network namespace context," Tue Jan 10, 2012. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index a5c88a5..c965542 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server, uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); int offset, len, i, rc; - struct net *net = server->nfs_client->net; + struct net *net = server->nfs_client->cl_net; struct nfs_net *nn = net_generic(net, nfs_net_id); struct bl_dev_msg *reply = &nn->bl_mount_reply; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 39db1be..9b9df71 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) { int ret = 0; - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; @@ -174,7 +174,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; - clp->net = get_net(cl_init->net); + clp->cl_net = get_net(cl_init->net); #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -252,7 +252,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net) /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); if (clp->cl_cb_ident) idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); @@ -305,7 +305,7 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) 
put_rpccred(clp->cl_machine_cred); - put_net(clp->net); + put_net(clp->cl_net); kfree(clp->cl_hostname); kfree(clp); @@ -323,7 +323,7 @@ void nfs_put_client(struct nfs_client *clp) return; dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); - nn = net_generic(clp->net, nfs_net_id); + nn = net_generic(clp->cl_net, nfs_net_id); if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); @@ -661,7 +661,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .net = clp->net, + .net = clp->cl_net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, @@ -715,7 +715,7 @@ static int nfs_start_lockd(struct nfs_server *server) .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, - .net = clp->net, + .net = clp->cl_net, }; if (nlm_init.nfs_version > 3) @@ -1060,7 +1060,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve static void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); @@ -1077,7 +1077,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) if (clp == NULL) return; - nn = net_generic(clp->net, nfs_net_id); + nn = net_generic(clp->cl_net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); if (list_empty(&clp->cl_superblocks)) @@ -1486,7 +1486,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, .rpc_ops = &nfs_v4_clientops, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, - .net = mds_clp->net, + .net = mds_clp->cl_net, }; struct rpc_timeout ds_timeout; struct nfs_client *clp; @@ -1709,7 
+1709,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, rpc_protocol(parent_server->client), parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, - parent_client->net); + parent_client->cl_net); if (error < 0) goto error; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 3e8edbe..2eaecf9 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir, static void nfs_idmap_unregister(struct nfs_client *clp, struct rpc_pipe *pipe) { - struct net *net = clp->net; + struct net *net = clp->cl_net; struct super_block *pipefs_sb; pipefs_sb = rpc_get_sb_net(net); @@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp, struct idmap *idmap, struct rpc_pipe *pipe) { - struct net *net = clp->net; + struct net *net = clp->cl_net; struct super_block *pipefs_sb; int err = 0; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index bf49b78..c610f84 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -629,7 +629,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, + da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 773e021..59410b3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -88,7 +88,7 @@ struct nfs_client { struct fscache_cookie *fscache; /* client index cache cookie */ #endif - struct net *net; + struct net *cl_net; }; /* -- cgit v0.10.2 From 177313f1498dd66b551dccadc98331b3fc3b09a4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:44:58 -0400 Subject: NFS: Clean up return code checking in nfs4_proc_exchange_id() Clean up: update to use matching types 
in "if" expressions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index daa4e1b..ab6b2e5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5102,30 +5102,30 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), GFP_KERNEL); - if (unlikely(!res.server_scope)) { + if (unlikely(res.server_scope == NULL)) { status = -ENOMEM; goto out; } res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); - if (unlikely(!res.impl_id)) { + if (unlikely(res.impl_id == NULL)) { status = -ENOMEM; goto out_server_scope; } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (!status) + if (status == 0) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); - if (!status) { + if (status == 0) { /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; } else kfree(res.impl_id); - if (!status) { - if (clp->cl_serverscope && + if (status == 0) { + if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, res.server_scope)) { dprintk("%s: server_scope mismatch detected\n", @@ -5135,7 +5135,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_serverscope = NULL; } - if (!clp->cl_serverscope) { + if (clp->cl_serverscope == NULL) { clp->cl_serverscope = res.server_scope; goto out; } @@ -5144,7 +5144,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) out_server_scope: kfree(res.server_scope); out: - if (clp->cl_implid) + if (clp->cl_implid != NULL) dprintk("%s: Server Implementation ID: " "domain: %s, name: %s, date: %llu,%u\n", __func__, clp->cl_implid->domain, clp->cl_implid->name, -- cgit v0.10.2 From ce1c8fc12d99386737953dfeb7b531dfa3d18e5e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:08 -0400 Subject: NFS: Remove nfs_unique_id Clean up: this structure is 
unused. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b14bcc3..1526fdf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -52,11 +52,6 @@ struct nfs4_minor_version_ops { const struct nfs4_state_maintenance_ops *state_renewal_ops; }; -struct nfs_unique_id { - struct rb_node rb_node; - __u64 id; -}; - #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { ktime_t create_time; -- cgit v0.10.2 From 2c820d9a97f07b273b2c8a5960bd52b1b5864c68 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:33 -0400 Subject: NFS: Force server to drop NFSv4 state nfs4_reset_all_state() refreshes the boot verifier a server sees to trigger that server to wipe this client's state. This function is invoked when an NFSv4.1 server reports that it has revoked some or all of a client's NFSv4 state. To facilitate server trunking discovery, we will eventually want to move the cl_boot_time field to a more global structure. The Uniform Client String model (and specifically, server trunking detection) requires that all servers see the same boot verifier until the client actually does reboot, and not a fresh verifier every time the client unmounts and remounts the server. Without the cl_boot_time field, however, nfs4_reset_all_state() will have to find some other way to force the server to purge the client's NFSv4 state. Because these verifiers are opaque (ie, the server doesn't know or care that they happen to be timestamps), we can force the server to wipe NFSv4 state by updating the boot verifier as we do now, then immediately afterwards establish a fresh client ID using the old boot verifier again. Hopefully there are no extra paranoid server implementations that keep track of the client's boot verifiers and prevent clients from reusing a previous one. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1526fdf..e6da021 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -24,6 +24,7 @@ enum nfs4_client_state { NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, NFS4CLNT_SERVER_SCOPE_MISMATCH, + NFS4CLNT_PURGE_STATE, }; enum nfs4_session_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab6b2e5..81ccdbb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3908,8 +3908,15 @@ static void nfs4_construct_boot_verifier(struct nfs_client *clp, { __be32 verf[2]; - verf[0] = (__be32)clp->cl_boot_time.tv_sec; - verf[1] = (__be32)clp->cl_boot_time.tv_nsec; + if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { + /* An impossible timestamp guarantees this value + * will never match a generated boot time. */ + verf[0] = 0; + verf[1] = (__be32)(NSEC_PER_SEC + 1); + } else { + verf[0] = (__be32)clp->cl_boot_time.tv_sec; + verf[1] = (__be32)clp->cl_boot_time.tv_nsec; + } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f8c06de..32cce4a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1615,7 +1615,7 @@ void nfs41_handle_recall_slot(struct nfs_client *clp) static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { - clp->cl_boot_time = CURRENT_TIME; + set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); nfs4_schedule_state_manager(clp); } @@ -1631,7 +1631,6 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) static void nfs41_handle_state_revoked(struct nfs_client *clp) { - /* Temporary */ nfs4_reset_all_state(clp); } @@ -1652,6 +1651,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) return; + + dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n", + __func__, clp->cl_hostname, clp->cl_clientid, flags); + if (flags & 
SEQ4_STATUS_RESTART_RECLAIM_NEEDED) nfs41_handle_server_reboot(clp); if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | @@ -1762,6 +1765,12 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Ensure exclusive access to NFSv4 state */ do { + if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { + nfs4_reclaim_lease(clp); + clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + } + if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); -- cgit v0.10.2 From f092075dd33ea04000590e8ffea65c2e7d03d764 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:41 -0400 Subject: NFS: Always use the same SETCLIENTID boot verifier Currently our NFS client assigns a unique SETCLIENTID boot verifier for each server IP address it knows about. It's set to CURRENT_TIME when the struct nfs_client for that server IP is created. During the SETCLIENTID operation, our client also presents an nfs_client_id4 string to servers, as an identifier on which the server can hang all of this client's NFSv4 state. Our client's nfs_client_id4 string is unique for each server IP address. An NFSv4 server is obligated to wipe all NFSv4 state associated with an nfs_client_id4 string when the client presents the same nfs_client_id4 string along with a changed SETCLIENTID boot verifier. When our client unmounts the last of a server's shares, it destroys that server's struct nfs_client. The next time the client mounts that NFS server, it creates a fresh struct nfs_client with a fresh boot verifier. On seeing the fresh verifier, the server wipes any previous NFSv4 state associated with that nfs_client_id4. However, NFSv4.1 clients are supposed to present the same nfs_client_id4 string to all servers. 
And, to support Transparent State Migration, the same nfs_client_id4 string should be presented to all NFSv4.0 servers so they recognize that migrated state for this client belongs with state a server may already have for this client. (This is known as the Uniform Client String model). If the nfs_client_id4 string is the same but the boot verifier changes for each server IP address, SETCLIENTID and EXCHANGE_ID operations from such a client could unintentionally result in a server wiping a client's previously obtained lease. Thus, if our NFS client is going to use a fixed nfs_client_id4 string, either for NFSv4.0 or NFSv4.1 mounts, our NFS client should use a boot verifier that does not change depending on server IP address. Replace our current per-nfs_client boot verifier with a per-nfs_net boot verifier. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9b9df71..af9b7e4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -184,7 +184,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; @@ -1813,6 +1812,7 @@ void nfs_clients_init(struct net *net) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); + nn->boot_time = CURRENT_TIME; } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index aa14ec3..8a6394e 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -1,3 +1,7 @@ +/* + * NFS-private data for each "struct net". Accessed with net_generic(). 
+ */ + #ifndef __NFS_NETNS_H__ #define __NFS_NETNS_H__ @@ -20,6 +24,7 @@ struct nfs_net { struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif spinlock_t nfs_client_lock; + struct timespec boot_time; }; extern int nfs_net_id; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 81ccdbb..9e9334a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,6 +64,7 @@ #include "iostat.h" #include "callback.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -3903,8 +3904,8 @@ wait_on_recovery: return -EAGAIN; } -static void nfs4_construct_boot_verifier(struct nfs_client *clp, - nfs4_verifier *bootverf) +static void nfs4_init_boot_verifier(const struct nfs_client *clp, + nfs4_verifier *bootverf) { __be32 verf[2]; @@ -3914,8 +3915,9 @@ static void nfs4_construct_boot_verifier(struct nfs_client *clp, verf[0] = 0; verf[1] = (__be32)(NSEC_PER_SEC + 1); } else { - verf[0] = (__be32)clp->cl_boot_time.tv_sec; - verf[1] = (__be32)clp->cl_boot_time.tv_nsec; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + verf[0] = (__be32)nn->boot_time.tv_sec; + verf[1] = (__be32)nn->boot_time.tv_nsec; } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } @@ -3939,7 +3941,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, int loop = 0; int status; - nfs4_construct_boot_verifier(clp, &sc_verifier); + nfs4_init_boot_verifier(clp, &sc_verifier); for(;;) { rcu_read_lock(); @@ -5099,7 +5101,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); - nfs4_construct_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s/%u", diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index db040e9..12b9982 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -53,9 +53,11 @@ #include #include #include + #include "nfs4_fs.h" #include "internal.h" #include "pnfs.h" +#include "netns.h" 
#define NFSDBG_FACILITY NFSDBG_XDR @@ -1702,6 +1704,7 @@ static void encode_create_session(struct xdr_stream *xdr, char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; uint32_t len; struct nfs_client *clp = args->client; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); u32 max_resp_sz_cached; /* @@ -1743,7 +1746,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ /* authsys_parms rfc1831 */ - *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ + *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */ p = xdr_encode_opaque(p, machine_name, len); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 59410b3..fbec57d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -61,9 +61,6 @@ struct nfs_client { struct rpc_wait_queue cl_rpcwaitq; - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - /* idmapper */ struct idmap * cl_idmap; -- cgit v0.10.2 From f411703adc762a92b72f8a93c6464050d66cb87b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:50 -0400 Subject: NFS: Refactor nfs_get_client(): add nfs_found_client() Clean up: Code that takes and releases nfs_client_lock remains in nfs_get_client(). Logic that handles a pre-existing nfs_client is moved to a separate function. No behavior change is expected. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index af9b7e4..5f19f95 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -506,6 +506,35 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat } /* + * Found an existing client. Make sure it's ready before returning. 
+ */ +static struct nfs_client * +nfs_found_client(const struct nfs_client_initdata *cl_init, + struct nfs_client *clp) +{ + int error; + + error = wait_event_killable(nfs_client_active_wq, + clp->cl_cons_state < NFS_CS_INITING); + if (error < 0) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + + dprintk("<-- %s found nfs_client %p for %s\n", + __func__, clp, cl_init->hostname ?: ""); + return clp; +} + +/* * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ @@ -528,8 +557,12 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, spin_lock(&nn->nfs_client_lock); clp = nfs_match_client(cl_init); - if (clp) - goto found_client; + if (clp) { + spin_unlock(&nn->nfs_client_lock); + if (new) + nfs_free_client(new); + return nfs_found_client(cl_init, clp); + } if (new) goto install_client; @@ -538,7 +571,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, new = nfs_alloc_client(cl_init); } while (!IS_ERR(new)); - dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); + dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", + cl_init->hostname ?: "", PTR_ERR(new)); return new; /* install a new client and return with it unready */ @@ -555,33 +589,6 @@ install_client: } dprintk("--> nfs_get_client() = %p [new]\n", clp); return clp; - - /* found an existing client - * - make sure it's ready before returning - */ -found_client: - spin_unlock(&nn->nfs_client_lock); - - if (new) - nfs_free_client(new); - - error = wait_event_killable(nfs_client_active_wq, - clp->cl_cons_state < NFS_CS_INITING); - if (error < 0) { - nfs_put_client(clp); - return ERR_PTR(-ERESTARTSYS); - } - - if (clp->cl_cons_state < NFS_CS_READY) { - error = clp->cl_cons_state; - nfs_put_client(clp); - return ERR_PTR(error); - } - - 
BUG_ON(clp->cl_cons_state != NFS_CS_READY); - - dprintk("--> nfs_get_client() = %p [share]\n", clp); - return clp; } /* -- cgit v0.10.2 From 8cab4c390b43fe34c07bd33799c1bc24be648122 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:45:59 -0400 Subject: NFS: Refactor nfs_get_client(): initialize nfs_client Clean up: Continue to rationalize the locking in nfs_get_client() by moving the logic that handles the case where a matching server IP address is not found. When we support server trunking detection, client initialization may return a different nfs_client struct than was passed to it. Change the synopsis of the init_client methods to return an nfs_client. The client initialization logic in nfs_get_client() is not much more than a wrapper around ->init_client. It's simpler to keep the little bits of error handling in the version-specific init_client methods. No behavior change is expected. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5f19f95..8a4b3c2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -546,7 +546,6 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, int noresvport) { struct nfs_client *clp, *new = NULL; - int error; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); dprintk("--> nfs_get_client(%s,v%u)\n", @@ -563,8 +562,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, nfs_free_client(new); return nfs_found_client(cl_init, clp); } - if (new) - goto install_client; + if (new) { + list_add(&new->cl_share_link, &nn->nfs_client_list); + spin_unlock(&nn->nfs_client_lock); + return cl_init->rpc_ops->init_client(new, + timeparms, ip_addr, + authflavour, noresvport); + } spin_unlock(&nn->nfs_client_lock); @@ -574,21 +578,6 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", cl_init->hostname ?: "", PTR_ERR(new)); return new; - - /* install a new client and return with it 
unready */ -install_client: - clp = new; - list_add(&clp->cl_share_link, &nn->nfs_client_list); - spin_unlock(&nn->nfs_client_lock); - - error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, - authflavour, noresvport); - if (error < 0) { - nfs_put_client(clp); - return ERR_PTR(error); - } - dprintk("--> nfs_get_client() = %p [new]\n", clp); - return clp; } /* @@ -813,10 +802,19 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, return 0; } -/* - * Initialise an NFS2 or NFS3 client +/** + * nfs_init_client - Initialise an NFS2 or NFS3 client + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: IP presentation address (not used) + * @authflavor: authentication flavor for underlying RPC transport + * @noresvport: set if RPC transport can use an ephemeral source port + * + * Returns pointer to an NFS client, or an ERR_PTR value. */ -int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, +struct nfs_client *nfs_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour, int noresvport) { @@ -825,7 +823,7 @@ int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, if (clp->cl_cons_state == NFS_CS_READY) { /* the client is already initialised */ dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); - return 0; + return clp; } /* @@ -837,12 +835,13 @@ int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); - return 0; + return clp; error: nfs_mark_client_ready(clp, error); + nfs_put_client(clp); dprintk("<-- nfs_init_client() = xerror %d\n", error); - return error; + return ERR_PTR(error); } /* @@ -1358,14 +1357,22 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) return nfs4_init_callback(clp); } -/* - * Initialise an NFS4 client record +/** + * 
nfs4_init_client - Initialise an NFS4 client record + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: callback IP address in presentation format + * @authflavor: authentication flavor for underlying RPC transport + * @noresvport: set if RPC transport can use an ephemeral source port + * + * Returns pointer to an NFS client, or an ERR_PTR value. */ -int nfs4_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour, - int noresvport) +struct nfs_client *nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int noresvport) { char buf[INET6_ADDRSTRLEN + 1]; int error; @@ -1373,7 +1380,7 @@ int nfs4_init_client(struct nfs_client *clp, if (clp->cl_cons_state == NFS_CS_READY) { /* the client is initialised already */ dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); - return 0; + return clp; } /* Check NFS protocol revision and initialize RPC op vector */ @@ -1413,12 +1420,13 @@ int nfs4_init_client(struct nfs_client *clp, if (!nfs4_has_session(clp)) nfs_mark_client_ready(clp, NFS_CS_READY); - return 0; + return clp; error: nfs_mark_client_ready(clp, error); + nfs_put_client(clp); dprintk("<-- nfs4_init_client() = xerror %d\n", error); - return error; + return ERR_PTR(error); } /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 989959a..3a9e80c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -238,7 +238,7 @@ extern int nfs4_init_ds_session(struct nfs_client *clp); /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); -extern int nfs_init_client(struct nfs_client *clp, +extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour, int noresvport); @@ -373,7 +373,7 @@ void nfs_init_cinfo_from_dreq(struct 
nfs_commit_info *cinfo, /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); -extern int nfs4_init_client(struct nfs_client *clp, +extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c420b8d..0c521cd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1397,7 +1397,8 @@ struct nfs_rpc_ops { struct nfs_open_context *ctx, int open_flags, struct iattr *iattr); - int (*init_client) (struct nfs_client *, const struct rpc_timeout *, + struct nfs_client * + (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t, int); }; -- cgit v0.10.2 From 4bf590e08f6db3395c181618a4c14f1c39b7c4af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:46:07 -0400 Subject: NFS: Add nfs_client behavior flags "noresvport" and "discrtry" can be passed to nfs_create_rpc_client() by setting flags in the passed-in nfs_client. This change makes it easy to add new flags. Note that these settings are now "sticky" over the lifetime of a struct nfs_client, and may even be copied when an nfs_client is cloned. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8a4b3c2..34b2e68 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -131,6 +131,7 @@ const struct rpc_program nfsacl_program = { #endif /* CONFIG_NFS_V3_ACL */ struct nfs_client_initdata { + unsigned long init_flags; const char *hostname; const struct sockaddr *addr; size_t addrlen; @@ -542,8 +543,7 @@ static struct nfs_client * nfs_get_client(const struct nfs_client_initdata *cl_init, const struct rpc_timeout *timeparms, const char *ip_addr, - rpc_authflavor_t authflavour, - int noresvport) + rpc_authflavor_t authflavour) { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); @@ -565,9 +565,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (new) { list_add(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); + new->cl_flags = cl_init->init_flags; return cl_init->rpc_ops->init_client(new, timeparms, ip_addr, - authflavour, noresvport); + authflavour); } spin_unlock(&nn->nfs_client_lock); @@ -651,8 +652,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, */ static int nfs_create_rpc_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - rpc_authflavor_t flavor, - int discrtry, int noresvport) + rpc_authflavor_t flavor) { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { @@ -667,9 +667,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp, .authflavor = flavor, }; - if (discrtry) + if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_DISCRTRY; - if (noresvport) + if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (!IS_ERR(clp->cl_rpcclient)) @@ -809,14 +809,12 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, * @timeparms: timeout parameters for underlying RPC transport * @ip_addr: IP presentation address (not used) * 
@authflavor: authentication flavor for underlying RPC transport - * @noresvport: set if RPC transport can use an ephemeral source port * * Returns pointer to an NFS client, or an ERR_PTR value. */ struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour, - int noresvport) + const char *ip_addr, rpc_authflavor_t authflavour) { int error; @@ -830,8 +828,7 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp, * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, - 0, noresvport); + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -881,10 +878,11 @@ static int nfs_init_server(struct nfs_server *server, nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); + if (data->flags & NFS_MOUNT_NORESVPORT) + set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, - data->flags & NFS_MOUNT_NORESVPORT); + clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); @@ -1364,15 +1362,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) * @timeparms: timeout parameters for underlying RPC transport * @ip_addr: callback IP address in presentation format * @authflavor: authentication flavor for underlying RPC transport - * @noresvport: set if RPC transport can use an ephemeral source port * * Returns pointer to an NFS client, or an ERR_PTR value. 
*/ struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, - rpc_authflavor_t authflavour, - int noresvport) + rpc_authflavor_t authflavour) { char buf[INET6_ADDRSTRLEN + 1]; int error; @@ -1386,8 +1382,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; - error = nfs_create_rpc_client(clp, timeparms, authflavour, - 1, noresvport); + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + error = nfs_create_rpc_client(clp, timeparms, authflavour); if (error < 0) goto error; @@ -1455,9 +1451,11 @@ static int nfs4_set_client(struct nfs_server *server, dprintk("--> nfs4_set_client()\n"); + if (server->flags & NFS_MOUNT_NORESVPORT) + set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, - server->flags & NFS_MOUNT_NORESVPORT); + clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; @@ -1512,7 +1510,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, - mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); + mds_clp->cl_rpcclient->cl_auth->au_flavor); dprintk("<-- %s %p\n", __func__, clp); return clp; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3a9e80c..547f24f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -240,8 +240,7 @@ extern int nfs4_init_ds_session(struct nfs_client *clp); void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour, - int noresvport); + const char *ip_addr, rpc_authflavor_t 
authflavour); /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, @@ -376,8 +375,7 @@ extern void __nfs4_read_done_cb(struct nfs_read_data *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, - rpc_authflavor_t authflavour, - int noresvport); + rpc_authflavor_t authflavour); extern int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbec57d..3a99f52 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,6 +35,9 @@ struct nfs_client { #define NFS_CS_RENEWD 3 /* - renewd started */ #define NFS_CS_STOP_RENEW 4 /* no more state to renew */ #define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */ + unsigned long cl_flags; /* behavior switches */ +#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ +#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0c521cd..07048c0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1399,7 +1399,7 @@ struct nfs_rpc_ops { struct iattr *iattr); struct nfs_client * (*init_client) (struct nfs_client *, const struct rpc_timeout *, - const char *, rpc_authflavor_t, int); + const char *, rpc_authflavor_t); }; /* -- cgit v0.10.2 From acdeb69d9c5934a678a732b4e24770326bf9471e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 21 May 2012 22:46:16 -0400 Subject: NFS: EXCHANGE_ID should save the server major and minor ID Save the server major and minor ID results from EXCHANGE_ID, as they are needed for detecting server trunking. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 34b2e68..3c14468 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -237,6 +237,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) nfs_idmap_delete(clp); rpc_destroy_wait_queue(&clp->cl_rpcwaitq); + kfree(clp->cl_serverowner); kfree(clp->cl_serverscope); kfree(clp->cl_implid); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e9334a..0d46fe4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5109,11 +5109,18 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_nodename, clp->cl_rpcclient->cl_auth->au_flavor); + res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), + GFP_KERNEL); + if (unlikely(res.server_owner == NULL)) { + status = -ENOMEM; + goto out; + } + res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), GFP_KERNEL); if (unlikely(res.server_scope == NULL)) { status = -ENOMEM; - goto out; + goto out_server_owner; } res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); @@ -5127,6 +5134,12 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); if (status == 0) { + kfree(clp->cl_serverowner); + clp->cl_serverowner = res.server_owner; + res.server_owner = NULL; + } + + if (status == 0) { /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; @@ -5150,6 +5163,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) } } +out_server_owner: + kfree(res.server_owner); out_server_scope: kfree(res.server_scope); out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 12b9982..5ad2b2c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5144,24 +5144,27 @@ static int decode_exchange_id(struct xdr_stream *xdr, if (dummy != SP4_NONE) return -EIO; - /* Throw away minor_id */ + /* server_owner4.so_minor_id */ p 
= xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; + p = xdr_decode_hyper(p, &res->server_owner->minor_id); - /* Throw away Major id */ + /* server_owner4.so_major_id */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(res->server_owner->major_id, dummy_str, dummy); + res->server_owner->major_id_sz = dummy; - /* Save server_scope */ + /* server_scope4 */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) return -EIO; - memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3a99f52..fbb78fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -80,6 +80,7 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* shared session */ + struct nfs41_server_owner *cl_serverowner; struct nfs41_server_scope *cl_serverscope; struct nfs41_impl_id *cl_implid; #endif /* CONFIG_NFS_V4 */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 07048c0..0872f32 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1098,7 +1098,7 @@ struct nfs41_exchange_id_args { u32 flags; }; -struct server_owner { +struct nfs41_server_owner { uint64_t minor_id; uint32_t major_id_sz; char major_id[NFS4_OPAQUE_LIMIT]; @@ -1118,6 +1118,7 @@ struct nfs41_impl_id { struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; + struct nfs41_server_owner *server_owner; struct nfs41_server_scope *server_scope; struct nfs41_impl_id *impl_id; }; -- cgit v0.10.2 From 7b38c3682c5cab4f98751d5fe57b78a59020653d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 May 2012 13:23:31 -0400 Subject: NFSv4.1: Fix session initialisation 
races Session initialisation is not complete until the lease manager has run. We need to ensure that both nfs4_init_session and nfs4_init_ds_session do so, and that they check for any resulting errors in clp->cl_cons_state. Only after this is done, can nfs4_ds_connect check the contents of clp->cl_exchange_flags. Signed-off-by: Trond Myklebust Cc: Andy Adamson diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3c14468..25dde07 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -592,22 +592,6 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) } /* - * With sessions, the client is not marked ready until after a - * successful EXCHANGE_ID and CREATE_SESSION. - * - * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate - * other versions of NFS can be tried. - */ -int nfs4_check_client_ready(struct nfs_client *clp) -{ - if (!nfs4_has_session(clp)) - return 0; - if (clp->cl_cons_state < NFS_CS_READY) - return -EPROTONOSUPPORT; - return 0; -} - -/* * Initialise the timeout values for a connection */ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 547f24f..5ea571e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -169,7 +169,6 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fattr *, rpc_authflavor_t); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); -extern int nfs4_check_client_ready(struct nfs_client *clp); extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, @@ -234,7 +233,7 @@ extern const u32 nfs41_maxwrite_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif -extern int nfs4_init_ds_session(struct nfs_client *clp); +extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); diff --git a/fs/nfs/nfs4filelayoutdev.c 
b/fs/nfs/nfs4filelayoutdev.c index c610f84..a1fab8d 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -203,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out; } - if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { - if (!is_ds_client(clp)) { - status = -ENODEV; - goto out_put; - } - ds->ds_clp = clp; - dprintk("%s [existing] server=%s\n", __func__, - ds->ds_remotestr); - goto out; - } - - /* - * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to - * be equal to the MDS lease. Renewal is scheduled in create_session. - */ - spin_lock(&mds_srv->nfs_client->cl_lock); - clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time; - spin_unlock(&mds_srv->nfs_client->cl_lock); - clp->cl_last_renewal = jiffies; - - /* New nfs_client */ - status = nfs4_init_ds_session(clp); + status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); if (status) goto out_put; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d46fe4..c856298 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5603,53 +5603,78 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) return status; } +/* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. 
+ */ +static int nfs41_check_session_ready(struct nfs_client *clp) +{ + int ret; + + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { + ret = nfs4_client_recover_expired_lease(clp); + if (ret) + return ret; + } + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + return 0; +} + int nfs4_init_session(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs4_session *session; unsigned int rsize, wsize; - int ret; if (!nfs4_has_session(clp)) return 0; session = clp->cl_session; - if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) - return 0; + spin_lock(&clp->cl_lock); + if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - rsize = server->rsize; - if (rsize == 0) - rsize = NFS_MAX_FILE_IO_SIZE; - wsize = server->wsize; - if (wsize == 0) - wsize = NFS_MAX_FILE_IO_SIZE; + rsize = server->rsize; + if (rsize == 0) + rsize = NFS_MAX_FILE_IO_SIZE; + wsize = server->wsize; + if (wsize == 0) + wsize = NFS_MAX_FILE_IO_SIZE; - session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; - session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; + session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; + session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; + } + spin_unlock(&clp->cl_lock); - ret = nfs4_recover_expired_lease(server); - if (!ret) - ret = nfs4_check_client_ready(clp); - return ret; + return nfs41_check_session_ready(clp); } -int nfs4_init_ds_session(struct nfs_client *clp) +int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) { struct nfs4_session *session = clp->cl_session; int ret; - if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) - return 0; - - ret = nfs4_client_recover_expired_lease(clp); - if (!ret) - /* Test for the DS role */ - if (!is_ds_client(clp)) - ret = -ENODEV; - if (!ret) - ret = nfs4_check_client_ready(clp); - return ret; + spin_lock(&clp->cl_lock); + if 
(test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { + /* + * Do not set NFS_CS_CHECK_LEASE_TIME instead set the + * DS lease to be equal to the MDS lease. + */ + clp->cl_lease_time = lease_time; + clp->cl_last_renewal = jiffies; + } + spin_unlock(&clp->cl_lock); + ret = nfs41_check_session_ready(clp); + if (ret) + return ret; + /* Test for the DS role */ + if (!is_ds_client(clp)) + return -ENODEV; + return 0; } EXPORT_SYMBOL_GPL(nfs4_init_ds_session); -- cgit v0.10.2 From 4697bd5e9419348ef9fa9b55cefe4355ad9d3d01 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 May 2012 13:24:36 -0400 Subject: NFSv4: Fix a race in the net namespace mount notification Since the struct nfs_client gets added to the global nfs_client_list before it is initialised, it is possible that rpc_pipefs_event can end up trying to create idmapper entries on such a thing. The solution is to have the mount notification wait for the initialisation of each nfs_client to complete, and then to skip any entries for which it failed. Reported-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust Acked-by: Stanislav Kinsbursky diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 25dde07..d356642 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -507,6 +507,17 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat return NULL; } +static bool nfs_client_init_is_complete(const struct nfs_client *clp) +{ + return clp->cl_cons_state != NFS_CS_INITING; +} + +int nfs_wait_client_init_complete(const struct nfs_client *clp) +{ + return wait_event_killable(nfs_client_active_wq, + nfs_client_init_is_complete(clp)); +} + /* * Found an existing client. Make sure it's ready before returning. 
*/ @@ -516,8 +527,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, { int error; - error = wait_event_killable(nfs_client_active_wq, - clp->cl_cons_state < NFS_CS_INITING); + error = nfs_wait_client_init_complete(clp); if (error < 0) { nfs_put_client(clp); return ERR_PTR(-ERESTARTSYS); @@ -1333,7 +1343,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) * so that the client back channel can find the * nfs_client struct */ - clp->cl_cons_state = NFS_CS_SESSION_INITING; + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2eaecf9..861be75 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -530,9 +530,24 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) struct nfs_net *nn = net_generic(net, nfs_net_id); struct dentry *cl_dentry; struct nfs_client *clp; + int err; +restart: spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + /* Wait for initialisation to finish */ + if (clp->cl_cons_state == NFS_CS_INITING) { + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + err = nfs_wait_client_init_complete(clp); + nfs_put_client(clp); + if (err) + return NULL; + goto restart; + } + /* Skip nfs_clients that failed to initialise */ + if (clp->cl_cons_state < 0) + continue; if (clp->rpc_ops != &nfs_v4_clientops) continue; cl_dentry = clp->cl_idmap->idmap_pipe->dentry; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ea571e..1848a72 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -168,6 +168,7 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr 
*ds_addr, -- cgit v0.10.2 From 54ac471c83aff6b1e068eb8029c797dc68a76e89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 May 2012 13:26:10 -0400 Subject: NFS: Add memory barriers to the nfs_client->cl_cons_state initialisation Ensure that a process that uses the nfs_client->cl_cons_state test for whether the initialisation process is finished does not read stale data. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d356642..a50bdfb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -459,6 +459,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, clp->cl_cons_state == NFS_CS_SESSION_INITING)) return false; + smp_rmb(); + /* Match the version and minorversion */ if (clp->rpc_ops->version != 4 || clp->cl_minorversion != minorversion) @@ -539,6 +541,8 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, return ERR_PTR(error); } + smp_rmb(); + BUG_ON(clp->cl_cons_state != NFS_CS_READY); dprintk("<-- %s found nfs_client %p for %s\n", @@ -597,6 +601,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, */ void nfs_mark_client_ready(struct nfs_client *clp, int state) { + smp_wmb(); clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 861be75..b5b86a0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -548,6 +548,7 @@ restart: /* Skip nfs_clients that failed to initialise */ if (clp->cl_cons_state < 0) continue; + smp_rmb(); if (clp->rpc_ops != &nfs_v4_clientops) continue; cl_dentry = clp->cl_idmap->idmap_pipe->dentry; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c856298..8f39bb3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5621,6 +5621,7 @@ static int nfs41_check_session_ready(struct nfs_client *clp) } if (clp->cl_cons_state < NFS_CS_READY) return -EPROTONOSUPPORT; + smp_rmb(); return 0; } -- cgit v0.10.2 From 88034c3d88c2c48b215f2cc5eb22e564aa817f9c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: 
Wed, 23 May 2012 05:02:34 -0400 Subject: NFSv4.1 mdsthreshold attribute xdr We only support one layout type per file system, so one threshold_item4 per mdsthreshold4. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5ad2b2c..edb8ac7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -101,9 +101,12 @@ static int nfs4_stat_to_errno(int); #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We support only one layout type per file system */ +#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) + 3 + 3 + 3 + nfs4_owner_maxsz + \ + nfs4_group_maxsz + decode_mdsthreshold_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -1172,6 +1175,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c bitmask[1] & nfs4_fattr_bitmap[1], hdr); } +static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, + struct compound_hdr *hdr) +{ + encode_getattr_three(xdr, + bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1], + bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, + hdr); +} + static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { encode_getattr_three(xdr, @@ -2164,7 +2177,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -4186,6 +4199,110 
@@ xdr_error: return status; } +static int decode_threshold_hint(struct xdr_stream *xdr, + uint32_t *bitmap, + uint64_t *res, + uint32_t hint_bit) +{ + __be32 *p; + + *res = 0; + if (likely(bitmap[0] & hint_bit)) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + xdr_decode_hyper(p, res); + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_first_threshold_item4(struct xdr_stream *xdr, + struct nfs4_threshold *res) +{ + __be32 *p, *savep; + uint32_t bitmap[3] = {0,}, attrlen; + int status; + + /* layout type */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + res->l_type = be32_to_cpup(p); + + /* thi_hintset bitmap */ + status = decode_attr_bitmap(xdr, bitmap); + if (status < 0) + goto xdr_error; + + /* thi_hintlist length */ + status = decode_attr_length(xdr, &attrlen, &savep); + if (status < 0) + goto xdr_error; + /* thi_hintlist */ + status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz, + THRESHOLD_RD_IO); + if (status < 0) + goto xdr_error; + status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz, + THRESHOLD_WR_IO); + if (status < 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); + res->bm = bitmap[0]; + + dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", + __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz, + res->wr_io_sz); +xdr_error: + dprintk("%s ret=%d!\n", __func__, status); + return status; +} + +/* + * Thresholds on pNFS direct I/O vrs MDS I/O + */ +static int decode_attr_mdsthreshold(struct xdr_stream *xdr, + uint32_t *bitmap, + struct nfs4_threshold *res) +{ + __be32 *p; + int status = 0; + uint32_t num; + + if (unlikely(bitmap[2] & 
(FATTR4_WORD2_MDSTHRESHOLD - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num = be32_to_cpup(p); + if (num == 0) + return 0; + if (num > 1) + printk(KERN_INFO "%s: Warning: Multiple pNFS layout " + "drivers per filesystem not supported\n", + __func__); + + status = decode_first_threshold_item4(xdr, res); + } + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, @@ -4292,6 +4409,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); + if (status < 0) + goto xdr_error; + xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); return status; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 0987146..72b6bad 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -526,6 +526,13 @@ enum lock_type4 { #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) +#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) + +/* MDS threshold bitmap bits */ +#define THRESHOLD_RD (1UL << 0) +#define THRESHOLD_WR (1UL << 1) +#define THRESHOLD_RD_IO (1UL << 2) +#define THRESHOLD_WR_IO (1UL << 3) #define NFSPROC4_NULL 0 #define NFSPROC4_COMPOUND 1 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0872f32..201c312 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid return a->major == b->major && a->minor == b->minor; } +struct nfs4_threshold { + __u32 bm; + __u32 l_type; + __u64 rd_sz; + __u64 wr_sz; + __u64 rd_io_sz; + __u64 
wr_io_sz; +}; + struct nfs_fattr { unsigned int valid; /* which fields are valid */ umode_t mode; @@ -67,6 +76,7 @@ struct nfs_fattr { unsigned long gencount; struct nfs4_string *owner_name; struct nfs4_string *group_name; + struct nfs4_threshold *mdsthreshold; /* pNFS threshold hints */ }; #define NFS_ATTR_FATTR_TYPE (1U << 0) -- cgit v0.10.2 From 82be417aa37c05116e310b0f2171187ea389f89b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 23 May 2012 05:02:35 -0400 Subject: NFSv4.1 cache mdsthreshold values on OPEN Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9ad81ce..889f7e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -641,6 +641,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); + ctx->mdsthreshold = NULL; return ctx; } @@ -669,6 +670,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) put_rpccred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); + kfree(ctx->mdsthreshold); kfree(ctx); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8f39bb3..e725736 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1782,7 +1782,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, + struct dentry *dentry, + fmode_t fmode, + int flags, + struct iattr *sattr, + struct rpc_cred *cred, + struct nfs4_state **res, + struct nfs4_threshold **ctx_th) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -1807,6 +1814,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode if (opendata == NULL) goto 
err_put_state_owner; + if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { + opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); + if (!opendata->f_attr.mdsthreshold) + goto err_opendata_put; + } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); @@ -1832,11 +1844,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode nfs_setattr_update_inode(state->inode, sattr); nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); } + + if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) + *ctx_th = opendata->f_attr.mdsthreshold; + else + kfree(opendata->f_attr.mdsthreshold); + opendata->f_attr.mdsthreshold = NULL; + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; return 0; err_opendata_put: + kfree(opendata->f_attr.mdsthreshold); nfs4_opendata_put(opendata); err_put_state_owner: nfs4_put_state_owner(sp); @@ -1846,14 +1866,21 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, + struct dentry *dentry, + fmode_t fmode, + int flags, + struct iattr *sattr, + struct rpc_cred *cred, + struct nfs4_threshold **ctx_th) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, + &res, ctx_th); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -2177,7 +2204,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags struct nfs4_state *state; /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); + state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, + ctx->cred, 
&ctx->mdsthreshold); if (IS_ERR(state)) return ERR_CAST(state); ctx->state = state; @@ -2779,7 +2807,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, fmode = ctx->mode; } sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); + state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5d09a36..cbcb6ae 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1630,3 +1630,15 @@ out_free: kfree(data); goto out; } + +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ + struct nfs4_threshold *thp; + + thp = kzalloc(sizeof(*thp), GFP_NOFS); + if (!thp) { + dprintk("%s mdsthreshold allocation failed\n", __func__); + return NULL; + } + return thp; +} diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 7980756..29fd23c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -227,6 +227,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, const struct nfs_pgio_completion_ops *compl_ops); +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ enum { @@ -360,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, + struct nfs_server *nfss) +{ + return (dst && src && src->bm != 0 && + nfss->pnfs_curr_ld->id == src->l_type); +} + #ifdef NFS_DEBUG void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); #else @@ -485,6 +494,18 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) return 0; } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, + struct nfs_server *nfss) +{ + return false; +} + +static inline struct 
nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ + return NULL; +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 80a9385..ce910cb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -102,6 +102,7 @@ struct nfs_open_context { int error; struct list_head list; + struct nfs4_threshold *mdsthreshold; }; struct nfs_open_dir_context { -- cgit v0.10.2 From 2701d086dbfca03b2d28b25c6dc11dd78d0e26ad Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 24 May 2012 13:13:24 -0400 Subject: NFSv4.1 add nfs_inode book keeping for mdsthreshold Keep track of the number of bytes read or written via buffered, direct, and mem-mapped i/o for use by mdsthreshold size_io hints. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c47a46e..23d170b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -447,6 +447,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); if (!result) result = nfs_direct_wait(dreq); + NFS_I(inode)->read_io += result; out_release: nfs_direct_req_release(dreq); out: @@ -785,6 +786,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } nfs_pageio_complete(&desc); + NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; /* * If no bytes were started, return the error, and let the diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8eda8a6..56311ca 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -424,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, if (status < 0) return status; + NFS_I(mapping->host)->write_io += copied; return copied; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 889f7e5..a6f5fbb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct 
nfs_fattr *fattr) inode->i_gid = -2; inode->i_blocks = 0; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->write_io = 0; + nfsi->read_io = 0; nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cbcb6ae..6620606 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); if (list_empty(&lo->plh_segs)) { + /* Reset MDS Threshold I/O counters */ + NFS_I(lo->plh_inode)->write_io = 0; + NFS_I(lo->plh_inode)->read_io = 0; if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) put_layout_hdr_locked(lo); return 0; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2cfdd77..86ced78 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -152,6 +152,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); + NFS_I(inode)->read_io += pgio.pg_bytes_written; return 0; } @@ -656,6 +657,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); nfs_pageio_complete(&pgio); + NFS_I(inode)->read_io += pgio.pg_bytes_written; npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); read_complete: diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ce910cb..b23cfc1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -202,6 +202,9 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_hdr *layout; #endif /* CONFIG_NFS_V4*/ + /* how many bytes have been written/read and how many bytes queued up */ + __u64 write_io; + __u64 read_io; #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; #endif -- cgit v0.10.2 From 
d23d61c8d351f5ced44ce87caf1fa3baab4c3f89 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 23 May 2012 05:02:37 -0400 Subject: NFSv4.1 test the mdsthreshold hint parameters Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6620606..b8323aa 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -936,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, } /* + * Use mdsthreshold hints set at each OPEN to determine if I/O should go + * to the MDS or over pNFS + * + * The nfs_inode read_io and write_io fields are cumulative counters reset + * when there are no layout segments. Note that in pnfs_update_layout iomode + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a + * WRITE request. + * + * A return of true means use MDS I/O. + * + * From rfc 5661: + * If a file's size is smaller than the file size threshold, data accesses + * SHOULD be sent to the metadata server. If an I/O request has a length that + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata + * server. If both file size and I/O size are provided, the client SHOULD + * reach or exceed both thresholds before sending its read or write + * requests to the data server. 
+ */ +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, + struct inode *ino, int iomode) +{ + struct nfs4_threshold *t = ctx->mdsthreshold; + struct nfs_inode *nfsi = NFS_I(ino); + loff_t fsize = i_size_read(ino); + bool size = false, size_set = false, io = false, io_set = false, ret = false; + + if (t == NULL) + return ret; + + dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", + __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); + + switch (iomode) { + case IOMODE_READ: + if (t->bm & THRESHOLD_RD) { + dprintk("%s fsize %llu\n", __func__, fsize); + size_set = true; + if (fsize < t->rd_sz) + size = true; + } + if (t->bm & THRESHOLD_RD_IO) { + dprintk("%s nfsi->read_io %llu\n", __func__, + nfsi->read_io); + io_set = true; + if (nfsi->read_io < t->rd_io_sz) + io = true; + } + break; + case IOMODE_RW: + if (t->bm & THRESHOLD_WR) { + dprintk("%s fsize %llu\n", __func__, fsize); + size_set = true; + if (fsize < t->wr_sz) + size = true; + } + if (t->bm & THRESHOLD_WR_IO) { + dprintk("%s nfsi->write_io %llu\n", __func__, + nfsi->write_io); + io_set = true; + if (nfsi->write_io < t->wr_io_sz) + io = true; + } + break; + } + if (size_set && io_set) { + if (size && io) + ret = true; + } else if (size || io) + ret = true; + + dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); + return ret; +} + +/* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. 
*/ @@ -962,6 +1037,10 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; + + if (pnfs_within_mdsthreshold(ctx, ino, iomode)) + return NULL; + spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { -- cgit v0.10.2 From 7c44f1ae4a21458a1ea3d6482ffb3136f1df6d2b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 24 May 2012 13:22:50 -0400 Subject: nfs4.1: add BIND_CONN_TO_SESSION operation This patch adds the BIND_CONN_TO_SESSION operation which is needed for upcoming SP4_MACH_CRED work and useful for recovering from broken connections without destroying the session. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e6da021..2c7f1cf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -211,6 +211,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +extern int nfs4_proc_bind_conn_to_session(struct nfs_client *); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e725736..e8988c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5100,6 +5100,60 @@ nfs41_same_server_scope(struct nfs41_server_scope *a, } /* + * nfs4_proc_bind_conn_to_session() + * + * The 4.1 client currently uses the same TCP connection for the + * fore and backchannel. 
+ */ +int nfs4_proc_bind_conn_to_session(struct nfs_client *clp) +{ + int status; + struct nfs41_bind_conn_to_session_res res; + struct rpc_message msg = { + .rpc_proc = + &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION], + .rpc_argp = clp, + .rpc_resp = &res, + }; + + dprintk("--> %s\n", __func__); + BUG_ON(clp == NULL); + + res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); + if (unlikely(res.session == NULL)) { + status = -ENOMEM; + goto out; + } + + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + if (status == 0) { + if (memcmp(res.session->sess_id.data, + clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { + dprintk("NFS: %s: Session ID mismatch\n", __func__); + status = -EIO; + goto out_session; + } + if (res.dir != NFS4_CDFS4_BOTH) { + dprintk("NFS: %s: Unexpected direction from server\n", + __func__); + status = -EIO; + goto out_session; + } + if (res.use_conn_in_rdma_mode) { + dprintk("NFS: %s: Server returned RDMA mode = true\n", + __func__); + status = -EIO; + goto out_session; + } + } +out_session: + kfree(res.session); +out: + dprintk("<-- %s status= %d\n", __func__, status); + return status; +} + +/* * nfs4_proc_exchange_id() * * Since the clientid has expired, all compounds using sessions diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index edb8ac7..a6b95b7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -326,6 +326,16 @@ static int nfs4_stat_to_errno(int); 1 /* csr_flags */ + \ decode_channel_attrs_maxsz + \ decode_channel_attrs_maxsz) +#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \ + /* bctsa_sessid */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + 1 /* bctsa_dir */ + \ + 1 /* bctsa_use_conn_in_rdma_mode */) +#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \ + /* bctsr_sessid */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + 1 /* bctsr_dir */ + \ + 1 /* bctsr_use_conn_in_rdma_mode */) #define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) #define 
decode_destroy_session_maxsz (op_decode_hdr_maxsz) #define encode_sequence_maxsz (op_encode_hdr_maxsz + \ @@ -719,6 +729,12 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_secinfo_maxsz) #if defined(CONFIG_NFS_V4_1) +#define NFS4_enc_bind_conn_to_session_sz \ + (compound_encode_hdr_maxsz + \ + encode_bind_conn_to_session_maxsz) +#define NFS4_dec_bind_conn_to_session_sz \ + (compound_decode_hdr_maxsz + \ + decode_bind_conn_to_session_maxsz) #define NFS4_enc_exchange_id_sz \ (compound_encode_hdr_maxsz + \ encode_exchange_id_maxsz) @@ -1669,6 +1685,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru #if defined(CONFIG_NFS_V4_1) /* NFSv4.1 operations */ +static void encode_bind_conn_to_session(struct xdr_stream *xdr, + struct nfs4_session *session, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION, + decode_bind_conn_to_session_maxsz, hdr); + encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_reserve_space(xdr, 8); + *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH); + *p = 0; /* use_conn_in_rdma_mode = False */ +} + static void encode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_args *args, struct compound_hdr *hdr) @@ -2630,6 +2660,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, #if defined(CONFIG_NFS_V4_1) /* + * BIND_CONN_TO_SESSION request + */ +static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_client *clp) +{ + struct compound_hdr hdr = { + .minorversion = clp->cl_mvops->minor_version, + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_bind_conn_to_session(xdr, clp->cl_session, &hdr); + encode_nops(&hdr); +} + +/* * EXCHANGE_ID request */ static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, @@ -5366,6 +5412,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) return decode_opaque_fixed(xdr, sid->data, 
NFS4_MAX_SESSIONID_LEN); } +static int decode_bind_conn_to_session(struct xdr_stream *xdr, + struct nfs41_bind_conn_to_session_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION); + if (!status) + status = decode_sessionid(xdr, &res->session->sess_id); + if (unlikely(status)) + return status; + + /* dir flags, rdma mode bool */ + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + + res->dir = be32_to_cpup(p++); + if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH) + return -EIO; + if (be32_to_cpup(p) == 0) + res->use_conn_in_rdma_mode = false; + else + res->use_conn_in_rdma_mode = true; + + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_create_session(struct xdr_stream *xdr, struct nfs41_create_session_res *res) { @@ -6648,6 +6725,22 @@ out: #if defined(CONFIG_NFS_V4_1) /* + * Decode BIND_CONN_TO_SESSION response + */ +static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (!status) + status = decode_bind_conn_to_session(xdr, res); + return status; +} + +/* * Decode EXCHANGE_ID response */ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, @@ -7128,6 +7221,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), #if defined(CONFIG_NFS_V4_1) + PROC(BIND_CONN_TO_SESSION, + enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC(CREATE_SESSION, enc_create_session, dec_create_session), PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 72b6bad..a2b71cb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -69,6 +69,10 @@ #define NFS4_CDFC4_FORE_OR_BOTH 0x3 
#define NFS4_CDFC4_BACK_OR_BOTH 0x7 +#define NFS4_CDFS4_FORE 0x1 +#define NFS4_CDFS4_BACK 0x2 +#define NFS4_CDFS4_BOTH 0x3 + #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 @@ -589,6 +593,7 @@ enum { NFSPROC4_CLNT_SECINFO, /* nfs41 */ + NFSPROC4_CLNT_BIND_CONN_TO_SESSION, NFSPROC4_CLNT_EXCHANGE_ID, NFSPROC4_CLNT_CREATE_SESSION, NFSPROC4_CLNT_DESTROY_SESSION, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 201c312..6387fc0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1125,6 +1125,12 @@ struct nfs41_impl_id { struct nfstime4 date; }; +struct nfs41_bind_conn_to_session_res { + struct nfs4_session *session; + u32 dir; + bool use_conn_in_rdma_mode; +}; + struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; -- cgit v0.10.2 From a9e64442f1399e9f6ceaeeeb03a26a560c949fac Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 24 May 2012 12:26:37 -0400 Subject: nfs41: Use BIND_CONN_TO_SESSION for CB_PATH_DOWN* The state manager can handle SEQ4_STATUS_CB_PATH_DOWN* flags with a BIND_CONN_TO_SESSION instead of destroying the session and creating a new one. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2c7f1cf..5fcb1ad 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -25,6 +25,7 @@ enum nfs4_client_state { NFS4CLNT_LEASE_CONFIRM, NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, + NFS4CLNT_BIND_CONN_TO_SESSION, }; enum nfs4_session_state { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 32cce4a..03fa802 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1640,13 +1640,20 @@ static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) nfs_expire_all_delegations(clp); } -static void nfs41_handle_cb_path_down(struct nfs_client *clp) +static void nfs41_handle_backchannel_fault(struct nfs_client *clp) { nfs_expire_all_delegations(clp); if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) nfs4_schedule_state_manager(clp); } +static void nfs41_handle_cb_path_down(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, + &clp->cl_state) == 0) + nfs4_schedule_state_manager(clp); +} + void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) @@ -1664,9 +1671,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs41_handle_state_revoked(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); - if (flags & (SEQ4_STATUS_CB_PATH_DOWN | - SEQ4_STATUS_BACKCHANNEL_FAULT | - SEQ4_STATUS_CB_PATH_DOWN_SESSION)) + if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) + nfs41_handle_backchannel_fault(clp); + else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | + SEQ4_STATUS_CB_PATH_DOWN_SESSION)) nfs41_handle_cb_path_down(clp); } @@ -1691,6 +1699,7 @@ static int nfs4_reset_session(struct nfs_client *clp) clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, 
&clp->cl_state); /* Let the state manager reestablish state */ if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) @@ -1727,10 +1736,19 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; } +static int nfs4_bind_conn_to_session(struct nfs_client *clp) +{ + return nfs4_proc_bind_conn_to_session(clp); +} #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } + +static int nfs4_bind_conn_to_session(struct nfs_client *clp) +{ + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors @@ -1814,6 +1832,14 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } + /* Send BIND_CONN_TO_SESSION */ + if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, + &clp->cl_state) && nfs4_has_session(clp)) { + status = nfs4_bind_conn_to_session(clp); + if (status < 0) + goto out_error; + } + /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { status = nfs4_do_reclaim(clp, -- cgit v0.10.2 From bbafffd293e47f4cd5f0ae8b91d7d5767b242a5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 May 2012 16:31:39 -0400 Subject: NFSv4.1: Exchange ID must use GFP_NOFS allocation mode Exchange ID can be called in a lease reclaim situation, so it will deadlock if it then tries to write out dirty NFS pages. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e8988c0..f8817e8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5192,20 +5192,20 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_auth->au_flavor); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), - GFP_KERNEL); + GFP_NOFS); if (unlikely(res.server_owner == NULL)) { status = -ENOMEM; goto out; } res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), - GFP_KERNEL); + GFP_NOFS); if (unlikely(res.server_scope == NULL)) { status = -ENOMEM; goto out_server_owner; } - res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); + res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); if (unlikely(res.impl_id == NULL)) { status = -ENOMEM; goto out_server_scope; -- cgit v0.10.2 From 2a6ee6aa2f6dfc47fce8380ec9e31601c96a693e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 15:00:06 -0400 Subject: NFSv4: Clean up the error handling for nfs4_reclaim_lease Try to consolidate the error handling for nfs4_reclaim_lease into a single function instead of doing a bit here, and a bit there... Also ensure that NFS4CLNT_PURGE_STATE handles errors correctly. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 03fa802..758b9a8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1574,26 +1574,57 @@ out: return nfs4_recovery_handle_error(clp, status); } +/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors + * on EXCHANGE_ID for v4.1 + */ +static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) +{ + switch (status) { + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_STALE_CLIENTID: + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; + case -EACCES: + if (clp->cl_machine_cred == NULL) + return -EACCES; + /* Handle case where the user hasn't set up machine creds */ + nfs4_clear_machine_cred(clp); + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EAGAIN: + ssleep(1); + break; + + case -NFS4ERR_MINOR_VERS_MISMATCH: + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) + nfs_mark_client_ready(clp, -EPROTONOSUPPORT); + return -EPROTONOSUPPORT; + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return status; + } + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + return 0; +} + static int nfs4_reclaim_lease(struct nfs_client *clp) { struct rpc_cred *cred; const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; - int status = -ENOENT; + int status; cred = ops->get_clid_cred(clp); - if (cred != NULL) { - status = ops->establish_clid(clp, cred); - put_rpccred(cred); - /* Handle case where the user hasn't set up machine creds */ - if (status == -EACCES && cred == clp->cl_machine_cred) { - nfs4_clear_machine_cred(clp); - status = -EAGAIN; - } - if (status == -NFS4ERR_MINOR_VERS_MISMATCH) - status = -EPROTONOSUPPORT; - } - return status; + if (cred == NULL) + return -ENOENT; + status = ops->establish_clid(clp, cred); + put_rpccred(cred); + if (status != 0) + return nfs4_handle_reclaim_lease_error(clp, status); 
+ return 0; } #ifdef CONFIG_NFS_V4_1 @@ -1751,32 +1782,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) } #endif /* CONFIG_NFS_V4_1 */ -/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors - * on EXCHANGE_ID for v4.1 - */ -static void nfs4_set_lease_expired(struct nfs_client *clp, int status) -{ - switch (status) { - case -NFS4ERR_CLID_INUSE: - case -NFS4ERR_STALE_CLIENTID: - clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - break; - case -NFS4ERR_DELAY: - case -ETIMEDOUT: - case -EAGAIN: - ssleep(1); - break; - - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); - case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery - * in nfs4_exchange_id */ - default: - return; - } - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); -} - static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; @@ -1784,7 +1789,9 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Ensure exclusive access to NFSv4 state */ do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { - nfs4_reclaim_lease(clp); + status = nfs4_reclaim_lease(clp); + if (status < 0) + goto out_error; clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); } @@ -1792,16 +1799,10 @@ static void nfs4_state_manager(struct nfs_client *clp) if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); - if (status) { - nfs4_set_lease_expired(clp, status); - if (test_bit(NFS4CLNT_LEASE_EXPIRED, - &clp->cl_state)) - continue; - if (clp->cl_cons_state == - NFS_CS_SESSION_INITING) - nfs_mark_client_ready(clp, status); + if (status < 0) goto out_error; - } + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + continue; clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, -- cgit v0.10.2 From be0bfed002e0c64a91dacc42a4dab6e883e6bc7e Mon Sep 17 00:00:00 2001 From: Trond 
Myklebust Date: Fri, 25 May 2012 16:02:15 -0400 Subject: NFSv4: When purging the lease, we must clear NFS4CLNT_LEASE_CONFIRM Otherwise we can end up not sending a new exchange-id/setclientid Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 758b9a8..604c600 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1647,6 +1647,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); nfs4_schedule_state_manager(clp); } -- cgit v0.10.2 From 89a217360ef4e96eb83758cb9647f1c42581b097 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 15:00:06 -0400 Subject: NFSv4.1: Handle NFS4ERR_SEQ_MISORDERED when confirming the lease Apparently the patch "NFS: Always use the same SETCLIENTID boot verifier" is tickling a Linux nfs server bug, and causing a regression: the server can get into a situation where it keeps replying NFS4ERR_SEQ_MISORDERED to our CREATE_SESSION request even when we are sending the correct sequence ID. Fix this by purging the lease and then retrying. 
Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 604c600..419f8c4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1580,6 +1580,11 @@ out: static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) { switch (status) { + case -NFS4ERR_SEQ_MISORDERED: + if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) + return -ESERVERFAULT; + /* Lease confirmation error: retry after purging the lease */ + ssleep(1); case -NFS4ERR_CLID_INUSE: case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); -- cgit v0.10.2 From ad24ecfbcddfa88541bccc980e753aeda8bf4031 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 17:11:42 -0400 Subject: NFSv4.1: Move NFSPROC4_CLNT_BIND_CONN_TO_SESSION to the end of the operations For backward compatibility with nfs-utils. Signed-off-by: Trond Myklebust Cc: Weston Andros Adamson diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a6b95b7..1d4d259 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7221,8 +7221,6 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), #if defined(CONFIG_NFS_V4_1) - PROC(BIND_CONN_TO_SESSION, - enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC(CREATE_SESSION, enc_create_session, dec_create_session), PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), @@ -7237,6 +7235,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), + PROC(BIND_CONN_TO_SESSION, + enc_bind_conn_to_session, dec_bind_conn_to_session), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a2b71cb..54006a9 100644 --- 
a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -593,7 +593,6 @@ enum { NFSPROC4_CLNT_SECINFO, /* nfs41 */ - NFSPROC4_CLNT_BIND_CONN_TO_SESSION, NFSPROC4_CLNT_EXCHANGE_ID, NFSPROC4_CLNT_CREATE_SESSION, NFSPROC4_CLNT_DESTROY_SESSION, @@ -608,6 +607,7 @@ enum { NFSPROC4_CLNT_TEST_STATEID, NFSPROC4_CLNT_FREE_STATEID, NFSPROC4_CLNT_GETDEVICELIST, + NFSPROC4_CLNT_BIND_CONN_TO_SESSION, }; /* nfs41 types */ -- cgit v0.10.2 From 848f5bda54ef19435ff78f124082bf6eff2ab620 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 17:51:23 -0400 Subject: NFSv4.1: Ensure we use the correct credentials for session create/destroy Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5fcb1ad..a5dbe62 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -241,8 +241,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session, struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); -extern int nfs4_proc_create_session(struct nfs_client *); -extern int nfs4_proc_destroy_session(struct nfs4_session *); +extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f8817e8..8fa3a36 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5480,8 +5480,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) void nfs4_destroy_session(struct nfs4_session *session) { struct rpc_xprt *xprt; + struct rpc_cred *cred; - nfs4_proc_destroy_session(session); + cred = nfs4_get_exchange_id_cred(session->clp); + nfs4_proc_destroy_session(session, cred); + if (cred) + put_rpccred(cred); rcu_read_lock(); xprt = 
rcu_dereference(session->clp->cl_rpcclient->cl_xprt); @@ -5591,7 +5595,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, return nfs4_verify_back_channel_attrs(args, session); } -static int _nfs4_proc_create_session(struct nfs_client *clp) +static int _nfs4_proc_create_session(struct nfs_client *clp, + struct rpc_cred *cred) { struct nfs4_session *session = clp->cl_session; struct nfs41_create_session_args args = { @@ -5605,6 +5610,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], .rpc_argp = &args, .rpc_resp = &res, + .rpc_cred = cred, }; int status; @@ -5629,7 +5635,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp) * It is the responsibility of the caller to verify the session is * expired before calling this routine. */ -int nfs4_proc_create_session(struct nfs_client *clp) +int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred) { int status; unsigned *ptr; @@ -5637,7 +5643,7 @@ int nfs4_proc_create_session(struct nfs_client *clp) dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); - status = _nfs4_proc_create_session(clp); + status = _nfs4_proc_create_session(clp, cred); if (status) goto out; @@ -5659,10 +5665,15 @@ out: * Issue the over-the-wire RPC DESTROY_SESSION. * The caller must serialize access to this routine. 
*/ -int nfs4_proc_destroy_session(struct nfs4_session *session) +int nfs4_proc_destroy_session(struct nfs4_session *session, + struct rpc_cred *cred) { + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION], + .rpc_argp = session, + .rpc_cred = cred, + }; int status = 0; - struct rpc_message msg; dprintk("--> nfs4_proc_destroy_session\n"); @@ -5670,10 +5681,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) if (session->clp->cl_cons_state != NFS_CS_READY) return status; - msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION]; - msg.rpc_argp = session; - msg.rpc_resp = NULL; - msg.rpc_cred = NULL; status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 419f8c4..1587840 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -256,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) goto out; set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); do_confirm: - status = nfs4_proc_create_session(clp); + status = nfs4_proc_create_session(clp, cred); if (status != 0) goto out; clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); @@ -1717,10 +1717,12 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) static int nfs4_reset_session(struct nfs_client *clp) { + struct rpc_cred *cred; int status; nfs4_begin_drain_session(clp); - status = nfs4_proc_destroy_session(clp->cl_session); + cred = nfs4_get_exchange_id_cred(clp); + status = nfs4_proc_destroy_session(clp->cl_session, cred); if (status && status != -NFS4ERR_BADSESSION && status != -NFS4ERR_DEADSESSION) { status = nfs4_recovery_handle_error(clp, status); @@ -1728,7 +1730,7 @@ static int nfs4_reset_session(struct nfs_client *clp) } memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); - status = nfs4_proc_create_session(clp); + status = nfs4_proc_create_session(clp, cred); if (status) { status = 
nfs4_recovery_handle_error(clp, status); goto out; @@ -1742,6 +1744,8 @@ static int nfs4_reset_session(struct nfs_client *clp) if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs41_setup_state_renewal(clp); out: + if (cred) + put_rpccred(cred); return status; } -- cgit v0.10.2 From 2cf047c994c8a62f65e520342d0287fca8807a53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 17:57:41 -0400 Subject: NFSv4.1: Ensure we use the correct credentials for bind_conn_to_session Signed-off-by: Trond Myklebust Cc: Weston Andros Adamson diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a5dbe62..f730730 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -212,7 +212,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -extern int nfs4_proc_bind_conn_to_session(struct nfs_client *); +extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8fa3a36..3fdff0c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5105,7 +5105,7 @@ nfs41_same_server_scope(struct nfs41_server_scope *a, * The 4.1 client currently uses the same TCP connection for the * fore and backchannel. 
*/ -int nfs4_proc_bind_conn_to_session(struct nfs_client *clp) +int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred) { int status; struct nfs41_bind_conn_to_session_res res; @@ -5114,6 +5114,7 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp) &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION], .rpc_argp = clp, .rpc_resp = &res, + .rpc_cred = cred, }; dprintk("--> %s\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1587840..7dbca66 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1779,7 +1779,14 @@ static int nfs4_recall_slot(struct nfs_client *clp) static int nfs4_bind_conn_to_session(struct nfs_client *clp) { - return nfs4_proc_bind_conn_to_session(clp); + struct rpc_cred *cred; + int ret; + + cred = nfs4_get_exchange_id_cred(clp); + ret = nfs4_proc_bind_conn_to_session(clp, cred); + if (cred) + put_rpccred(cred); + return ret; } #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -- cgit v0.10.2 From 662455391040a783b89d0232e743c27c23617dbd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 May 2012 17:18:09 -0400 Subject: NFSv4.1: Add DESTROY_CLIENTID Ensure that we destroy our lease on last unmount Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a50bdfb..7d10875 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -209,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp) if (nfs4_has_session(clp)) { nfs4_deviceid_purge_client(clp); nfs4_destroy_session(clp->cl_session); + nfs4_destroy_clientid(clp); } } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index f730730..b20b516 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -214,6 +214,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setcli extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, 
struct rpc_cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); +extern int nfs4_destroy_clientid(struct nfs_client *clp); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3fdff0c..485a6c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5261,6 +5261,65 @@ out: return status; } +static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, + struct rpc_cred *cred) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID], + .rpc_argp = clp, + .rpc_cred = cred, + }; + int status; + + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + if (status) + pr_warn("NFS: Got error %d from the server %s on " + "DESTROY_CLIENTID.", status, clp->cl_hostname); + return status; +} + +static int nfs4_proc_destroy_clientid(struct nfs_client *clp, + struct rpc_cred *cred) +{ + unsigned int loop; + int ret; + + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + ret = _nfs4_proc_destroy_clientid(clp, cred); + switch (ret) { + case -NFS4ERR_DELAY: + case -NFS4ERR_CLIENTID_BUSY: + ssleep(1); + break; + default: + return ret; + } + } + return 0; +} + +int nfs4_destroy_clientid(struct nfs_client *clp) +{ + struct rpc_cred *cred; + int ret = 0; + + if (clp->cl_mvops->minor_version < 1) + goto out; + if (clp->cl_exchange_flags == 0) + goto out; + cred = nfs4_get_exchange_id_cred(clp); + ret = nfs4_proc_destroy_clientid(clp, cred); + if (cred) + put_rpccred(cred); + switch (ret) { + case 0: + case -NFS4ERR_STALE_CLIENTID: + clp->cl_exchange_flags = 0; + } +out: + return ret; +} + struct nfs4_get_lease_time_data { struct nfs4_get_lease_time_args *args; struct nfs4_get_lease_time_res *res; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 
1d4d259..b9ce3fd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -338,6 +338,8 @@ static int nfs4_stat_to_errno(int); 1 /* bctsr_use_conn_in_rdma_mode */) #define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) #define decode_destroy_session_maxsz (op_decode_hdr_maxsz) +#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2) +#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz) #define encode_sequence_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) #define decode_sequence_maxsz (op_decode_hdr_maxsz + \ @@ -751,6 +753,10 @@ static int nfs4_stat_to_errno(int); encode_destroy_session_maxsz) #define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ decode_destroy_session_maxsz) +#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \ + encode_destroy_clientid_maxsz) +#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \ + decode_destroy_clientid_maxsz) #define NFS4_enc_sequence_sz \ (compound_decode_hdr_maxsz + \ encode_sequence_maxsz) @@ -1804,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr, encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); } +static void encode_destroy_clientid(struct xdr_stream *xdr, + uint64_t clientid, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr); + encode_uint64(xdr, clientid); +} + static void encode_reclaim_complete(struct xdr_stream *xdr, struct nfs41_reclaim_complete_args *args, struct compound_hdr *hdr) @@ -2724,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, } /* + * a DESTROY_CLIENTID request + */ +static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_client *clp) +{ + struct compound_hdr hdr = { + .minorversion = clp->cl_mvops->minor_version, + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_destroy_clientid(xdr, clp->cl_clientid, &hdr); + encode_nops(&hdr); +} 
+ +/* * a SEQUENCE request */ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -5479,6 +5509,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) return decode_op_hdr(xdr, OP_DESTROY_SESSION); } +static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_DESTROY_CLIENTID); +} + static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) { return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); @@ -6789,6 +6824,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, } /* + * Decode DESTROY_CLIENTID response + */ +static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (!status) + status = decode_destroy_clientid(xdr, res); + return status; +} + +/* * Decode SEQUENCE response */ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, @@ -7237,6 +7288,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), PROC(BIND_CONN_TO_SESSION, enc_bind_conn_to_session, dec_bind_conn_to_session), + PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 54006a9..af2d2fa 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -608,6 +608,7 @@ enum { NFSPROC4_CLNT_FREE_STATEID, NFSPROC4_CLNT_GETDEVICELIST, NFSPROC4_CLNT_BIND_CONN_TO_SESSION, + NFSPROC4_CLNT_DESTROY_CLIENTID, }; /* nfs41 types */ -- cgit v0.10.2 From 32b0131069c5bebf52368a9fe170f8d58b78fa8d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 26 May 2012 13:41:04 -0400 Subject: NFSv4.1: Don't clobber the seqid if exchange_id returns a confirmed clientid If the EXCHGID4_FLAG_CONFIRMED_R flag is set, the client is in theory supposed to already know the correct value of the seqid, in which 
case RFC5661 states that it should ignore the value returned. Also ensure that if the sanity check in nfs4_check_cl_exchange_flags fails, then we must not change the nfs_client fields. Finally, clean up the code: we don't need to retest the value of 'status' unless it can change. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 485a6c0..9f0a96f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5171,7 +5171,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, }; struct nfs41_exchange_id_res res = { - .client = clp, + 0 }; int status; struct rpc_message msg = { @@ -5214,22 +5214,22 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status == 0) - status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + status = nfs4_check_cl_exchange_flags(res.flags); if (status == 0) { + clp->cl_clientid = res.clientid; + clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); + if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) + clp->cl_seqid = res.seqid; + kfree(clp->cl_serverowner); clp->cl_serverowner = res.server_owner; res.server_owner = NULL; - } - if (status == 0) { /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; - } else - kfree(res.impl_id); - if (status == 0) { if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, res.server_scope)) { @@ -5244,7 +5244,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_serverscope = res.server_scope; goto out; } - } + } else + kfree(res.impl_id); out_server_owner: kfree(res.server_owner); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b9ce3fd..ee4a74d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5319,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, uint32_t dummy; char 
*dummy_str; int status; - struct nfs_client *clp = res->client; uint32_t impl_id_count; status = decode_op_hdr(xdr, OP_EXCHANGE_ID); @@ -5329,12 +5328,12 @@ static int decode_exchange_id(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; - xdr_decode_hyper(p, &clp->cl_clientid); + xdr_decode_hyper(p, &res->clientid); p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) goto out_overflow; - clp->cl_seqid = be32_to_cpup(p++); - clp->cl_exchange_flags = be32_to_cpup(p++); + res->seqid = be32_to_cpup(p++); + res->flags = be32_to_cpup(p++); /* We ask for SP4_NONE */ dummy = be32_to_cpup(p); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6387fc0..d1a7bf5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1132,7 +1132,8 @@ struct nfs41_bind_conn_to_session_res { }; struct nfs41_exchange_id_res { - struct nfs_client *client; + u64 clientid; + u32 seqid; u32 flags; struct nfs41_server_owner *server_owner; struct nfs41_server_scope *server_scope; -- cgit v0.10.2 From 43ac544cb36adf38338c01968f8e3a5f81b7d629 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 May 2012 13:47:21 -0400 Subject: NFSv4.1: nfs4_bind_conn_to_session should drain the session In order to avoid races with other RPC calls that end up setting the NFS4CLNT_BIND_CONN_TO_SESSION flag. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7dbca66..d21ed03 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1782,10 +1782,12 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) struct rpc_cred *cred; int ret; + nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); if (cred) put_rpccred(cred); + clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); return ret; } #else /* CONFIG_NFS_V4_1 */ -- cgit v0.10.2 From bf674c8228710fa4149df3988862dc112860df99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 May 2012 12:53:10 -0400 Subject: NFSv4.1: Handle errors in nfs4_bind_conn_to_session Ensure that we handle NFS4ERR_DELAY errors separately, and then let nfs4_recovery_handle_error() handle all other cases. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d21ed03..d685fd4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1788,7 +1788,17 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) if (cred) put_rpccred(cred); clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); - return ret; + switch (ret) { + case 0: + break; + case -NFS4ERR_DELAY: + ssleep(1); + set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + break; + default: + return nfs4_recovery_handle_error(clp, ret); + } + return 0; } #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } @@ -1858,6 +1868,7 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_bind_conn_to_session(clp); if (status < 0) goto out_error; + continue; } /* First recover reboot state... 
*/ -- cgit v0.10.2 From 7c5d7256845e30d295de4d72d4c52943bff7d1ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 May 2012 12:58:48 -0400 Subject: NFSv4.1: Handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION in the state manager Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d685fd4..e593ae9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1493,12 +1493,14 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED: set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* Zero session reset errors */ break; + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + break; case -EKEYEXPIRED: /* Nothing we can do */ nfs4_warn_keyexpired(clp->cl_hostname); -- cgit v0.10.2 From 9f594791dd530c2dc06953fb32505a26cc28371f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 May 2012 13:02:53 -0400 Subject: NFSv4.1: Handle other occurrences of NFS4ERR_CONN_NOT_BOUND_TO_SESSION Let nfs4_schedule_session_recovery() handle the details of choosing between resetting the session, and other session related recovery. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b20b516..c6827f93 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -313,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); #if defined(CONFIG_NFS_V4_1) struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); -extern void nfs4_schedule_session_recovery(struct nfs4_session *); +extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); #else -static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) +static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ddea4d3..e134029 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -165,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s ERROR %d, Reset session. 
Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); - nfs4_schedule_session_recovery(clp->cl_session); + nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); break; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9f0a96f..6be5fa3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -306,7 +306,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); - nfs4_schedule_session_recovery(clp->cl_session); + nfs4_schedule_session_recovery(clp->cl_session, errorcode); exception->retry = 1; break; #endif /* defined(CONFIG_NFS_V4_1) */ @@ -1330,7 +1330,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - nfs4_schedule_session_recovery(server->nfs_client->cl_session); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: @@ -3906,7 +3906,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session); + nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); task->tk_status = 0; return -EAGAIN; #endif /* CONFIG_NFS_V4_1 */ @@ -4847,7 +4847,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - nfs4_schedule_session_recovery(server->nfs_client->cl_session); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); goto out; case -ERESTARTSYS: /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e593ae9..d46a905 100644 --- 
a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1635,11 +1635,17 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 -void nfs4_schedule_session_recovery(struct nfs4_session *session) +void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { struct nfs_client *clp = session->clp; - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + switch (err) { + default: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + break; + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + } nfs4_schedule_lease_recovery(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); -- cgit v0.10.2 From f2c1b5100db340441963649fabb4e43e2a65df77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 May 2012 14:46:46 -0400 Subject: NFSv4.1: nfs4_reset_session should use nfs4_handle_reclaim_lease_error The results from a call to nfs4_proc_create_session() should always be fed into nfs4_handle_reclaim_lease_error, so that we can handle errors such as NFS4ERR_SEQ_MISORDERED correctly. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d46a905..dfedde6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1740,7 +1740,7 @@ static int nfs4_reset_session(struct nfs_client *clp) memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); status = nfs4_proc_create_session(clp, cred); if (status) { - status = nfs4_recovery_handle_error(clp, status); + status = nfs4_handle_reclaim_lease_error(clp, status); goto out; } clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); -- cgit v0.10.2 From 359d7d1c976851c658aa7085761015812ed3b56f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 May 2012 10:01:34 -0400 Subject: NFSv4: update_changeattr does not need to set NFS_INO_REVAL_PAGECACHE We're already invalidating the data cache, and setting the new change attribute. 
Since directories don't care about the i_size field, there is no need to be forcing any extra revalidation of the page cache. We do keep the NFS_INO_INVALID_ATTR flag, in order to force an attribute cache revalidation on stat() calls since we do not update the mtime and ctime fields. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6be5fa3..af2db2c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -774,7 +774,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) struct nfs_inode *nfsi = NFS_I(dir); spin_lock(&dir->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; -- cgit v0.10.2 From fb13bfa7e1bcfdcfdece47c24b62f1a1cad957e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 May 2012 11:36:28 -0400 Subject: NFSv4: Map NFS4ERR_SHARE_DENIED into an EACCES error instead of EIO If a file OPEN is denied due to a share lock, the resulting NFS4ERR_SHARE_DENIED is currently mapped to the default EIO. This patch adds a more appropriate mapping, and brings Linux into line with what Solaris 10 does. 
See https://bugzilla.kernel.org/show_bug.cgi?id=43286 Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af2db2c..42d9e9c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -103,6 +103,8 @@ static int nfs4_map_errors(int err) case -NFS4ERR_BADOWNER: case -NFS4ERR_BADNAME: return -EINVAL; + case -NFS4ERR_SHARE_DENIED: + return -EACCES; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); -- cgit v0.10.2 From cc0a98436820b161b595b8cc1d2329bcf7328107 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 May 2012 15:12:27 -0400 Subject: NFSv4: Add debugging printks to state manager Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index dfedde6..c679b9e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1108,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) return; if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + dprintk("%s: scheduling lease recovery for server %s\n", __func__, + clp->cl_hostname); nfs4_schedule_state_manager(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); @@ -1124,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs_expire_all_delegations(clp); + dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__, + clp->cl_hostname); } void nfs4_schedule_path_down_recovery(struct nfs_client *clp) @@ -1160,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 struct nfs_client *clp = server->nfs_client; nfs4_state_mark_reclaim_nograce(clp, state); + dprintk("%s: scheduling stateid recovery for server %s\n", __func__, + clp->cl_hostname); nfs4_schedule_state_manager(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1506,8 +1512,12 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int 
error) nfs4_warn_keyexpired(clp->cl_hostname); break; default: + dprintk("%s: failed to handle error %d for server %s\n", + __func__, error, clp->cl_hostname); return error; } + dprintk("%s: handled error %d for server %s\n", __func__, error, + clp->cl_hostname); return 0; } @@ -1605,15 +1615,21 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) case -NFS4ERR_MINOR_VERS_MISMATCH: if (clp->cl_cons_state == NFS_CS_SESSION_INITING) nfs_mark_client_ready(clp, -EPROTONOSUPPORT); + dprintk("%s: exit with error %d for server %s\n", + __func__, -EPROTONOSUPPORT, clp->cl_hostname); return -EPROTONOSUPPORT; case -EKEYEXPIRED: nfs4_warn_keyexpired(clp->cl_hostname); case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ default: + dprintk("%s: exit with error %d for server %s\n", __func__, + status, clp->cl_hostname); return status; } set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + dprintk("%s: handled error %d for server %s\n", __func__, status, + clp->cl_hostname); return 0; } @@ -1653,6 +1669,8 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); void nfs41_handle_recall_slot(struct nfs_client *clp) { set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + dprintk("%s: scheduling slot recall for server %s\n", __func__, + clp->cl_hostname); nfs4_schedule_state_manager(clp); } @@ -1662,6 +1680,8 @@ static void nfs4_reset_all_state(struct nfs_client *clp) set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); + dprintk("%s: scheduling reset of all state for server %s!\n", + __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); } } @@ -1670,6 +1690,8 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { nfs4_state_start_reclaim_reboot(clp); + dprintk("%s: server %s rebooted!\n", __func__, + clp->cl_hostname); nfs4_schedule_state_manager(clp); } } @@ 
-1677,12 +1699,15 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) static void nfs41_handle_state_revoked(struct nfs_client *clp) { nfs4_reset_all_state(clp); + dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); } static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) { /* This will need to handle layouts too */ nfs_expire_all_delegations(clp); + dprintk("%s: Recallable state revoked on server %s!\n", __func__, + clp->cl_hostname); } static void nfs41_handle_backchannel_fault(struct nfs_client *clp) @@ -1690,6 +1715,8 @@ static void nfs41_handle_backchannel_fault(struct nfs_client *clp) nfs_expire_all_delegations(clp); if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) nfs4_schedule_state_manager(clp); + dprintk("%s: server %s declared a backchannel fault\n", __func__, + clp->cl_hostname); } static void nfs41_handle_cb_path_down(struct nfs_client *clp) @@ -1740,6 +1767,8 @@ static int nfs4_reset_session(struct nfs_client *clp) memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); status = nfs4_proc_create_session(clp, cred); if (status) { + dprintk("%s: session reset failed with status %d for server %s!\n", + __func__, status, clp->cl_hostname); status = nfs4_handle_reclaim_lease_error(clp, status); goto out; } @@ -1747,6 +1776,8 @@ static int nfs4_reset_session(struct nfs_client *clp) /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + dprintk("%s: session reset was successful for server %s!\n", + __func__, clp->cl_hostname); /* Let the state manager reestablish state */ if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) @@ -1798,6 +1829,8 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); switch (ret) { case 0: + dprintk("%s: bind_conn_to_session was successful for server %s!\n", + __func__, 
clp->cl_hostname); break; case -NFS4ERR_DELAY: ssleep(1); -- cgit v0.10.2