From 726fd6ad59f73bd116b6a22d701db078183673c8 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 1 Jun 2011 16:23:59 +0400 Subject: sunrpc: use dprint_status() macro in call_decode() common dprint_status() macro is used in all callbacks but not in call_decode() Signed-off-by: Vasily Averin Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8c91415..64c3fe1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1551,8 +1551,7 @@ call_decode(struct rpc_task *task) kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; __be32 *p; - dprintk("RPC: %5u call_decode (status %d)\n", - task->tk_pid, task->tk_status); + dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { if (clnt->cl_chatty) -- cgit v0.10.2 From 82c2c8b8616fa9e77264c53f0df483f74ac54613 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 1 Jun 2011 16:54:32 +0400 Subject: lockd: properly convert be32 values in debug messages lockd: server returns status 50331648 it's quite hard to understand that number in this message is 3 in big endian Signed-off-by: Vasily Averin Signed-off-by: Trond Myklebust diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index e374050..8392cb8 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc) /* We appear to be out of the grace period */ wake_up_all(&host->h_gracewait); } - dprintk("lockd: server returns status %d\n", resp->status); + dprintk("lockd: server returns status %d\n", + ntohl(resp->status)); return 0; /* Okay, call complete */ } @@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) goto out; if (resp->status != nlm_lck_denied_nolocks) - printk("lockd: unexpected unlock status: %d\n", resp->status); + printk("lockd: unexpected unlock status: %d\n", + ntohl(resp->status)); /* What to do now? I'm out of my depth... */ status = -ENOLCK; out: @@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status) return -ENOLCK; #endif } - printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); + printk(KERN_NOTICE "lockd: unexpected server status %d\n", + ntohl(status)); return -ENOLCK; } -- cgit v0.10.2 From c9895cb69b07a4b17d8fdae26667f9a9fba5183b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:56 -0400 Subject: NFS: pnfs IPv6 support Handle ipv6 remote addresses from GETDEVICEINFO - supports netid "tcp" for ipv4 and "tcp6" for ipv6 as rfc 5665 specifies - added ds_remotestr to avoid having to handle different AFs in every dprintk - tested against pynfs 4.1 server, submitting ipv6 support patch to pynfs - tested with IPv6 disabled, it compiles cleanly and relies on rpc_pton to refuse to accept IPv6 addresses Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f9d03ab..b690bb5 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -344,8 +344,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s USE DS:ip %x %hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); /* No multipath support. Use first DS */ data->ds_clp = ds->ds_clp; @@ -384,9 +383,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, data->inode->i_ino, sync, (size_t) data->args.count, offset, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; data->ds_clp = ds->ds_clp; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e..6c6a817 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -49,8 +49,9 @@ enum stripetype4 { /* Individual ip address */ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - u32 ds_ip_addr; - u32 ds_port; + struct sockaddr_storage ds_addr; + size_t ds_addrlen; + char *ds_remotestr; /* human readable addr+port */ struct nfs_client *ds_clp; atomic_t ds_count; }; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf13..1a8308b 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -56,28 +56,56 @@ print_ds(struct nfs4_pnfs_ds *ds) printk("%s NULL device\n", __func__); return; } - printk(" ip_addr %x port %hu\n" + printk(" ds %s\n" " ref count %d\n" " client %p\n" " cl_exchange_flags %x\n", - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + ds->ds_remotestr, atomic_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } /* nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(u32 ip_addr, u32 port) +_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen) { struct nfs4_pnfs_ds *ds; - - dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", - ntohl(ip_addr), ntohs(port)); + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (ds->ds_ip_addr == ip_addr && - ds->ds_port == port) { - return ds; + if (addr->sa_family != ds->ds_addr.ss_family) + continue; + + switch (addr->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr; + b = (struct sockaddr_in *)&ds->ds_addr; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return ds; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr; + b6 = (struct sockaddr_in6 *)&ds->ds_addr; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + continue; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return ds; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr->sa_family); + return NULL; } } return NULL; @@ -91,19 +119,14 @@ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { struct nfs_client *clp; - struct sockaddr_in sin; int status = 0; - dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = ds->ds_ip_addr; - sin.sin_port = ds->ds_port; - - clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, - sizeof(sin), IPPROTO_TCP); + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&ds->ds_addr, + ds->ds_addrlen, IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -115,8 +138,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; } ds->ds_clp = clp; - dprintk("%s [existing] ip=%x, port=%hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s [existing] server=%s\n", __func__, + ds->ds_remotestr); goto out; } @@ -135,8 +158,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; ds->ds_clp = clp; - dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), - ntohs(ds->ds_port)); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; out_put: @@ -153,6 +175,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) if (ds->ds_clp) nfs_put_client(ds->ds_clp); + kfree(ds->ds_remotestr); kfree(ds); } @@ -179,31 +202,85 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); } +/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprinks. + */ +static char * +nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags) +{ + char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN]; + char *remotestr; + char *startsep = ""; + char *endsep = ""; + size_t len; + uint16_t port; + + switch (ds_addr->sa_family) { + case AF_INET: + port = ((struct sockaddr_in *)ds_addr)->sin_port; + break; + case AF_INET6: + startsep = "["; + endsep = "]"; + port = ((struct sockaddr_in6 *)ds_addr)->sin6_port; + break; + default: + dprintk("%s: Unknown address family %u\n", + __func__, ds_addr->sa_family); + return NULL; + } + + if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) { + dprintk("%s: error printing addr\n", __func__); + return NULL; + } + + len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1; + remotestr = kzalloc(len, gfp_flags); + + if (unlikely(!remotestr)) { + dprintk("%s: couldn't alloc remotestr\n", __func__); + return NULL; + } + + snprintf(remotestr, len, "%s%s%s:%u", + startsep, buf, endsep, ntohs(port)); + + return remotestr; +} + static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *tmp_ds, *ds; + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; + /* this is only used for debugging, so it's ok if its NULL */ + remotestr = nfs4_pnfs_remotestr(addr, gfp_flags); + spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(ip_addr, port); + tmp_ds = _data_server_lookup_locked(addr, addrlen); if (tmp_ds == NULL) { - ds->ds_ip_addr = ip_addr; - ds->ds_port = port; + memcpy(&ds->ds_addr, addr, addrlen); + ds->ds_addrlen = addrlen; + ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server ip 0x%x\n", __func__, - ds->ds_ip_addr); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); } else { + kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_ip_addr, + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, atomic_read(&tmp_ds->ds_count)); ds = tmp_ds; } @@ -213,18 +290,21 @@ out: } /* - * Currently only support ipv4, and one multi-path address. + * Currently only supports ipv4, ipv6 and one multi-path address. */ static struct nfs4_pnfs_ds * decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; - char *buf; - const char *ipend, *pstr; - u32 ip_addr, port; - int nlen, rlen, i; + char *buf, *portstr; + struct sockaddr_storage ss; + size_t sslen; + u32 port; + int nlen, rlen; int tmp[2]; __be32 *p; + char *netid, *match_netid; + size_t match_netid_len; /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -236,62 +316,97 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla if (unlikely(!p)) goto out_err; - /* Check that netid is "tcp" */ - if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { - dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) goto out_err; - } - /* r_addr */ + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ p = xdr_inline_decode(streamp, 4); if (unlikely(!p)) - goto out_err; + goto out_free_netid; rlen = be32_to_cpup(p); p = xdr_inline_decode(streamp, rlen); if (unlikely(!p)) - goto out_err; + goto out_free_netid; - /* ipv6 length plus port is legal */ - if (rlen > INET6_ADDRSTRLEN + 8) { + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { dprintk("%s: Invalid address, length %d\n", __func__, rlen); - goto out_err; + goto out_free_netid; } buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); - goto out_err; + goto out_free_netid; } buf[rlen] = '\0'; memcpy(buf, p, rlen); - /* replace the port dots with dashes for the in4_pton() delimiter*/ - for (i = 0; i < 2; i++) { - char *res = strrchr(buf, '.'); - if (!res) { - dprintk("%s: Failed finding expected dots in port\n", - __func__); - goto out_free; - } - *res = '-'; + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; } + *portstr = '\0'; - /* Currently only support ipv4 address */ - if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { - dprintk("%s: Only ipv4 addresses supported\n", __func__); - goto out_free; + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) { + dprintk("%s: Error parsing address %s\n", __func__, buf); + goto out_free_buf; } - /* port */ - pstr = ipend; - sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); - dprintk("%s: Decoded address and port %s\n", __func__, buf); -out_free: + switch (ss.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&ss)->sin_port = port; + sslen = sizeof(struct sockaddr_in); + match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&ss)->sin6_port = port; + sslen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, ss.ss_family); + goto out_free_buf; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_buf; + } + + ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags); + dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr); +out_free_buf: kfree(buf); +out_free_netid: + kfree(netid); out_err: return ds; } @@ -591,13 +706,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, - int err, u32 ds_addr) + int err, const char *ds_remotestr) { u32 *p = (u32 *)&dsaddr->id_node.deviceid; - printk(KERN_ERR "NFS: data server %x connection error %d." + printk(KERN_ERR "NFS: data server %s connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", - ds_addr, err, p[0], p[1], p[2], p[3]); + ds_remotestr, err, p[0], p[1], p[2], p[3]); spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; @@ -628,7 +743,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) err = nfs4_ds_connect(s, ds); if (err) { filelayout_mark_devid_negative(dsaddr, err, - ntohl(ds->ds_ip_addr)); + ds->ds_remotestr); return NULL; } } -- cgit v0.10.2 From 14f9a6076f5388f3fd6341ad4b841337b28fc825 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:57 -0400 Subject: NFS: Parse and store all multipath DS addresses This parses and stores all addresses associated with each data server, laying the groundwork for supporting multipath to data servers. - Skips over addresses that cannot be parsed (ie IPv6 addrs if v6 is not enabled). Only fails if none of the addresses are recognizable - Currently only uses the first address that parsed cleanly - Tested against pynfs server (modified to support multipath) Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 6c6a817..68cce73 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -47,11 +47,17 @@ enum stripetype4 { }; /* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - struct sockaddr_storage ds_addr; - size_t ds_addrlen; - char *ds_remotestr; /* human readable addr+port */ + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; struct nfs_client *ds_clp; atomic_t ds_count; }; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 1a8308b..610808a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -65,53 +65,104 @@ print_ds(struct nfs4_pnfs_ds *ds) ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } -/* nfs4_ds_cache_lock is held */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen) +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) { - struct nfs4_pnfs_ds *ds; struct sockaddr_in *a, *b; struct sockaddr_in6 *a6, *b6; - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (addr->sa_family != ds->ds_addr.ss_family) - continue; - - switch (addr->sa_family) { - case AF_INET: - a = (struct sockaddr_in *)addr; - b = (struct sockaddr_in *)&ds->ds_addr; - - if (a->sin_addr.s_addr == b->sin_addr.s_addr && - a->sin_port == b->sin_port) - return ds; - break; - - case AF_INET6: - a6 = (struct sockaddr_in6 *)addr; - b6 = (struct sockaddr_in6 *)&ds->ds_addr; - - /* LINKLOCAL addresses must have matching scope_id */ - if (ipv6_addr_scope(&a6->sin6_addr) == - IPV6_ADDR_SCOPE_LINKLOCAL && - a6->sin6_scope_id != b6->sin6_scope_id) - continue; - - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && - a6->sin6_port == b6->sin6_port) - return ds; - break; - - default: - dprintk("%s: unhandled address family: %u\n", - __func__, addr->sa_family); - return NULL; + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } + + return false; +} + +/* + * Lookup DS by addresses. The first matching address returns true. + * nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds_addr *da1, *da2; + + list_for_each_entry(da1, dsaddrs, da_node) { + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + list_for_each_entry(da2, &ds->ds_addrs, da_node) { + if (same_sockaddr( + (struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return ds; + } } } return NULL; } /* + * Compare two lists of addresses. + */ +static bool +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, + struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + size_t count1 = 0, + count2 = 0; + + list_for_each_entry(da1, dsaddrs1, da_node) + count1++; + + list_for_each_entry(da2, dsaddrs2, da_node) { + bool found = false; + count2++; + list_for_each_entry(da1, dsaddrs1, da_node) { + if (same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) { + found = true; + break; + } + } + if (!found) + return false; + } + + return (count1 == count2); +} + +/* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only support IPv4 */ @@ -119,14 +170,21 @@ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { struct nfs_client *clp; + struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr, + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + BUG_ON(list_empty(&ds->ds_addrs)); + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: using the first address for DS %s: %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + clp = nfs4_set_ds_client(mds_srv->nfs_client, - (struct sockaddr *)&ds->ds_addr, - ds->ds_addrlen, IPPROTO_TCP); + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -169,12 +227,24 @@ out_put: static void destroy_ds(struct nfs4_pnfs_ds *ds) { + struct nfs4_pnfs_ds_addr *da; + dprintk("--> %s\n", __func__); ifdebug(FACILITY) print_ds(ds); if (ds->ds_clp) nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + kfree(ds->ds_remotestr); kfree(ds); } @@ -207,67 +277,73 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) * complicated setup around many dprinks. */ static char * -nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags) +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) { - char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN]; + struct nfs4_pnfs_ds_addr *da; char *remotestr; - char *startsep = ""; - char *endsep = ""; size_t len; - uint16_t port; + char *p; - switch (ds_addr->sa_family) { - case AF_INET: - port = ((struct sockaddr_in *)ds_addr)->sin_port; - break; - case AF_INET6: - startsep = "["; - endsep = "]"; - port = ((struct sockaddr_in6 *)ds_addr)->sin6_port; - break; - default: - dprintk("%s: Unknown address family %u\n", - __func__, ds_addr->sa_family); - return NULL; + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ } - if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) { - dprintk("%s: error printing addr\n", __func__); + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) return NULL; - } - len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1; - remotestr = kzalloc(len, gfp_flags); + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); - if (unlikely(!remotestr)) { - dprintk("%s: couldn't alloc remotestr\n", __func__); - return NULL; - } + if (ll > len) + goto out_err; - snprintf(remotestr, len, "%s%s%s:%u", - startsep, buf, endsep, ntohs(port)); + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; return remotestr; +out_err: + kfree(remotestr); + return NULL; } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; - ds = kzalloc(sizeof(*tmp_ds), gfp_flags); + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); if (!ds) goto out; /* this is only used for debugging, so it's ok if its NULL */ - remotestr = nfs4_pnfs_remotestr(addr, gfp_flags); + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(addr, addrlen); + tmp_ds = _data_server_lookup_locked(dsaddrs); if (tmp_ds == NULL) { - memcpy(&ds->ds_addr, addr, addrlen); - ds->ds_addrlen = addrlen; + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); @@ -276,6 +352,11 @@ nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags) dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, + dsaddrs)) { + dprintk("%s: multipath address mismatch: %s != %s", + __func__, tmp_ds->ds_remotestr, remotestr); + } kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); @@ -292,19 +373,20 @@ out: /* * Currently only supports ipv4, ipv6 and one multi-path address. */ -static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) +static struct nfs4_pnfs_ds_addr * +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; - struct sockaddr_storage ss; - size_t sslen; u32 port; int nlen, rlen; int tmp[2]; __be32 *p; char *netid, *match_netid; - size_t match_netid_len; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -365,50 +447,74 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla } *portstr = '\0'; - if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) { - dprintk("%s: Error parsing address %s\n", __func__, buf); + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; } portstr++; sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - switch (ss.ss_family) { + switch (da->da_addr.ss_family) { case AF_INET: - ((struct sockaddr_in *)&ss)->sin_port = port; - sslen = sizeof(struct sockaddr_in); + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); match_netid = "tcp"; match_netid_len = 3; break; case AF_INET6: - ((struct sockaddr_in6 *)&ss)->sin6_port = port; - sslen = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); match_netid = "tcp6"; match_netid_len = 4; + startsep = "["; + endsep = "]"; break; default: dprintk("%s: unsupported address family: %u\n", - __func__, ss.ss_family); - goto out_free_buf; + __func__, da->da_addr.ss_family); + goto out_free_da; } if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", __func__, netid, match_netid); - goto out_free_buf; + goto out_free_da; } - ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags); - dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr); + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); + kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); kfree(buf); out_free_netid: kfree(netid); out_err: - return ds; + return NULL; } /* Decode opaque device data and return the result */ @@ -425,6 +531,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -501,6 +609,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) NFS_SERVER(ino)->nfs_client, &pdev->dev_id); + INIT_LIST_HEAD(&dsaddrs); + for (i = 0; i < dsaddr->ds_num; i++) { int j; u32 mp_count; @@ -510,48 +620,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) goto out_err_free_deviceid; mp_count = be32_to_cpup(p); /* multipath count */ - if (mp_count > 1) { - printk(KERN_WARNING - "%s: Multipath count %d not supported, " - "skipping all greater than 1\n", __func__, - mp_count); - } for (j = 0; j < mp_count; j++) { - if (j == 0) { - dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino, gfp_flags); - if (dsaddr->ds_list[i] == NULL) - goto out_err_free_deviceid; - } else { - u32 len; - /* skip extra multipath */ - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - } + da = decode_ds_addr(&stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + goto out_err_free_deviceid; + } + + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!dsaddr->ds_list[i]) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); } } __free_page(scratch); return dsaddr; +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } out_err_free_deviceid: nfs4_fl_free_deviceid(dsaddr); /* stripe_indicies was part of dsaddr */ -- cgit v0.10.2 From 7e574f0d3911c5cc60d4d2b57fee975c462d6cd0 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 18:48:58 -0400 Subject: NFS: pnfs: loop over multipath addrs on connect Don't just use the first addr in the multipath list - instead, loop over addresses when calling nfs4_set_ds_client() (which calls connect) until it is successful. Although this is not real multipath support, it's a quick fix to handle when an MDS sends a list of addresses for a DS and some of the addr families are unsupported or misconfigured (like no routable ipv6 addr assigned). This will attempt all paths to the DS before giving up, instead of immediately falling back to the MDS. As before, an error encountered after a successful connect() will cause all i/o to fall back to the MDS. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 610808a..61c173b 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -169,7 +169,7 @@ _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { - struct nfs_client *clp; + struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; @@ -178,13 +178,17 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) BUG_ON(list_empty(&ds->ds_addrs)); - da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); - dprintk("%s: using the first address for DS %s: %s\n", - __func__, ds->ds_remotestr, da->da_remotestr); + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); - clp = nfs4_set_ds_client(mds_srv->nfs_client, + clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP); + if (!IS_ERR(clp)) + break; + } + if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; -- cgit v0.10.2 From 78fe0f41d9937ee62817912ac8d627e06243c269 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 19:05:47 -0400 Subject: NFS: use scope from exchange_id to skip reclaim can be skipped if the "eir_server_scope" from the exchange_id proc differs from previous calls. Also, in the future server_scope will be useful for determining whether client trunking is available Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b8..006f8ff 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -293,6 +293,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); kfree(clp->cl_hostname); + kfree(clp->server_scope); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c4a6983..b47f0d4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -48,6 +48,7 @@ enum nfs4_client_state { NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, + NFS4CLNT_SERVER_SCOPE_MISMATCH, }; enum nfs4_session_state { @@ -349,6 +350,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); +extern void nfs41_handle_server_scope(struct nfs_client *, + struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5879b23..5f4912f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4781,6 +4781,16 @@ out_inval: return -NFS4ERR_INVAL; } +static bool +nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +{ + if (a->server_scope_sz == b->server_scope_sz && + memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) + return true; + + return false; +} + /* * nfs4_proc_exchange_id() * @@ -4823,9 +4833,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) init_utsname()->domainname, clp->cl_rpcclient->cl_auth->au_flavor); + res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); + if (unlikely(!res.server_scope)) + return -ENOMEM; + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + + if (!status) { + if (clp->server_scope && + !nfs41_same_server_scope(clp->server_scope, + res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->server_scope); + clp->server_scope = NULL; + } + + if (!clp->server_scope) + clp->server_scope = res.server_scope; + else + kfree(res.server_scope); + } + dprintk("<-- %s status= %d\n", __func__, status); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e97dd21..5d744a5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, + &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + else + set_bit(NFS4CLNT_RECLAIM_REBOOT, + &clp->cl_state); + pnfs_destroy_all_layouts(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6e8f3b..1555c74 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4977,11 +4977,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, if (unlikely(status)) return status; - /* Throw away server_scope */ + /* Save server_scope */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + + memcpy(res->server_scope->server_scope, dummy_str, dummy); + res->server_scope->server_scope_sz = dummy; + /* Throw away Implementation id array */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 87694ca..f23b188 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,6 +16,7 @@ struct nfs4_sequence_args; struct nfs4_sequence_res; struct nfs_server; struct nfs4_minor_version_ops; +struct server_scope; /* * The nfs_client identifies our client state to the server. @@ -83,6 +84,8 @@ struct nfs_client { #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif + + struct server_scope *server_scope; /* from exchange_id */ }; /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 00848d8..c4d98df 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1060,6 +1060,7 @@ struct server_scope { struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; + struct server_scope *server_scope; }; struct nfs41_create_session_args { -- cgit v0.10.2 From 35dbbc99e93e57680837c17f96efe370f0535064 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Jun 2011 16:32:21 -0400 Subject: NFS: fix comment We support IPv4 and IPv6 now. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 61c173b..ed388aa 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -164,7 +164,7 @@ _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, /* * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only support IPv4 + * Currently only supports IPv4 and IPv6 addresses */ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) -- cgit v0.10.2 From 6382a44138e7aa40bf52170e7afc014443a24806 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Jun 2011 16:44:44 -0400 Subject: NFS: move pnfs layouts to nfs_server structure Layouts should be tracked per nfs_server (aka superblock) instead of per struct nfs_client, which may have multiple FSIDs associated with it. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954..74780f9 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; bool found = false; @@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) - continue; - ino = igrab(lo->plh_inode); - if (!ino) - continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ - get_layout_hdr(lo); - break; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. + */ + get_layout_hdr(lo); + break; + } + if (found) + break; } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; @@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; u32 rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, }; spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, - &args->cbl_fsid, sizeof(struct nfs_fsid))) - continue; - if (!igrab(lo->plh_inode)) + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); - list_add(&lo->plh_bulk_recall, &recall_list); + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + list_add(&lo->plh_bulk_recall, &recall_list); + } } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, &recall_list, plh_bulk_recall) { ino = lo->plh_inode; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 006f8ff..5452ada 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; -#if defined(CONFIG_NFS_V4_1) - INIT_LIST_HEAD(&clp->cl_layouts); -#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -1063,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); + INIT_LIST_HEAD(&server->layouts); atomic_set(&server->active, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7..ff82007 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -448,11 +448,17 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) void pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); spin_lock(&clp->cl_lock); - list_splice_init(&clp->cl_layouts, &tmp_list); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); + } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); while (!list_empty(&tmp_list)) { @@ -920,7 +926,8 @@ pnfs_update_layout(struct inode *ino, }; unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; bool first = false; @@ -964,7 +971,7 @@ pnfs_update_layout(struct inode *ino, */ spin_lock(&clp->cl_lock); BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index f23b188..4faeac8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -78,7 +78,6 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* sharred session */ - struct list_head cl_layouts; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE @@ -152,6 +151,7 @@ struct nfs_server { struct rb_root openowner_id; struct rb_root lockowner_id; #endif + struct list_head layouts; struct list_head delegations; void (*destroy)(struct nfs_server *); -- cgit v0.10.2 From fca78d6d2c77f87d7dbee89bbe4836a44da881e2 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:07 -0400 Subject: NFS: Add SECINFO_NO_NAME procedure If the client is using NFS v4.1, then we can use SECINFO_NO_NAME to find the secflavor for the initial mount. If the server doesn't support SECINFO_NO_NAME then I fall back on the "guess and check" method used for v4.0 mounts. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347..fc017ea 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +#ifdef CONFIG_NFS_V4 +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); +#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063ba..8102391 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,7 +119,7 @@ Elong: } #ifdef CONFIG_NFS_V4 -static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b47f0d4..c30aed2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,6 +67,8 @@ struct nfs4_minor_version_ops { int cache_reply); int (*validate_stateid)(struct nfs_delegation *, const nfs4_stateid *); + int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, + struct nfs_fsinfo *); const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5f4912f..892bff5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2251,13 +2251,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + int minor_version = server->nfs_client->cl_minorversion; int status = nfs4_lookup_root(server, fhandle, info); if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) /* * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM * by nfs4_map_errors() as this function exits. */ - status = nfs4_find_root_sec(server, fhandle, info); + status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) @@ -5935,6 +5936,85 @@ out: rpc_put_task(task); return status; } + +static int +_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs41_secinfo_no_name_args args = { + .style = SECINFO_STYLE_CURRENT_FH, + }; + struct nfs4_secinfo_res res = { + .flavors = flavors, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); +} + +static int +nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + switch (err) { + case 0: + case -NFS4ERR_WRONGSEC: + case -NFS4ERR_NOTSUPP: + break; + default: + err = nfs4_handle_exception(server, err, &exception); + } + } while (exception.retry); + return err; +} + +static int +nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int err; + struct page *page; + rpc_authflavor_t flavor; + struct nfs4_secinfo_flavors *flavors; + + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + flavors = page_address(page); + err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + + /* + * Fall back on "guess and check" method if + * the server doesn't support SECINFO_NO_NAME + */ + if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + err = nfs4_find_root_sec(server, fhandle, info); + goto out_freepage; + } + if (err) + goto out_freepage; + + flavor = nfs_find_best_sec(flavors); + if (err == 0) + err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + +out_freepage: + put_page(page); + if (err == -EACCES) + return -EPERM; +out: + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -5996,6 +6076,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, .validate_stateid = nfs4_validate_delegation_stateid, + .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, @@ -6006,6 +6087,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, .validate_stateid = nfs41_validate_delegation_stateid, + .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1555c74..c8c069a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -343,6 +343,8 @@ static int nfs4_stat_to_errno(int); 1 /* FIXME: opaque lrf_body always empty at the moment */) #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 1 + decode_stateid_maxsz) +#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) +#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -772,6 +774,14 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz) +#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz +\ + encode_secinfo_no_name_maxsz) +#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_secinfo_no_name_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1938,6 +1948,20 @@ encode_layoutreturn(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_layoutreturn_maxsz; } + +static int +encode_secinfo_no_name(struct xdr_stream *xdr, + const struct nfs41_secinfo_no_name_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); + *p++ = cpu_to_be32(args->style); + hdr->nops++; + hdr->replen += decode_secinfo_no_name_maxsz; + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2790,6 +2814,25 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, encode_layoutreturn(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode SECINFO_NO_NAME request + */ +static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_secinfo_no_name_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_secinfo_no_name(xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -6467,6 +6510,30 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode SECINFO_NO_NAME response + */ +static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_secinfo_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putrootfh(xdr); + if (status) + goto out; + status = decode_secinfo(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6669,6 +6736,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), + PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 504b289..7cc7d57 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -563,6 +563,7 @@ enum { NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, NFSPROC4_CLNT_LAYOUTRETURN, + NFSPROC4_CLNT_SECINFO_NO_NAME, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c4d98df..7159e1d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1084,6 +1084,14 @@ struct nfs41_reclaim_complete_args { struct nfs41_reclaim_complete_res { struct nfs4_sequence_res seq_res; }; + +#define SECINFO_STYLE_CURRENT_FH 0 +#define SECINFO_STYLE_PARENT 1 +struct nfs41_secinfo_no_name_args { + int style; + struct nfs4_sequence_args seq_args; +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; -- cgit v0.10.2 From 7d9747947ae66d8f6a9a9a023a3a5e28df6a536e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:08 -0400 Subject: NFS: Added TEST_STATEID call This patch adds in the xdr for doing a TEST_STATEID call with a single stateid. RFC 5661 allows multiple stateids to be tested in a single call, but only testing one keeps things simpler for now. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 892bff5..5612ba9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6015,6 +6015,34 @@ out_freepage: out: return err; } +static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_test_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_test_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs41_test_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c8c069a..7d914d9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -345,6 +345,9 @@ static int nfs4_stat_to_errno(int); 1 + decode_stateid_maxsz) #define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) #define decode_secinfo_no_name_maxsz decode_secinfo_maxsz +#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -782,6 +785,12 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_secinfo_no_name_maxsz) +#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_test_stateid_maxsz) +#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_test_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1962,6 +1971,20 @@ encode_secinfo_no_name(struct xdr_stream *xdr, hdr->replen += decode_secinfo_no_name_maxsz; return 0; } + +static void encode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_TEST_STATEID); + *p++ = cpu_to_be32(1); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_test_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2833,6 +2856,23 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, encode_nops(&hdr); return 0; } + +/* + * Encode TEST_STATEID request + */ +static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_test_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -5371,6 +5411,35 @@ out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } + +static int decode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + __be32 *p; + int status; + int num_res; + + status = decode_op_hdr(xdr, OP_TEST_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num_res = be32_to_cpup(p++); + if (num_res != 1) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); +out: + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6534,6 +6603,27 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode TEST_STATEID response + */ +static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_test_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6737,6 +6827,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), + PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 7cc7d57..efe07f7 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -564,6 +564,7 @@ enum { NFSPROC4_CLNT_LAYOUTCOMMIT, NFSPROC4_CLNT_LAYOUTRETURN, NFSPROC4_CLNT_SECINFO_NO_NAME, + NFSPROC4_CLNT_TEST_STATEID, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7159e1d..e696c1f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1092,6 +1092,16 @@ struct nfs41_secinfo_no_name_args { struct nfs4_sequence_args seq_args; }; +struct nfs41_test_stateid_args { + nfs4_stateid *stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_test_stateid_res { + unsigned int status; + struct nfs4_sequence_res seq_res; +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; -- cgit v0.10.2 From 9aeda35fd643eba683fdb8dba8907fa796a85dda Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:09 -0400 Subject: NFS: added FREE_STATEID call FREE_STATEID is used to tell the server that we want to free a stateid that no longer has any locks associated with it. This allows the client to reclaim locks without encountering edge conditions documented in section 8.4.3 of RFC 5661. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5612ba9..197ad0a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6043,6 +6043,36 @@ static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *stat } while (exception.retry); return err; } + +static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_free_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_free_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_free_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7d914d9..c191a9b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -348,6 +348,9 @@ static int nfs4_stat_to_errno(int); #define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -791,6 +794,12 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_test_stateid_maxsz) +#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_free_stateid_maxsz) +#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_free_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1985,6 +1994,18 @@ static void encode_test_stateid(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_test_stateid_maxsz; } + +static void encode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_FREE_STATEID); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_free_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2873,6 +2894,23 @@ static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, encode_test_stateid(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode FREE_STATEID request + */ +static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_free_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -5440,6 +5478,26 @@ out_overflow: out: return -EIO; } + +static int decode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_FREE_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6624,6 +6682,27 @@ static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode FREE_STATEID response + */ +static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_free_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6828,6 +6907,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), + PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index efe07f7..a3c4bc8 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -565,6 +565,7 @@ enum { NFSPROC4_CLNT_LAYOUTRETURN, NFSPROC4_CLNT_SECINFO_NO_NAME, NFSPROC4_CLNT_TEST_STATEID, + NFSPROC4_CLNT_FREE_STATEID, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e696c1f..2094555 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1102,6 +1102,16 @@ struct nfs41_test_stateid_res { struct nfs4_sequence_res seq_res; }; +struct nfs41_free_stateid_args { + nfs4_stateid *stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_free_stateid_res { + unsigned int status; + struct nfs4_sequence_res seq_res; +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; -- cgit v0.10.2 From f062eb6ced3b297277b94b4da3113b1d3782e539 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 14:59:10 -0400 Subject: NFS: test and free stateids during recovery When recovering open files and locks, the stateid should be tested against the server and freed if it is invalid. This patch adds new recovery functions for NFS v4.1. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 197ad0a..325706f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); - +#ifdef CONFIG_NFS_V4_1 +static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +#endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { @@ -1687,6 +1690,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_open_expired(sp, state); +} +#endif + /* * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* * fields corresponding to attributes that were used to store the verifier. @@ -4444,6 +4461,20 @@ out: return err; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_lock_expired(state, request); +} +#endif + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -6109,8 +6140,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, - .recover_lock = nfs4_lock_expired, + .recover_open = nfs41_open_expired, + .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; -- cgit v0.10.2 From 1751c3638f2a07a8c66a803a31791bab9bd3fced Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix We need to ensure that the layouts are set up before we can decide to coalesce requests. To do so, we want to further split up the struct nfs_pageio_descriptor operations into an initialisation callback, a coalescing test callback, and a 'do i/o' callback. This patch cleans up the existing callback methods before adding the 'initialisation' callback. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b690bb5..46bb31b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -658,7 +658,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, * return true : coalesce page * return false : don't coalesce page */ -bool +static bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -681,6 +681,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_test = filelayout_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_test = filelayout_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) { return !FILELAYOUT_LSEG(lseg)->commit_through_mds; @@ -878,7 +888,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .owner = THIS_MODULE, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, - .pg_test = filelayout_pg_test, + .pg_read_ops = &filelayout_pg_read_ops, + .pg_write_ops = &filelayout_pg_write_ops, .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, .commit_pagelist = filelayout_commit_pagelist, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3..c203fab 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1007,6 +1007,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } +static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_test = objio_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_test = objio_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1020,7 +1030,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = objio_pg_test, + .pg_read_ops = &objio_pg_read_ops, + .pg_write_ops = &objio_pg_write_ops, .free_deviceid_node = objio_free_deviceid_node, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0098557..9b8a473 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int io_flags) { @@ -241,12 +241,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_base = 0; desc->pg_moreio = 0; desc->pg_inode = inode; - desc->pg_doio = doio; + desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; - desc->pg_test = nfs_generic_pg_test; - pnfs_pageio_init(desc, inode); } /** @@ -276,7 +274,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; - return pgio->pg_test(pgio, prev, req); + return pgio->pg_ops->pg_test(pgio, prev, req); } /** @@ -311,7 +309,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { - int error = desc->pg_doio(desc); + int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ff82007..7ec46d5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1056,6 +1056,30 @@ out_forget_reply: } bool +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + return true; +} + +bool +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + return true; +} + +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 96bf4e6..137a2bd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + const struct nfs_pageio_ops *pg_read_ops; + const struct nfs_pageio_ops *pg_write_ops; /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. @@ -152,6 +153,10 @@ struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, u64 count, enum pnfs_iomode access_type, gfp_t gfp_flags); + +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); + void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -293,15 +298,6 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld) - pgio->pg_test = ld->pg_test; -} - #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -385,9 +381,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { + return false; +} + +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + return false; } static inline void diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a7f95..b6d9ec9 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -32,6 +32,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); +static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -113,6 +114,20 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + NFS_SERVER(inode)->rsize, 0); +} + +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + if (!pnfs_pageio_init_read(pgio, inode)) + nfs_pageio_init_read_mds(pgio, inode); +} + int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -131,14 +146,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init(&pgio, inode, NULL, 0, 0); + nfs_pageio_init_read(&pgio, inode); nfs_list_add_request(new, &pgio.pg_list); pgio.pg_count = len; - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(&pgio); - else - nfs_pagein_one(&pgio); + nfs_pageio_complete(&pgio); return 0; } @@ -365,6 +377,20 @@ out: return ret; } +int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc); + return nfs_pagein_one(desc); +} +EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); + + +static const struct nfs_pageio_ops nfs_pageio_read_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -635,8 +661,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, .pgio = &pgio, }; struct inode *inode = mapping->host; - struct nfs_server *server = NFS_SERVER(inode); - size_t rsize = server->rsize; unsigned long npages; int ret = -ESTALE; @@ -664,10 +688,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - if (rsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); - else - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + nfs_pageio_init_read(&pgio, inode); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7271680..b144b54 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1033,15 +1033,31 @@ out: return ret; } -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc); + return nfs_flush_one(desc); +} +EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); + +static const struct nfs_pageio_ops nfs_pageio_write_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - size_t wsize = NFS_SERVER(inode)->wsize; + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + NFS_SERVER(inode)->wsize, ioflags); +} - if (wsize < PAGE_CACHE_SIZE) - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); - else - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) +{ + if (!pnfs_pageio_init_write(pgio, inode, ioflags)) + nfs_pageio_init_write_mds(pgio, inode, ioflags); } /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 25311b3..d378f08 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,6 +55,12 @@ struct nfs_page { struct nfs_writeverf wb_verf; /* Commit cookie */ }; +struct nfs_pageio_descriptor; +struct nfs_pageio_ops { + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + int (*pg_doio)(struct nfs_pageio_descriptor *); +}; + struct nfs_pageio_descriptor { struct list_head pg_list; unsigned long pg_bytes_written; @@ -64,11 +70,10 @@ struct nfs_pageio_descriptor { char pg_moreio; struct inode *pg_inode; - int (*pg_doio)(struct nfs_pageio_descriptor *); + const struct nfs_pageio_ops *pg_ops; int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) @@ -85,7 +90,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *desc), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, @@ -100,6 +105,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); extern void nfs_clear_page_tag_locked(struct nfs_page *req); +extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); +extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); + /* * Lock the page of an asynchronous request without getting a new reference -- cgit v0.10.2 From d8007d4dd6ff8749cc8a4063c3ec87442db76d82 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFSv4.1: Add an initialisation callback for pNFS Ensure that we always get a layout before setting up the i/o request. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 46bb31b..faac87f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -682,11 +682,13 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c203fab..b759aeb 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1008,11 +1008,13 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, } static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, .pg_test = objio_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, .pg_test = objio_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9b8a473..d421e19 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -295,6 +295,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; } else { + if (desc->pg_ops->pg_init) + desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } nfs_list_remove_request(req); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7ec46d5..69a0c3f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -911,7 +911,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. */ -struct pnfs_layout_segment * +static struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -1055,6 +1055,34 @@ out_forget_reply: goto out; } +void +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_READ, + GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); + +void +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_RW, + GFP_NOFS); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); + bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -1083,31 +1111,8 @@ bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - enum pnfs_iomode access_type; - gfp_t gfp_flags; - - /* We assume that pg_ioflags == 0 iff we're reading a page */ - if (pgio->pg_ioflags == 0) { - access_type = IOMODE_READ; - gfp_flags = GFP_KERNEL; - } else { - access_type = IOMODE_RW; - gfp_flags = GFP_NOFS; - } - - if (pgio->pg_lseg == NULL) { - if (pgio->pg_count != prev->wb_bytes) - return true; - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(prev), - pgio->pg_count, - access_type, - gfp_flags); - if (pgio->pg_lseg == NULL) - return true; - } + if (pgio->pg_lseg == NULL) + return nfs_generic_pg_test(pgio, prev, req); /* * Test if a nfs_page is fully contained in the pnfs_layout_range. diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 137a2bd..dd54351 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -149,10 +149,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags); bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); @@ -163,6 +159,8 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); +void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -318,14 +316,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags) -{ - return NULL; -} - static inline enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *data, const struct rpc_call_ops *call_ops) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b6d9ec9..2cf728b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -147,9 +147,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_pageio_init_read(&pgio, inode); - nfs_list_add_request(new, &pgio.pg_list); - pgio.pg_count = len; - + nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); return 0; } @@ -281,7 +279,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; + struct pnfs_layout_segment *lseg = desc->pg_lseg; LIST_HEAD(list); nfs_list_remove_request(req); @@ -299,10 +297,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -336,6 +330,8 @@ out_bad: } SetPageError(page); nfs_readpage_release(req); + put_lseg(lseg); + desc->pg_lseg = NULL; return -ENOMEM; } @@ -364,10 +360,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b144b54..d1ae7e4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -916,7 +916,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; + struct pnfs_layout_segment *lseg = desc->pg_lseg; LIST_HEAD(list); nfs_list_remove_request(req); @@ -940,10 +940,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -976,6 +972,8 @@ out_bad: nfs_writedata_free(data); } nfs_redirty_request(req); + put_lseg(lseg); + desc->pg_lseg = NULL; return -ENOMEM; } @@ -1016,10 +1014,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d378f08..9ac2dd1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -57,6 +57,7 @@ struct nfs_page { struct nfs_pageio_descriptor; struct nfs_pageio_ops { + void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); }; -- cgit v0.10.2 From e885de1a5bc9f46ef8f934c5a7602c89d2d51e8d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFSv4.1: Fall back to ordinary i/o through the mds if we have no layout segment Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fc017ea..31e8b50 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,7 +296,13 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +struct nfs_pageio_descriptor; +extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode); + /* write.c */ +extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index faac87f..d1ae5bf 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -669,8 +669,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - if (!pgio->pg_lseg) - return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b759aeb..70272d5 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1000,9 +1000,6 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; - if (pgio->pg_lseg == NULL) - return true; - return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 69a0c3f..cc807fe 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1066,6 +1066,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r req->wb_bytes, IOMODE_READ, GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); @@ -1080,6 +1084,9 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * req->wb_bytes, IOMODE_RW, GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2cf728b..9b2a1d7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -114,7 +114,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d1ae7e4..00940dc9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1040,7 +1040,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, -- cgit v0.10.2 From 47cb498e9316314e7e681f417135589195ad78a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jun 2011 12:18:11 -0400 Subject: NFSv4.1: Clean ups for the device id cache The fact that the global device id cache holds a reference to the nfs4_deviceid_node until it is invisible to rcu lookups implies that we can always assume that the reference count is non-zero in _find_get_deviceid. Also clean up nfs4_put_deviceid_node and the removal of the device id from the cache. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index dd54351..1cfc96f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -198,7 +198,6 @@ struct nfs4_deviceid_node { void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index f0f8e1e..fb9498d 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_lock(); d = _lookup_deviceid(ld, clp, id, hash); - if (d && !atomic_inc_not_zero(&d->ref)) - d = NULL; + if (d != NULL) + atomic_inc(&d->ref); rcu_read_unlock(); return d; } @@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); /* - * Unhash and put deviceid + * Remove a deviceid from cache * * @clp nfs_client associated with deviceid * @id the deviceid to unhash * * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. */ -struct nfs4_deviceid_node * -nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, +void +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; @@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_unlock(); if (!d) { spin_unlock(&nfs4_deviceid_lock); - return NULL; + return; } hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); @@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, /* balance the initial ref set in pnfs_insert_deviceid */ if (atomic_dec_and_test(&d->ref)) - return d; - - return NULL; -} -EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); - -/* - * Delete a deviceid from cache - * - * @clp struct nfs_client qualifying the deviceid - * @id deviceid to delete - */ -void -nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, - const struct nfs_client *clp, const struct nfs4_deviceid *id) -{ - struct nfs4_deviceid_node *d; - - d = nfs4_unhash_put_deviceid(ld, clp, id); - if (!d) - return; - d->ld->free_deviceid_node(d); + d->ld->free_deviceid_node(d); } EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); @@ -221,16 +200,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); * * @d deviceid node to put * - * @ret true iff the node was deleted + * return true iff the node was deleted + * Note that since the test for d->ref == 0 is sufficient to establish + * that the node is no longer hashed in the global device id cache. */ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { - if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + if (!atomic_dec_and_test(&d->ref)) return false; - hlist_del_init_rcu(&d->node); - spin_unlock(&nfs4_deviceid_lock); - synchronize_rcu(); d->ld->free_deviceid_node(d); return true; } -- cgit v0.10.2 From 7c24d9489fe57d67cb56c6bdad58d89806e7fd97 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 13 Jun 2011 18:22:38 -0400 Subject: NFSv4.1: File layout only supports whole file layouts Ask for whole file layouts. Until support for layout segments is fully supported in the file layout code, discard non-whole file layouts. Signed-off-by: Andy Adamson Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index d1ae5bf..51c1909 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -427,6 +427,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + goto out; + } + if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -679,14 +687,49 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +void +filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_read_mds(pgio, pgio->pg_inode); +} + +void +filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_init_write_mds(pgio, pgio->pg_inode, + pgio->pg_ioflags); +} + static const struct nfs_pageio_ops filelayout_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = nfs_generic_pg_writepages, }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cc807fe..a7dc336 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -911,7 +911,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. */ -static struct pnfs_layout_segment * +struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -980,7 +980,8 @@ pnfs_update_layout(struct inode *ino, arg.offset -= pg_offset; arg.length += pg_offset; } - arg.length = PAGE_CACHE_ALIGN(arg.length); + if (arg.length != NFS4_MAX_UINT64) + arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { @@ -998,6 +999,7 @@ out_unlock: spin_unlock(&ino->i_lock); goto out; } +EXPORT_SYMBOL_GPL(pnfs_update_layout); int pnfs_layout_process(struct nfs4_layoutget *lgp) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1cfc96f..678c4c7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -185,6 +185,12 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); +struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + gfp_t gfp_flags); /* pnfs_dev.c */ struct nfs4_deviceid_node { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9b2a1d7..c394662 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -120,6 +120,7 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 00940dc9..7f8732e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1046,6 +1046,7 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) -- cgit v0.10.2 From aa5c01446610f0305f96251d0f9621866b8e5a14 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 13 Jun 2011 17:52:55 -0400 Subject: pnfs-obj: pnfs_osd_xdr: Remove dead code and cleanup * Some leftovers from ancient times. * This file will only define common types and client API. Remove server from comments Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h index 76efbdd..435dd5f 100644 --- a/include/linux/pnfs_osd_xdr.h +++ b/include/linux/pnfs_osd_xdr.h @@ -41,9 +41,6 @@ #include #include -#include - -#define PNFS_OSD_OSDNAME_MAXSIZE 256 /* * draft-ietf-nfsv4-minorversion-22 @@ -99,12 +96,6 @@ struct pnfs_osd_objid { #define _DEVID_HI(oid_device_id) \ (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) -static inline int -pnfs_osd_objid_xdr_sz(void) -{ - return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; -} - enum pnfs_osd_version { PNFS_OSD_MISSING = 0, PNFS_OSD_VERSION_1 = 1, @@ -189,8 +180,6 @@ struct pnfs_osd_targetid { struct nfs4_string oti_scsi_device_id; }; -enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; - /* struct netaddr4 { * // see struct rpcb in RFC1833 * string r_netid<>; // network id @@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr { struct pnfs_osd_net_addr ota_netaddr; }; -enum { - NETWORK_ID_MAX = 16 / 4, - UNIVERSAL_ADDRESS_MAX = 64 / 4, - PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, -}; - struct pnfs_osd_deviceaddr { struct pnfs_osd_targetid oda_targetid; struct pnfs_osd_targetaddr oda_targetaddr; @@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr { struct nfs4_string oda_osdname; }; -enum { - ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, - PNFS_OSD_DEVICEADDR_MAX = - PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + - 2 /*oda_lun*/ + - 1 + OSD_SYSTEMID_LEN + - 1 + ODA_OSDNAME_MAX, -}; - /* LAYOUTCOMMIT: layoutupdate */ /* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { @@ -279,7 +253,7 @@ struct pnfs_osd_ioerr { u32 oer_errno; }; -/* OSD XDR API */ +/* OSD XDR Client API */ /* Layout helpers */ /* Layout decoding is done in two parts: * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part @@ -337,8 +311,7 @@ extern int pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, struct pnfs_osd_layoutupdate *lou); -/* osd_ioerror encoding/decoding (layout_return) */ -/* Client */ +/* osd_ioerror encoding (layout_return) */ extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); -- cgit v0.10.2 From a56aaa02b1f723e28b41d339ddff02e958d32d43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Jun 2011 11:59:10 -0400 Subject: NFSv4.1: Clean up layoutreturn Since we take a reference to it, we really ought to pass the a pointer to the layout header in the arguments instead of assuming that NFS_I(inode)->layout will forever point to the correct object. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 325706f..93ef776 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5770,7 +5770,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); @@ -5799,7 +5799,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - put_layout_hdr(NFS_I(lrp->args.inode)->layout); + put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a7dc336..5fc2e5d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -667,6 +667,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; status = nfs4_proc_layoutreturn(lrp); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2094555..956d3576 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data { }; struct nfs4_layoutreturn_args { - __u32 layout_type; + struct pnfs_layout_hdr *layout; struct inode *inode; nfs4_stateid stateid; + __u32 layout_type; struct nfs4_sequence_args seq_args; }; -- cgit v0.10.2 From c47abcf8ff4d0c56d20ce541e80d3e1c975f54b5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 15 Jun 2011 17:52:40 -0400 Subject: NFSv4.1: do not use deviceids after MDS clientid invalidation Mark all deviceids established under an expired MDS clientid as invalid. Stop all new i/o through DS and send through the MDS. Don't use any new LAYOUTGETs that use the invalid deviceid. Purge all layouts established under the expired MDS clientid. Remove the MDS clientid deviceid and data servers reference Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 51c1909..af9bf9e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) __func__, data->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -373,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_fh *fh; int status; + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -456,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is being reaped */ + if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + goto out_put; + fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 68cce73..2e42284 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -96,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } +static inline struct nfs4_deviceid_node * +FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) +{ + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5fc2e5d..5b3cc3f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -452,6 +452,9 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); + spin_lock(&clp->cl_lock); rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 678c4c7..a59736e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -192,12 +192,20 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, gfp_t gfp_flags); +void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); + +/* nfs4_deviceid_flags */ +enum { + NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ +}; + /* pnfs_dev.c */ struct nfs4_deviceid_node { struct hlist_node node; struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; + unsigned long flags; struct nfs4_deviceid deviceid; atomic_t ref; }; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index fb9498d..6fda522 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -156,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; + d->flags = 0; d->deviceid = *id; atomic_set(&d->ref, 1); } @@ -253,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); } + +/* + * Stop use of all deviceids associated with an nfs_client + */ +void +nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + int i; + + rcu_read_lock(); + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + if (d->nfs_client == clp) + set_bit(NFS_DEVICEID_INVALID, &d->flags); + } + rcu_read_unlock(); +} -- cgit v0.10.2 From 87ed5eb44ad9338b1617a0e78dea81d681325298 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:01 -0400 Subject: NFS: Don't use DATA_SYNC writes If we're writing back data, and the FLUSH_STABLE flag is set, then we always want to use NFS_FILE_SYNC, since we're always in a situation where we're doing page reclaim, and so we want to free up the page as quickly as possible. If we're in the FLUSH_COND_STABLE case, then we either want to use another unstable write (if we have to do a commit anyway) or again, we want to use NFS_FILE_SYNC because we know that we have no more pages to write out. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7f8732e..1af4d82 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -872,10 +872,14 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; data->args.stable = NFS_UNSTABLE; - if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - data->args.stable = NFS_DATA_SYNC; - if (!nfs_need_commit(NFS_I(inode))) - data->args.stable = NFS_FILE_SYNC; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_need_commit(NFS_I(inode))) + break; + default: + data->args.stable = NFS_FILE_SYNC; } data->res.fattr = &data->fattr; -- cgit v0.10.2 From 6e4efd568574221840ee8dd86f176dc977c1330c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Clean up nfs_read_rpcsetup and nfs_write_rpcsetup Split them up into two parts: one which sets up the struct nfs_read/write_data, the other which sets up the actual RPC call or pNFS call. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c394662..248a554 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -213,17 +213,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg) +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { struct inode *inode = req->wb_context->path.dentry->d_inode; data->req = req; data->inode = inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -237,10 +234,21 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.count = count; data->res.eof = 0; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_read(struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) +{ + struct inode *inode = data->args.context->path.dentry->d_inode; + + if (lseg) { + data->lseg = get_lseg(lseg); + if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED) + return 0; + put_lseg(data->lseg); + data->lseg = NULL; + } return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } @@ -292,7 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + list_add(&data->list, &list); requests++; nbytes -= len; } while(nbytes != 0); @@ -304,15 +312,15 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) do { int ret2; - data = list_entry(list.next, struct nfs_read_data, pages); - list_del_init(&data->pages); + data = list_entry(list.next, struct nfs_read_data, list); + list_del_init(&data->list); data->pagevec[0] = page; if (nbytes < rsize) rsize = nbytes; - ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset, lseg); + nfs_read_rpcsetup(req, data, rsize, offset); + ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg); if (ret == 0) ret = ret2; offset += rsize; @@ -325,8 +333,8 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) out_bad: while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, pages); - list_del(&data->pages); + data = list_entry(list.next, struct nfs_read_data, list); + list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); @@ -362,8 +370,8 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, - 0, lseg); + nfs_read_rpcsetup(req, data, desc->pg_count, 0); + ret = nfs_do_read(data, &nfs_read_full_ops, lseg); out: put_lseg(lseg); desc->pg_lseg = NULL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1af4d82..0aeb09b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static int nfs_write_rpcsetup(struct nfs_page *req, +static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg, int how) { struct inode *inode = req->wb_context->path.dentry->d_inode; @@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -886,10 +883,22 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->res.count = count; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_write(struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) +{ + struct inode *inode = data->args.context->path.dentry->d_inode; + + if (lseg != NULL) { + data->lseg = get_lseg(lseg); + if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED) + return 0; + put_lseg(data->lseg); + data->lseg = NULL; + } return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } @@ -938,7 +947,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + list_add(&data->list, &list); requests++; nbytes -= len; } while (nbytes != 0); @@ -950,15 +959,16 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) do { int ret2; - data = list_entry(list.next, struct nfs_write_data, pages); - list_del_init(&data->pages); + data = list_entry(list.next, struct nfs_write_data, list); + list_del_init(&data->list); data->pagevec[0] = page; if (nbytes < wsize) wsize = nbytes; - ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg, + desc->pg_ioflags); if (ret == 0) ret = ret2; offset += wsize; @@ -971,8 +981,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) out_bad: while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, pages); - list_del(&data->pages); + data = list_entry(list.next, struct nfs_write_data, list); + list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); @@ -1024,7 +1034,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); + ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags); out: put_lseg(lseg); /* Cleans any gotten in ->pg_test */ desc->pg_lseg = NULL; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 956d3576..5b11595 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1126,6 +1126,7 @@ struct nfs_read_data { struct rpc_cred *cred; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ + struct list_head list; /* lists of struct nfs_read_data */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ @@ -1149,6 +1150,7 @@ struct nfs_write_data { struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ + struct list_head list; /* lists of struct nfs_write_data */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ -- cgit v0.10.2 From 3b6091846d5b6113d695c79caec7cc96b62d469b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 15 Jul 2011 03:33:42 -0400 Subject: NFS: fix return value of nfs_pagein_one/nfs_flush_one Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 248a554..581534a4 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -351,12 +351,13 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) struct nfs_read_data *data; struct list_head *head = &desc->pg_list; struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret = -ENOMEM; + int ret = 0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_async_read_error(head); + ret = -ENOMEM; goto out; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aeb09b..d9dd744 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1006,7 +1006,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) struct nfs_write_data *data; struct list_head *head = &desc->pg_list; struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret; + int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); -- cgit v0.10.2 From 275acaafd45fbc8ecc3beabd6367e60b3049606a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Clean up: split out the RPC transmission from nfs_pagein_multi/one ...and do the same for nfs_flush_multi/one. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 581534a4..0215bac 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,8 +30,6 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -253,6 +251,27 @@ static int nfs_do_read(struct nfs_read_data *data, return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } +static int +nfs_do_multiple_reads(struct list_head *head, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) +{ + struct nfs_read_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_read(data, call_ops, lseg); + if (ret == 0) + ret = ret2; + } + return ret; +} + static void nfs_async_read_error(struct list_head *head) { @@ -279,7 +298,7 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; @@ -288,11 +307,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - LIST_HEAD(list); nfs_list_remove_request(req); + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); @@ -300,57 +318,33 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->list, &list); + data->pagevec[0] = page; + nfs_read_rpcsetup(req, data, len, offset); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_read_data, list); - list_del_init(&data->list); - - data->pagevec[0] = page; - - if (nbytes < rsize) - rsize = nbytes; - nfs_read_rpcsetup(req, data, rsize, offset); - ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg); - if (ret == 0) - ret = ret2; - offset += rsize; - nbytes -= rsize; - } while (nbytes != 0); - put_lseg(lseg); - desc->pg_lseg = NULL; - return ret; - out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, list); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_read_data, list); list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); nfs_readpage_release(req); - put_lseg(lseg); - desc->pg_lseg = NULL; return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret = 0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, @@ -372,18 +366,32 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) req = nfs_list_entry(data->pages.next); nfs_read_rpcsetup(req, data, desc->pg_count, 0); - ret = nfs_do_read(data, &nfs_read_full_ops, lseg); + list_add(&data->list, res); out: - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; } int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc); - return nfs_pagein_one(desc); + LIST_HEAD(head); + int ret; + + if (desc->pg_bsize < PAGE_CACHE_SIZE) { + ret = nfs_pagein_multi(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, + &nfs_read_partial_ops, + desc->pg_lseg); + } else { + ret = nfs_pagein_one(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, + &nfs_read_full_ops, + desc->pg_lseg); + } + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; } EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d9dd744..bd4b34e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -903,6 +903,27 @@ static int nfs_do_write(struct nfs_write_data *data, return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } +static int nfs_do_multiple_writes(struct list_head *head, + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) +{ + struct nfs_write_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_write(data, call_ops, lseg, how); + if (ret == 0) + ret = ret2; + } + return ret; +} + /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -920,7 +941,7 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; @@ -929,8 +950,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - LIST_HEAD(list); nfs_list_remove_request(req); @@ -940,6 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -947,47 +967,23 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->list, &list); + data->pagevec[0] = page; + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_write_data, list); - list_del_init(&data->list); - - data->pagevec[0] = page; - - if (nbytes < wsize) - wsize = nbytes; - nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); - ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg, - desc->pg_ioflags); - if (ret == 0) - ret = ret2; - offset += wsize; - nbytes -= wsize; - } while (nbytes != 0); - - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, list); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_write_data, list); list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); - put_lseg(lseg); - desc->pg_lseg = NULL; return -ENOMEM; } @@ -999,13 +995,12 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, @@ -1035,18 +1030,34 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) /* Set up the argument struct */ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); - ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags); + list_add(&data->list, res); out: - put_lseg(lseg); /* Cleans any gotten in ->pg_test */ - desc->pg_lseg = NULL; return ret; } int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc); - return nfs_flush_one(desc); + LIST_HEAD(head); + int ret; + + if (desc->pg_bsize < PAGE_CACHE_SIZE) { + ret = nfs_flush_multi(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, + &nfs_write_partial_ops, + desc->pg_lseg, + desc->pg_ioflags); + } else { + ret = nfs_flush_one(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, + &nfs_write_full_ops, + desc->pg_lseg, + desc->pg_ioflags); + } + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; } EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); -- cgit v0.10.2 From 50828d7e6767a92726708bc0666e2b8b84575808 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Cache rpc_ops in struct nfs_pageio_descriptor Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0215bac..c14362f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -327,6 +327,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head } while(nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); + desc->pg_rpc_callops = &nfs_read_partial_ops; return ret; out_bad: while (!list_empty(res)) { @@ -367,6 +368,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * nfs_read_rpcsetup(req, data, desc->pg_count, 0); list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_read_full_ops; out: return ret; } @@ -376,19 +378,14 @@ int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) { + if (desc->pg_bsize < PAGE_CACHE_SIZE) ret = nfs_pagein_multi(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_reads(&head, - &nfs_read_partial_ops, - desc->pg_lseg); - } else { + else ret = nfs_pagein_one(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_reads(&head, - &nfs_read_full_ops, - desc->pg_lseg); - } + + if (ret == 0) + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops, + desc->pg_lseg); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bd4b34e..71fbba7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -975,6 +975,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); + desc->pg_rpc_callops = &nfs_write_partial_ops; return ret; out_bad: @@ -1031,6 +1032,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r /* Set up the argument struct */ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_write_full_ops; out: return ret; } @@ -1040,21 +1042,13 @@ int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) { + if (desc->pg_bsize < PAGE_CACHE_SIZE) ret = nfs_flush_multi(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_writes(&head, - &nfs_write_partial_ops, - desc->pg_lseg, - desc->pg_ioflags); - } else { + else ret = nfs_flush_one(desc, &head); - if (ret == 0) - ret = nfs_do_multiple_writes(&head, - &nfs_write_full_ops, - desc->pg_lseg, - desc->pg_ioflags); - } + if (ret == 0) + ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, + desc->pg_lseg, desc->pg_ioflags); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 9ac2dd1..db3194f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -74,6 +74,7 @@ struct nfs_pageio_descriptor { const struct nfs_pageio_ops *pg_ops; int pg_ioflags; int pg_error; + const struct rpc_call_ops *pg_rpc_callops; struct pnfs_layout_segment *pg_lseg; }; -- cgit v0.10.2 From d097971d8ab4042eaa4bff98698ae9cc55942327 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request Instead of looking up the rsize and wsize, the routines that generate the RPC requests should really be using the pg_bsize, since that is what we use when deciding whether or not to coalesce write requests... Signed-off-by: Trond Myklebust diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c14362f..75088f5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -303,7 +303,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_read_data *data; - size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; + size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 71fbba7..c88a1ab 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -946,7 +946,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_write_data *data; - size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; + size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; -- cgit v0.10.2 From d9156f9f364897e93bdd98b4ad22138de18f7c24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jul 2011 13:42:02 -0400 Subject: NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed If an attempt to do pNFS fails, and we have to fall back to writing through the MDS, then we may want to re-coalesce the requests that we already have since the block size for the MDS read/writes may be different to that of the DS read/writes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d421e19..7139dbf 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_bsize = bsize; desc->pg_base = 0; desc->pg_moreio = 0; + desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_error < 0) return 0; desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; } return 1; } +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + + do { + list_splice_init(&desc->pg_list, &head); + desc->pg_bytes_written -= desc->pg_count; + desc->pg_count = 0; + desc->pg_base = 0; + desc->pg_recoalesce = 0; + + while (!list_empty(&head)) { + struct nfs_page *req; + + req = list_first_entry(&head, struct nfs_page, wb_list); + nfs_list_remove_request(req); + if (__nfs_pageio_add_request(desc, req)) + continue; + if (desc->pg_error < 0) + return 0; + break; + } + } while (desc->pg_recoalesce); + return 1; +} + +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + int ret; + + do { + ret = __nfs_pageio_add_request(desc, req); + if (ret) + break; + if (desc->pg_error < 0) + break; + ret = nfs_do_recoalesce(desc); + } while (ret); + return ret; +} + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - nfs_pageio_doio(desc); + for (;;) { + nfs_pageio_doio(desc); + if (!desc->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } } /** @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) if (!list_empty(&desc->pg_list)) { struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); if (index != prev->wb_index + 1) - nfs_pageio_doio(desc); + nfs_pageio_complete(desc); } } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index db3194f..7241b2a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,7 +68,8 @@ struct nfs_pageio_descriptor { size_t pg_count; size_t pg_bsize; unsigned int pg_base; - char pg_moreio; + unsigned char pg_moreio : 1, + pg_recoalesce : 1; struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; -- cgit v0.10.2 From 493292ddc78d18ee2ad2d5c24c2b7dd6a24641d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:58:28 -0400 Subject: NFS: Move the pnfs read code into pnfs.c ...and ensure that we recoalese to take into account differences in block sizes when falling back to read through the MDS. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31e8b50..d74d7de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -291,14 +291,18 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pageio_descriptor; /* read.c */ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct list_head *head); -struct nfs_pageio_descriptor; extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode); +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index af9bf9e..fc556d6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, static const struct nfs_pageio_ops filelayout_pg_read_ops = { .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, - .pg_doio = nfs_generic_pg_readpages, + .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 70272d5..add6289 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, static const struct nfs_pageio_ops objio_pg_read_ops = { .pg_init = pnfs_generic_pg_init_read, .pg_test = objio_pg_test, - .pg_doio = nfs_generic_pg_readpages, + .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b3cc3f..9eca5a8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -28,6 +28,7 @@ */ #include +#include #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); +static void +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_read_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + nfs_readdata_release(data); +} + /* * Call the appropriate parallel I/O subsystem read function. */ -enum pnfs_try_status +static enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *rdata, - const struct rpc_call_ops *call_ops) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) { struct inode *inode = rdata->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; rdata->mds_ops = call_ops; + rdata->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); @@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, return trypnfs; } +static void +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + struct nfs_read_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_reads(desc, &head); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); + /* * Currently there is only one (whole file) write lseg. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a59736e..c40ffa5 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, - const struct rpc_call_ops *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); @@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) } static inline enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops) -{ - return PNFS_NOT_ATTEMPTED; -} - -static inline enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 75088f5..d2f53dd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_read_data *rdata) { put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); @@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; +} + static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, } static int nfs_do_read(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg) + const struct rpc_call_ops *call_ops) { struct inode *inode = data->args.context->path.dentry->d_inode; - if (lseg) { - data->lseg = get_lseg(lseg); - if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED) - return 0; - put_lseg(data->lseg); - data->lseg = NULL; - } - return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } static int nfs_do_multiple_reads(struct list_head *head, - const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg) + const struct rpc_call_ops *call_ops) { struct nfs_read_data *data; int ret = 0; @@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head, data = list_entry(head->next, struct nfs_read_data, list); list_del_init(&data->list); - ret2 = nfs_do_read(data, call_ops, lseg); + ret2 = nfs_do_read(data, call_ops); if (ret == 0) ret = ret2; } @@ -373,25 +369,23 @@ out: return ret; } -int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc, head); + return nfs_pagein_one(desc, head); +} + +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) - ret = nfs_pagein_multi(desc, &head); - else - ret = nfs_pagein_one(desc, &head); - + ret = nfs_generic_pagein(desc, &head); if (ret == 0) - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops, - desc->pg_lseg); - put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); return ret; } -EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); - static const struct nfs_pageio_ops nfs_pageio_read_ops = { .pg_test = nfs_generic_pg_test, diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7241b2a..0a48f84 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); extern void nfs_clear_page_tag_locked(struct nfs_page *req); -extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); -- cgit v0.10.2 From dce81290eed64d24493989bb7a08f9e20495e184 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:59:19 -0400 Subject: NFS: Move the pnfs write code into pnfs.c ...and ensure that we recoalese to take into account differences in differences in block sizes when falling back to write through the MDS. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d74d7de..09e20de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -305,8 +305,12 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); +extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fc556d6..fbc5b42 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -741,7 +741,7 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = { static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = pnfs_generic_pg_writepages, }; static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index add6289..7d49bb1 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1013,7 +1013,7 @@ static const struct nfs_pageio_ops objio_pg_read_ops = { static const struct nfs_pageio_ops objio_pg_write_ops = { .pg_init = pnfs_generic_pg_init_write, .pg_test = objio_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = pnfs_generic_pg_writepages, }; static struct pnfs_layoutdriver_type objlayout_type = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9eca5a8..93c7329 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1170,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); -enum pnfs_try_status +static void +pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_write_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + nfs_writedata_release(data); +} + +static enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, - const struct rpc_call_ops *call_ops, int how) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) { struct inode *inode = wdata->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); wdata->mds_ops = call_ops; + wdata->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); @@ -1194,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +static void +pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +{ + struct nfs_write_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); + /* * Called by non rpc-based layout drivers */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c40ffa5..078670d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -155,11 +155,10 @@ bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int) void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); -enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, - const struct rpc_call_ops *, int); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -328,13 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, int how) -{ - return PNFS_NOT_ATTEMPTED; -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c88a1ab..cabe5f6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_write_data *wdata) { put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); @@ -887,25 +887,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req, static int nfs_do_write(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg, int how) { struct inode *inode = data->args.context->path.dentry->d_inode; - if (lseg != NULL) { - data->lseg = get_lseg(lseg); - if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED) - return 0; - put_lseg(data->lseg); - data->lseg = NULL; - } - return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } static int nfs_do_multiple_writes(struct list_head *head, const struct rpc_call_ops *call_ops, - struct pnfs_layout_segment *lseg, int how) { struct nfs_write_data *data; @@ -917,7 +907,7 @@ static int nfs_do_multiple_writes(struct list_head *head, data = list_entry(head->next, struct nfs_write_data, list); list_del_init(&data->list); - ret2 = nfs_do_write(data, call_ops, lseg, how); + ret2 = nfs_do_write(data, call_ops, how); if (ret == 0) ret = ret2; } @@ -1037,23 +1027,24 @@ out: return ret; } -int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc, head); + return nfs_flush_one(desc, head); +} + +static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { LIST_HEAD(head); int ret; - if (desc->pg_bsize < PAGE_CACHE_SIZE) - ret = nfs_flush_multi(desc, &head); - else - ret = nfs_flush_one(desc, &head); + ret = nfs_generic_flush(desc, &head); if (ret == 0) ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, - desc->pg_lseg, desc->pg_ioflags); - put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; + desc->pg_ioflags); return ret; } -EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages); static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_test = nfs_generic_pg_test, @@ -1068,6 +1059,12 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); +void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; +} + static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0a48f84..e2791a2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -108,9 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); extern void nfs_clear_page_tag_locked(struct nfs_page *req); -extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); - - /* * Lock the page of an asynchronous request without getting a new reference */ -- cgit v0.10.2 From 1f9453578f059d2651aa6c6b16756627fc9f2a74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 15:59:57 -0400 Subject: NFS: Clean up - simplify the switch to read/write-through-MDS Use nfs_pageio_reset_read_mds and nfs_pageio_reset_write_mds instead of completely reinitialising the struct nfs_pageio_descriptor. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 09e20de..ab12913 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -299,16 +299,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head); -extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); -extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fbc5b42..f0b37e1 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -711,7 +711,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, GFP_KERNEL); /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + nfs_pageio_reset_read_mds(pgio); } void @@ -728,8 +728,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_write_mds(pgio, pgio->pg_inode, - pgio->pg_ioflags); + nfs_pageio_reset_write_mds(pgio); } static const struct nfs_pageio_ops filelayout_pg_read_ops = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 93c7329..38e5508 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1075,7 +1075,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r GFP_KERNEL); /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_read_mds(pgio, pgio->pg_inode); + nfs_pageio_reset_read_mds(pgio); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); @@ -1093,7 +1093,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags); + nfs_pageio_reset_write_mds(pgio); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d2f53dd..7cba228 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -112,19 +112,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } -void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, NFS_SERVER(inode)->rsize, 0); } -EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { pgio->pg_ops = &nfs_pageio_read_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } +EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cabe5f6..9fba527 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1051,19 +1051,19 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, NFS_SERVER(inode)->wsize, ioflags); } -EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { pgio->pg_ops = &nfs_pageio_write_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } +EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) -- cgit v0.10.2 From 9e00abc3c20904fd6a5d888bb7023925799ec8a5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:20:49 -0400 Subject: SUNRPC: sunrpc should not explicitly depend on NFS config options Change explicit references to CONFIG_NFS_V4_1 to implicit ones Get rid of the unnecessary defines in backchannel_rqst.c and bc_svc.c: the Makefile takes care of those dependency. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 8151554..2cde5d9 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -77,6 +77,7 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select SUNRPC_BACKCHANNEL select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 0828842..f7f3ce34 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#ifdef CONFIG_NFS_V4_1 +#ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); @@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 1; return 0; } -#else /* CONFIG_NFS_V4_1 */ +#else /* CONFIG_SUNRPC_BACKCHANNEL */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { @@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) static inline void xprt_free_bc_request(struct rpc_rqst *req) { } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea29330..9a3fa7b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -92,7 +92,7 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ @@ -100,7 +100,7 @@ struct svc_serv { wait_queue_head_t sv_cb_waitq; /* sleep here if there are no * entries in the svc_cb_list */ struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81cce3b..217b020 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -100,11 +100,11 @@ struct rpc_rqst { ktime_t rq_xtime; /* transmit time stamp */ int rq_ntrans; -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANEL */ }; #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len @@ -200,7 +200,7 @@ struct rpc_xprt { u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ unsigned int bc_alloc_count; /* Total number of preallocs */ @@ -208,7 +208,7 @@ struct rpc_xprt { * items */ struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ struct list_head recv; struct { @@ -228,15 +228,15 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; }; -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Backchannel flags */ #define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */ /* buffer in use */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static inline int bc_prealloc(struct rpc_rqst *req) { return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); @@ -246,7 +246,7 @@ static inline int bc_prealloc(struct rpc_rqst *req) { return 0; } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index b2198e6..ffd243d 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -4,6 +4,10 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_BACKCHANNEL + bool + depends on SUNRPC + config SUNRPC_XPRT_RDMA tristate depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 9d2fca5..8209a04 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o -sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index cf06af3..bb5593f 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_TRANS #endif -#if defined(CONFIG_NFS_V4_1) - /* * Helper routines that track the number of preallocation elements * on the transport. @@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req) spin_unlock_bh(&xprt->bc_pa_lock); } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 1dd1a68..0b2eb38 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * reply over an existing open connection previously established by the client. */ -#if defined(CONFIG_NFS_V4_1) - #include #include @@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req) return ret; } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 64c3fe1..d3fe866 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -64,9 +64,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static void call_bc_transmit(struct rpc_task *task); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -716,7 +716,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it @@ -759,7 +759,7 @@ out: dprintk("RPC: rpc_run_bc_task: task= %p\n", task); return task; } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ void rpc_call_start(struct rpc_task *task) @@ -1362,7 +1362,7 @@ call_transmit_status(struct rpc_task *task) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * 5b. Send the backchannel RPC reply. On error, drop the reply. In * addition, disconnect on connectivity errors. @@ -1426,7 +1426,7 @@ call_bc_transmit(struct rpc_task *task) } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * 6. Sort out the RPC call status diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b90292..3c6fe22 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Process a backchannel RPC request that arrived over an existing * outbound connection @@ -1301,7 +1301,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, } } EXPORT_SYMBOL(bc_svc_process); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Return (transport-specific) limit on the rpc payload. diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af04f77..a1812a2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -66,12 +66,12 @@ static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); static void svc_bc_sock_free(struct svc_xprt *xprt); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; @@ -1241,7 +1241,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); @@ -1282,7 +1282,7 @@ static void svc_cleanup_bc_xprt_sock(void) { svc_unreg_xprt_class(&svc_tcp_bc_class); } -#else /* CONFIG_NFS_V4_1 */ +#else /* CONFIG_SUNRPC_BACKCHANNEL */ static void svc_init_bc_xprt_sock(void) { } @@ -1290,7 +1290,7 @@ static void svc_init_bc_xprt_sock(void) static void svc_cleanup_bc_xprt_sock(void) { } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, @@ -1621,7 +1621,7 @@ static void svc_sock_free(struct svc_xprt *xprt) kfree(svsk); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Create a back channel svc_xprt which shares the fore channel socket. */ @@ -1660,4 +1660,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt) if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ce5eb68..fbdbaf2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1111,10 +1111,10 @@ found: INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) spin_lock_init(&xprt->bc_pa_lock); INIT_LIST_HEAD(&xprt->bc_pa_list); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 72abb73..cd6c410 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -37,7 +37,7 @@ #include #include #include -#ifdef CONFIG_NFS_V4_1 +#ifdef CONFIG_SUNRPC_BACKCHANNEL #include #endif @@ -1236,7 +1236,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, return 0; } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Obtains an rpc_rqst previously allocated and invokes the common * tcp read code to read the data. The result is placed in the callback @@ -1299,7 +1299,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, { return xs_tcp_read_reply(xprt, desc); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Read data off the transport. This can be either an RPC_CALL or an -- cgit v0.10.2 From 0d961aa934b799ca7369db582e52952cc50c656d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:24:15 -0400 Subject: SUNRPC: Convert the backchannel exports to EXPORT_SYMBOL_GPL Ensure that the backchannel exports conform to the existing sunrpc practice. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index bb5593f..91eaa26 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -172,7 +172,7 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -1; } -EXPORT_SYMBOL(xprt_setup_backchannel); +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /* * Destroys the backchannel preallocated structures. @@ -202,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -EXPORT_SYMBOL(xprt_destroy_backchannel); +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); /* * One or more rpc_rqst structure have been preallocated during the diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 3c6fe22..6a69a11 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1300,7 +1300,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, return 0; } } -EXPORT_SYMBOL(bc_svc_process); +EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f008c14..277ebd4 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) kaddr[buf->page_base + len] = '\0'; kunmap_atomic(kaddr, KM_USER0); } -EXPORT_SYMBOL(xdr_terminate_string); +EXPORT_SYMBOL_GPL(xdr_terminate_string); void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, -- cgit v0.10.2 From 94b134ac8e9965309e70684b504c53bca36338b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Jul 2011 19:26:49 -0400 Subject: NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL This is not part of an external ABI... Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5452ada..19ea7d9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1463,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, dprintk("<-- %s %p\n", __func__, clp); return clp; } -EXPORT_SYMBOL(nfs4_set_ds_client); +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. -- cgit v0.10.2 From 674e405b8b3310702fd43d314f5f432ec2cb9980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 15 Jul 2011 19:09:08 -0400 Subject: nfs: document nfsv4 sillyrename issues Somebody working on this code asked what the deal was with NFSv4, since this comment notes that it's v2/v3's statelessness that requires sillyrename. Shouldn't hurt to document the answer. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 8d6864c..981298c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -501,6 +501,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, * and only performs the unlink once the last reference to it is put. * * The final cleanup is done during dentry_iput. + * + * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server + * could take responsibility for keeping open files referenced. The server + * would also need to ensure that opened-but-deleted files were kept over + * reboots. However, we may not assume a server does so. (RFC 5661 + * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can + * use to advertise that it does this; some day we may take advantage of + * it.)) */ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) -- cgit v0.10.2 From f85ef69ce08bc2209858135328335f668ba35bdb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 15 Jul 2011 19:18:42 -0400 Subject: pnfs: simplify pnfs files module autoloading Embed the necessary alias into the module rather than waiting for someone to add it to /etc/modprobe.conf Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f0b37e1..be93a62 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -964,5 +964,7 @@ static void __exit nfs4filelayout_exit(void) pnfs_unregister_layoutdriver(&filelayout_type); } +MODULE_ALIAS("nfs-layouttype4-1"); + module_init(nfs4filelayout_init); module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 7d49bb1..9383ca7 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1065,5 +1065,7 @@ objlayout_exit(void) __func__); } +MODULE_ALIAS("nfs-layouttype4-2"); + module_init(objlayout_init); module_exit(objlayout_exit); -- cgit v0.10.2 From 43cedbf0e8dfb9c5610eb7985d5f21263e313802 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 16:01:03 -0400 Subject: SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot This throttles the allocation of new slots when the socket is busy reconnecting and/or is out of buffer space. Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 217b020..a876882 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -111,7 +111,7 @@ struct rpc_rqst { struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); - int (*reserve_xprt)(struct rpc_task *task); + int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); @@ -271,8 +271,8 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); -int xprt_reserve_xprt(struct rpc_task *task); -int xprt_reserve_xprt_cong(struct rpc_task *task); +int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); +int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fbdbaf2..ccd583a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -191,10 +191,9 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); * transport connects from colliding with writes. No congestion control * is provided. */ -int xprt_reserve_xprt(struct rpc_task *task) +int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = req->rq_xprt; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -202,8 +201,10 @@ int xprt_reserve_xprt(struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - req->rq_bytes_sent = 0; - req->rq_ntrans++; + if (req != NULL) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } return 1; @@ -212,7 +213,7 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req->rq_ntrans) + if (req != NULL && req->rq_ntrans) rpc_sleep_on(&xprt->resend, task, NULL); else rpc_sleep_on(&xprt->sending, task, NULL); @@ -239,9 +240,8 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) * integrated into the decision of whether a request is allowed to be * woken up and given access to the transport. */ -int xprt_reserve_xprt_cong(struct rpc_task *task) +int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { @@ -249,12 +249,14 @@ int xprt_reserve_xprt_cong(struct rpc_task *task) return 1; goto out_sleep; } + if (req == NULL) { + xprt->snd_task = task; + return 1; + } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return 1; } xprt_clear_locked(xprt); @@ -262,7 +264,7 @@ out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) + if (req != NULL && req->rq_ntrans) rpc_sleep_on(&xprt->resend, task, NULL); else rpc_sleep_on(&xprt->sending, task, NULL); @@ -275,7 +277,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) int retval; spin_lock_bh(&xprt->transport_lock); - retval = xprt->ops->reserve_xprt(task); + retval = xprt->ops->reserve_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return retval; } @@ -291,7 +293,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) task = rpc_wake_up_next(&xprt->resend); if (!task) { task = rpc_wake_up_next(&xprt->sending); - if (!task) + if (task == NULL) goto out_unlock; } @@ -310,6 +312,7 @@ out_unlock: static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { struct rpc_task *task; + struct rpc_rqst *req; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; @@ -318,16 +321,19 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) task = rpc_wake_up_next(&xprt->resend); if (!task) { task = rpc_wake_up_next(&xprt->sending); - if (!task) + if (task == NULL) goto out_unlock; } + + req = task->tk_rqstp; + if (req == NULL) { + xprt->snd_task = task; + return; + } if (__xprt_get_cong(xprt, task)) { - struct rpc_rqst *req = task->tk_rqstp; xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return; } out_unlock: @@ -852,7 +858,7 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_reply_bytes_recvd; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) + if (!xprt->ops->reserve_xprt(xprt, task)) err = -EAGAIN; out_unlock: spin_unlock_bh(&xprt->transport_lock); @@ -933,8 +939,6 @@ static void xprt_alloc_slot(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; - if (task->tk_rqstp) - return; if (!list_empty(&xprt->free)) { struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); list_del_init(&req->rq_list); @@ -944,7 +948,6 @@ static void xprt_alloc_slot(struct rpc_task *task) } dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; - task->tk_timeout = 0; rpc_sleep_on(&xprt->backlog, task, NULL); } @@ -1001,10 +1004,25 @@ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + /* Note: grabbing the xprt_lock_write() here is not strictly needed, + * but ensures that we throttle new slot allocation if the transport + * is congested (e.g. if reconnecting or if we're out of socket + * write buffer space). + */ + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (!xprt_lock_write(xprt, task)) + return; + task->tk_status = -EIO; spin_lock(&xprt->reserve_lock); xprt_alloc_slot(task); spin_unlock(&xprt->reserve_lock); + xprt_release_write(xprt, task); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0867070..674a492 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -452,9 +452,8 @@ xprt_rdma_connect(struct rpc_task *task) } static int -xprt_rdma_reserve_xprt(struct rpc_task *task) +xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int credits = atomic_read(&r_xprt->rx_buf.rb_credits); @@ -466,7 +465,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task) BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); } xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(task); + return xprt_reserve_xprt_cong(xprt, task); } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cd6c410..adaa54c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -755,6 +755,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) if (task == NULL) goto out_release; req = task->tk_rqstp; + if (req == NULL) + goto out_release; if (req->rq_bytes_sent == 0) goto out_release; if (req->rq_bytes_sent == req->rq_snd_buf.len) -- cgit v0.10.2 From 8d9266ffe4332afc5ac9de401ef6f825b3798585 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 16:01:09 -0400 Subject: SUNRPC: Initalise the struct xprt upon allocation Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ccd583a..efb8dc5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1091,38 +1091,9 @@ void xprt_release(struct rpc_task *task) xprt_free_bc_request(req); } -/** - * xprt_create_transport - create an RPC transport - * @args: rpc transport creation arguments - * - */ -struct rpc_xprt *xprt_create_transport(struct xprt_create *args) +static void xprt_init(struct rpc_xprt *xprt) { - struct rpc_xprt *xprt; struct rpc_rqst *req; - struct xprt_class *t; - - spin_lock(&xprt_list_lock); - list_for_each_entry(t, &xprt_list, list) { - if (t->ident == args->ident) { - spin_unlock(&xprt_list_lock); - goto found; - } - } - spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); - return ERR_PTR(-EIO); - -found: - xprt = t->setup(args); - if (IS_ERR(xprt)) { - dprintk("RPC: xprt_create_transport: failed, %ld\n", - -PTR_ERR(xprt)); - return xprt; - } - if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) - /* ->setup returned a pre-initialized xprt: */ - return xprt; spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -1156,6 +1127,42 @@ found: xprt_init_xid(xprt); +} + +/** + * xprt_create_transport - create an RPC transport + * @args: rpc transport creation arguments + * + */ +struct rpc_xprt *xprt_create_transport(struct xprt_create *args) +{ + struct rpc_xprt *xprt; + struct xprt_class *t; + + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t->ident == args->ident) { + spin_unlock(&xprt_list_lock); + goto found; + } + } + spin_unlock(&xprt_list_lock); + printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + return ERR_PTR(-EIO); + +found: + xprt = t->setup(args); + if (IS_ERR(xprt)) { + dprintk("RPC: xprt_create_transport: failed, %ld\n", + -PTR_ERR(xprt)); + return xprt; + } + if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) + /* ->setup returned a pre-initialized xprt: */ + return xprt; + + xprt_init(xprt); + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); return xprt; -- cgit v0.10.2 From 21de0a955f3af29fa1100d96f66e6adade89e77a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 16:57:32 -0400 Subject: SUNRPC: Clean up the slot table allocation Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a876882..0504725 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -168,7 +168,6 @@ struct rpc_xprt { struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct list_head free; /* free slots */ - struct rpc_rqst * slot; /* slot table storage */ unsigned int max_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ @@ -321,7 +320,6 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) -#define XPRT_INITIALIZED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index efb8dc5..ea7b3c1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,7 @@ /* * Local functions */ +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); @@ -961,25 +962,42 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } -struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) +static void xprt_free_all_slots(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + while (!list_empty(&xprt->free)) { + req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + kfree(req); + } +} + +struct rpc_xprt *xprt_alloc(struct net *net, int size, int num_prealloc) { struct rpc_xprt *xprt; + struct rpc_rqst *req; + int i; xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; - atomic_set(&xprt->count, 1); - xprt->max_reqs = max_req; - xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) + xprt_init(xprt, net); + + for (i = 0; i < num_prealloc; i++) { + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (!req) + break; + list_add(&req->rq_list, &xprt->free); + } + if (i < num_prealloc) goto out_free; + xprt->max_reqs = num_prealloc; - xprt->xprt_net = get_net(net); return xprt; out_free: - kfree(xprt); + xprt_free(xprt); out: return NULL; } @@ -988,7 +1006,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { put_net(xprt->xprt_net); - kfree(xprt->slot); + xprt_free_all_slots(xprt); kfree(xprt); } EXPORT_SYMBOL_GPL(xprt_free); @@ -1091,9 +1109,9 @@ void xprt_release(struct rpc_task *task) xprt_free_bc_request(req); } -static void xprt_init(struct rpc_xprt *xprt) +static void xprt_init(struct rpc_xprt *xprt, struct net *net) { - struct rpc_rqst *req; + atomic_set(&xprt->count, 1); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -1105,12 +1123,6 @@ static void xprt_init(struct rpc_xprt *xprt) INIT_LIST_HEAD(&xprt->bc_pa_list); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - INIT_WORK(&xprt->task_cleanup, xprt_autoclose); - if (xprt_has_timer(xprt)) - setup_timer(&xprt->timer, xprt_init_autodisconnect, - (unsigned long)xprt); - else - init_timer(&xprt->timer); xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; xprt->bind_index = 0; @@ -1121,12 +1133,9 @@ static void xprt_init(struct rpc_xprt *xprt) rpc_init_wait_queue(&xprt->resend, "xprt_resend"); rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - xprt_init_xid(xprt); + xprt->xprt_net = get_net(net); } /** @@ -1155,16 +1164,17 @@ found: if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); - return xprt; + goto out; } - if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) - /* ->setup returned a pre-initialized xprt: */ - return xprt; - - xprt_init(xprt); - + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); + if (xprt_has_timer(xprt)) + setup_timer(&xprt->timer, xprt_init_autodisconnect, + (unsigned long)xprt); + else + init_timer(&xprt->timer); dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); +out: return xprt; } -- cgit v0.10.2 From d9ba131d8f58c0d2ff5029e7002ab43f913b36f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 18:11:30 -0400 Subject: SUNRPC: Support dynamic slot allocation for TCP connections Allow the number of available slots to grow with the TCP window size. Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0504725..d02762d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -22,6 +22,7 @@ #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) +#define RPC_MAX_SLOT_TABLE_LIMIT (65536U) /* * This describes a timeout strategy @@ -168,7 +169,9 @@ struct rpc_xprt { struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct list_head free; /* free slots */ - unsigned int max_reqs; /* total slots */ + unsigned int max_reqs; /* max number of slots */ + unsigned int min_reqs; /* min number of slots */ + atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ resvport : 1; /* use a reserved port */ @@ -281,7 +284,9 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); -struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); +struct rpc_xprt * xprt_alloc(struct net *net, size_t size, + unsigned int num_prealloc, + unsigned int max_req); void xprt_free(struct rpc_xprt *); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ea7b3c1..be85cf0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -935,25 +935,66 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req = ERR_PTR(-EAGAIN); + + if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) + goto out; + req = kzalloc(sizeof(struct rpc_rqst), gfp_flags); + if (req != NULL) + goto out; + atomic_dec(&xprt->num_reqs); + req = ERR_PTR(-ENOMEM); +out: + return req; +} + +static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { + kfree(req); + return true; + } + return false; +} + static void xprt_alloc_slot(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req; - task->tk_status = 0; if (!list_empty(&xprt->free)) { - struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); - list_del_init(&req->rq_list); - task->tk_rqstp = req; - xprt_request_init(task, xprt); - return; + req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + goto out_init_req; + } + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); + if (!IS_ERR(req)) + goto out_init_req; + switch (PTR_ERR(req)) { + case -ENOMEM: + rpc_delay(task, HZ >> 2); + dprintk("RPC: dynamic allocation of request slot " + "failed! Retrying\n"); + break; + case -EAGAIN: + rpc_sleep_on(&xprt->backlog, task, NULL); + dprintk("RPC: waiting for request slot\n"); } - dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->backlog, task, NULL); + return; +out_init_req: + task->tk_status = 0; + task->tk_rqstp = req; + xprt_request_init(task, xprt); } static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { + if (xprt_dynamic_free_slot(xprt, req)) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ spin_lock(&xprt->reserve_lock); @@ -972,7 +1013,9 @@ static void xprt_free_all_slots(struct rpc_xprt *xprt) } } -struct rpc_xprt *xprt_alloc(struct net *net, int size, int num_prealloc) +struct rpc_xprt *xprt_alloc(struct net *net, size_t size, + unsigned int num_prealloc, + unsigned int max_alloc) { struct rpc_xprt *xprt; struct rpc_rqst *req; @@ -992,7 +1035,12 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int num_prealloc) } if (i < num_prealloc) goto out_free; - xprt->max_reqs = num_prealloc; + if (max_alloc > num_prealloc) + xprt->max_reqs = max_alloc; + else + xprt->max_reqs = num_prealloc; + xprt->min_reqs = num_prealloc; + atomic_set(&xprt->num_reqs, num_prealloc); return xprt; @@ -1036,7 +1084,6 @@ void xprt_reserve(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - task->tk_status = -EIO; spin_lock(&xprt->reserve_lock); xprt_alloc_slot(task); spin_unlock(&xprt->reserve_lock); @@ -1057,6 +1104,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; + INIT_LIST_HEAD(&req->rq_list); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 674a492..b446e10 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args) } xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index adaa54c..d7f97ef 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt); * xprtsock tunables */ unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; @@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; @@ -104,6 +106,15 @@ static ctl_table xs_tunables_table[] = { .extra2 = &max_slot_table_size }, { + .procname = "tcp_max_slot_table_entries", + .data = &xprt_max_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_slot_table_size, + .extra2 = &max_tcp_slot_table_limit + }, + { .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), @@ -2491,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) } static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, - unsigned int slot_table_size) + unsigned int slot_table_size, + unsigned int max_slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -2501,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size, + max_slot_table_size); if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); @@ -2543,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) struct rpc_xprt *xprt; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2607,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries, + xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2683,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2762,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) */ return args->bc_xprt->xpt_bc_xprt; } - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2949,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = { #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); +static int param_set_max_slot_table_size(const char *val, + const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE_LIMIT); +} + +static struct kernel_param_ops param_ops_max_slot_table_size = { + .set = param_set_max_slot_table_size, + .get = param_get_uint, +}; + +#define param_check_max_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, slot_table_size, 0644); +module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, + max_slot_table_size, 0644); module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, slot_table_size, 0644); -- cgit v0.10.2 From 3b27bad7f7ceacca6d6c0ef647ffb38aa55a8336 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 18:11:34 -0400 Subject: SUNRPC: Allow caller of rpc_sleep_on() to select priority levels Currently, the caller has to change the value of task->tk_priority if it wants to select on which priority level the task will sleep. This patch allows the caller to select a priority level at sleep time rather than always using task->tk_priority. Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fe2d8e6..e775689 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -227,6 +227,10 @@ void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_destroy_wait_queue(struct rpc_wait_queue *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); +void rpc_sleep_on_priority(struct rpc_wait_queue *, + struct rpc_task *, + rpc_action action, + int priority); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4814e24..d12ffa5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) /* * Add new request to a priority queue. */ -static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { struct list_head *q; struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[task->tk_priority]; - if (unlikely(task->tk_priority > queue->maxpriority)) + q = &queue->tasks[queue_priority]; + if (unlikely(queue_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { @@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ -static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { BUG_ON (RPC_IS_QUEUED(task)); if (RPC_IS_PRIORITY(queue)) - __rpc_add_wait_queue_priority(queue, task); + __rpc_add_wait_queue_priority(queue, task, queue_priority); else if (RPC_IS_SWAPPER(task)) list_add(&task->u.tk_wait.list, &queue->tasks[0]); else @@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task) * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. */ -static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action) +static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, + struct rpc_task *task, + rpc_action action, + unsigned char queue_priority) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - __rpc_add_wait_queue(q, task); + __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); task->tk_callback = action; @@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action); + __rpc_sleep_on_priority(q, task, action, task->tk_priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); +void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action, int priority) +{ + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); + + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); + spin_unlock_bh(&q->lock); +} + /** * __rpc_do_wake_up_task - wake up a single rpc_task * @queue: wait queue -- cgit v0.10.2 From 34006cee28f7344f9557a4be3816c7891b1bbab1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jul 2011 18:11:34 -0400 Subject: SUNRPC: Replace xprt->resend and xprt->sending with a priority queue Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d02762d..15518a1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -165,7 +165,6 @@ struct rpc_xprt { struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ - struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct list_head free; /* free slots */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index be85cf0..9b6a4d1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -195,6 +195,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -214,10 +215,13 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req != NULL && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -244,6 +248,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -265,10 +270,13 @@ out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req != NULL && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -291,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - } + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; req = task->tk_rqstp; xprt->snd_task = task; @@ -319,12 +324,9 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) return; if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - } + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; req = task->tk_rqstp; if (req == NULL) { @@ -1177,8 +1179,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); + rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending"); rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); xprt_init_xid(xprt); @@ -1240,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); /* -- cgit v0.10.2 From 2773395b34883fe54418de188733a63bb38e0ad6 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Thu, 21 Jul 2011 13:49:02 -0400 Subject: RDMA: Increasing RPCRDMA_MAX_DATA_SEGS Our performance team has noticed that increasing RPCRDMA_MAX_DATA_SEGS from 8 to 64 significantly increases throughput when using the RDMA transport. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cae761a..5d1cfe5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -109,7 +109,7 @@ struct rpcrdma_ep { */ /* temporary static scatter/gather max */ -#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ #define MAX_RPCRDMAHDR (\ /* max supported RPC/RDMA header */ \ -- cgit v0.10.2 From 73ca1001ed6881b476e8252adcd0eede1ea368ea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Jul 2011 11:26:30 -0400 Subject: nfs: don't use d_move in nfs_async_rename_done If the task that initiated the sillyrename ends up being killed by a fatal signal, then it will eventually return back to userspace and end up releasing the i_mutex. d_move however needs to be done while holding the i_mutex. Instead of using d_move here, just unhash the old and new dentries to prevent them from being found by lookups. With this change though, the dentries are now incorrect post-rename and do not reflect the actual name of the file on the server. I'm proceeding under the assumption that since they are unhashed that this isn't really a problem. In order for the sillydelete to still work though, the dname must be copied earlier when setting up the sillydelete info, and the name must be recopied if the sillydelete info has to be moved to a new dentry. Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 981298c..b2fbbde 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n alias = d_lookup(parent, &data->args.name); if (alias != NULL) { - int ret = 0; + int ret; void *devname_garbage = NULL; /* @@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * the sillyrename information to the aliased dentry. */ nfs_free_dname(data); + ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (alias->d_inode != NULL && + if (ret == 0 && alias->d_inode != NULL && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; alias->d_flags |= DCACHE_NFSFS_RENAMED; ret = 1; - } + } else + ret = 0; spin_unlock(&alias->d_lock); nfs_dec_sillycount(dir); dput(alias); @@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * point dentry is definitely not a root, so we won't need * that anymore. */ - if (devname_garbage) - kfree(devname_garbage); + kfree(devname_garbage); return ret; } data->dir = igrab(dir); @@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) if (parent == NULL) goto out_free; dir = parent->d_inode; - if (nfs_copy_dname(dentry, data) != 0) - goto out_dput; /* Non-exclusive lock protects against concurrent lookup() calls */ spin_lock(&dir->i_lock); if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { @@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct nfs_renamedata *data = calldata; struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); @@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) } if (task->tk_status != 0) { - nfs_cancel_async_unlink(data->old_dentry); + nfs_cancel_async_unlink(old_dentry); return; } - nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); - d_move(data->old_dentry, data->new_dentry); + d_drop(old_dentry); + d_drop(new_dentry); } /** @@ -568,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (error) goto out_dput; + /* populate unlinkdata with the right dname */ + error = nfs_copy_dname(sdentry, + (struct nfs_unlinkdata *)dentry->d_fsdata); + if (error) { + nfs_cancel_async_unlink(dentry); + goto out_dput; + } + /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry); if (IS_ERR(task)) { -- cgit v0.10.2 From ed1e6211a0a134ff23592c6f057af982ad5dab52 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jul 2011 15:37:29 -0400 Subject: NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation() nfs_mark_return_delegation() is usually called without any locking, and so it is not safe to dereference delegation->inode. Since the inode is only used to discover the nfs_client anyway, it makes more sense to have the callers pass a valid pointer to the nfs_server as a parameter. Reported-by: Ian Kent Cc: stable@kernel.org Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2a..321a66b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } -static void nfs_mark_return_delegation(struct nfs_delegation *delegation) +static void nfs_mark_return_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } /** @@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; rcu_read_lock(); @@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_unlock(); return -ENOENT; } - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); -- cgit v0.10.2