From 0b05b18381eea98c9c9ada95629bf659a88c9374 Mon Sep 17 00:00:00 2001 From: "Anand V. Avati" Date: Sun, 19 Aug 2012 08:53:23 -0400 Subject: fuse: implement NFS-like readdirplus support This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains 'fuse_entry_out' structure thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already existed (for e.g. in a re-run of ls -l) then just the inode attributes timeout and dentry timeout are refreshed. With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant as it avoided 20,000 upcalls calls (lookup). Cache consistency is no worse than what already is. Signed-off-by: Anand V. Avati Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e83351a..05c3eec 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fuse_request_send_nowait_locked(fc, req); } +void fuse_force_forget(struct file *file, u64 nodeid) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_forget_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.nlookup = 1; + req = fuse_get_req_nofail(fc, file); + req->in.h.opcode = FUSE_FORGET; + req->in.h.nodeid = nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->isreply = 0; + fuse_request_send_nowait(fc, req); +} + /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9..dcc1e52 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) +{ + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' && name.len == 2) + return 0; + } + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry && dentry->d_inode) { + inode = dentry->d_inode; + if (get_node_id(inode) == o->nodeid) { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + err = d_invalidate(dentry); + if (err) + goto out; + dput(dentry); + dentry = NULL; + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), + attr_version); + + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + if (dentry) + dput(dentry); + return err; +} + +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir, u64 attr_version) +{ + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. + */ + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, + dirent->type); + file->f_pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); + if (fc->do_readdirplus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIR); + } fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (fc->do_readdirplus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, dstbuf, filldir, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + dstbuf, filldir); + } + } __free_page(page); fuse_invalidate_attr(inode); /* atime changed */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105a53..5c50553 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -487,6 +487,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Does the filesystem support readdir-plus? */ + unsigned do_readdirplus:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, struct fuse_forget_link *fuse_alloc_forget(void); +/* Used by READDIRPLUS */ +void fuse_force_forget(struct file *file, u64 nodeid); + /** * Initialize READ or READDIR request */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73ca6b7..6f7d574 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + if (arg->flags & FUSE_DO_READDIRPLUS) + fc->do_readdirplus = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d8c713e..5dc1fea 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -193,6 +193,7 @@ struct fuse_file_lock { #define FUSE_FLOCK_LOCKS (1 << 10) #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) /** * CUSE INIT request/reply flags @@ -299,6 +300,7 @@ enum fuse_opcode { FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -630,6 +632,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + struct fuse_notify_inval_inode_out { __u64 ino; __s64 off; -- cgit v0.10.2 From 4250c0668ea10a19f3d37b1733f54ce6c8a37234 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:07 +0400 Subject: fuse: general infrastructure for pages[] of variable size The patch removes inline array of FUSE_MAX_PAGES_PER_REQ page pointers from fuse_req. Instead of that, req->pages may now point either to small inline array or to an array allocated dynamically. This essentially means that all callers of fuse_request_alloc[_nofs] should pass the number of pages needed explicitly. The patch doesn't make any logic changes. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 05c3eec..af37ae1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -34,34 +34,55 @@ static struct fuse_conn *fuse_get_conn(struct file *file) return file->private_data; } -static void fuse_request_init(struct fuse_req *req) +static void fuse_request_init(struct fuse_req *req, struct page **pages, + unsigned npages) { memset(req, 0, sizeof(*req)); + memset(pages, 0, sizeof(*pages) * npages); INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); + req->pages = pages; + req->max_pages = npages; } -struct fuse_req *fuse_request_alloc(void) +static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); - if (req) - fuse_request_init(req); + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags); + if (req) { + struct page **pages; + + if (npages <= FUSE_REQ_INLINE_PAGES) + pages = req->inline_pages; + else + pages = kmalloc(sizeof(struct page *) * npages, flags); + + if (!pages) { + kmem_cache_free(fuse_req_cachep, req); + return NULL; + } + + fuse_request_init(req, pages, npages); + } return req; } + +struct fuse_req *fuse_request_alloc(unsigned npages) +{ + return __fuse_request_alloc(npages, GFP_KERNEL); +} EXPORT_SYMBOL_GPL(fuse_request_alloc); -struct fuse_req *fuse_request_alloc_nofs(void) +struct fuse_req *fuse_request_alloc_nofs(unsigned npages) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); - if (req) - fuse_request_init(req); - return req; + return __fuse_request_alloc(npages, GFP_NOFS); } void fuse_request_free(struct fuse_req *req) { + if (req->pages != req->inline_pages) + kfree(req->pages); kmem_cache_free(fuse_req_cachep, req); } @@ -116,7 +137,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (!fc->connected) goto out; - req = fuse_request_alloc(); + req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); err = -ENOMEM; if (!req) goto out; @@ -165,7 +186,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_file *ff = file->private_data; spin_lock(&fc->lock); - fuse_request_init(req); + fuse_request_init(req, req->pages, req->max_pages); BUG_ON(ff->reserved_req); ff->reserved_req = req; wake_up_all(&fc->reserved_req_waitq); @@ -192,7 +213,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) atomic_inc(&fc->num_waiting); wait_event(fc->blocked_waitq, !fc->blocked); - req = fuse_request_alloc(); + req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); if (!req) req = get_reserved_req(fc, file); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f3ab824..2565f63 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) return NULL; ff->fc = fc; - ff->reserved_req = fuse_request_alloc(); + ff->reserved_req = fuse_request_alloc(0); if (unlikely(!ff->reserved_req)) { kfree(ff); return NULL; @@ -1272,7 +1272,7 @@ static int fuse_writepage_locked(struct page *page) set_page_writeback(page); - req = fuse_request_alloc_nofs(); + req = fuse_request_alloc_nofs(1); if (!req) goto err; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5c50553..0c5b931 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -44,6 +44,9 @@ doing the mount will be allowed to access the filesystem */ #define FUSE_ALLOW_OTHER (1 << 1) +/** Number of page pointers embedded in fuse_req */ +#define FUSE_REQ_INLINE_PAGES 1 + /** List of active connections */ extern struct list_head fuse_conn_list; @@ -291,7 +294,13 @@ struct fuse_req { } misc; /** page vector */ - struct page *pages[FUSE_MAX_PAGES_PER_REQ]; + struct page **pages; + + /** size of the 'pages' array */ + unsigned max_pages; + + /** inline page vector */ + struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; /** number of pages in vector */ unsigned num_pages; @@ -664,9 +673,9 @@ void fuse_ctl_cleanup(void); /** * Allocate a request */ -struct fuse_req *fuse_request_alloc(void); +struct fuse_req *fuse_request_alloc(unsigned npages); -struct fuse_req *fuse_request_alloc_nofs(void); +struct fuse_req *fuse_request_alloc_nofs(unsigned npages); /** * Free a request diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6f7d574..9a937f0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1037,12 +1037,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; - init_req = fuse_request_alloc(); + init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; if (is_bdev) { - fc->destroy_req = fuse_request_alloc(); + fc->destroy_req = fuse_request_alloc(0); if (!fc->destroy_req) goto err_free_init_req; } -- cgit v0.10.2 From b111c8c0e3e5e780ae0758fc4c1c376a7c9d5997 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:30 +0400 Subject: fuse: categorize fuse_get_req() The patch categorizes all fuse_get_req() invocations into two categories: - fuse_get_req_nopages(fc) - when caller doesn't care about req->pages - fuse_get_req(fc, n) - when caller need n page pointers (n > 0) Adding fuse_get_req_nopages() helps to avoid numerous fuse_get_req(fc, 0) scattered over code. Now it's clear from the first glance when a caller need fuse_req with page pointers. The patch doesn't make any logic changes. In multi-page case, it silly allocates array of FUSE_MAX_PAGES_PER_REQ page pointers. This will be amended by future patches. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e397b67..5cc838f 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -419,7 +419,7 @@ static int cuse_send_init(struct cuse_conn *cc) BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); if (IS_ERR(req)) { rc = PTR_ERR(req); goto err; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index af37ae1..ff5e8be 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -118,7 +118,7 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } -struct fuse_req *fuse_get_req(struct fuse_conn *fc) +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) { struct fuse_req *req; sigset_t oldset; @@ -137,7 +137,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (!fc->connected) goto out; - req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); + req = fuse_request_alloc(npages); err = -ENOMEM; if (!req) goto out; @@ -207,13 +207,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) * filesystem should not have it's own file open. If deadlock is * intentional, it can still be broken by "aborting" the filesystem. */ -struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) +struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, + struct file *file) { struct fuse_req *req; atomic_inc(&fc->num_waiting); wait_event(fc->blocked_waitq, !fc->blocked); - req = fuse_request_alloc(FUSE_MAX_PAGES_PER_REQ); + req = fuse_request_alloc(0); if (!req) req = get_reserved_req(fc, file); @@ -521,7 +522,7 @@ void fuse_force_forget(struct file *file, u64 nodeid) memset(&inarg, 0, sizeof(inarg)); inarg.nlookup = 1; - req = fuse_get_req_nofail(fc, file); + req = fuse_get_req_nofail_nopages(fc, file); req->in.h.opcode = FUSE_FORGET; req->in.h.nodeid = nodeid; req->in.numargs = 1; @@ -1577,7 +1578,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, unsigned int offset; size_t total_len = 0; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dcc1e52..d04bcc5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -178,7 +178,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) return -ECHILD; fc = get_fuse_conn(inode); - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return 0; @@ -271,7 +271,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (name->len > FUSE_NAME_MAX) goto out; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); err = PTR_ERR(req); if (IS_ERR(req)) goto out; @@ -391,7 +391,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (!forget) goto out_err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); err = PTR_ERR(req); if (IS_ERR(req)) goto out_put_forget_req; @@ -592,7 +592,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, { struct fuse_mknod_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -623,7 +623,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) { struct fuse_mkdir_in inarg; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -647,7 +647,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -664,7 +664,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -696,7 +696,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -723,7 +723,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, int err; struct fuse_rename_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -776,7 +776,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_link_in inarg; struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -848,7 +848,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, struct fuse_req *req; u64 attr_version; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1029,7 +1029,7 @@ static int fuse_access(struct inode *inode, int mask) if (fc->no_access) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1305,7 +1305,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) if (is_bad_inode(inode)) return -EIO; - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); if (IS_ERR(req)) return PTR_ERR(req); @@ -1349,7 +1349,7 @@ static char *read_link(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req = fuse_get_req_nopages(fc); char *link; if (IS_ERR(req)) @@ -1562,7 +1562,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1670,7 +1670,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name, if (fc->no_setxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1709,7 +1709,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, if (fc->no_getxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1761,7 +1761,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) if (fc->no_listxattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1806,7 +1806,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name) if (fc->no_removexattr) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2565f63..8828771 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, struct fuse_req *req; int err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fc->no_flush) return 0; - req = fuse_get_req_nofail(fc, file); + req = fuse_get_req_nofail_nopages(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.lock_owner = fuse_lock_owner_id(fc, id); @@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, fuse_sync_writes(inode); - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page) */ fuse_wait_on_page_writeback(inode, page->index); - req = fuse_get_req(fc); + req = fuse_get_req(fc, 1); err = PTR_ERR(req); if (IS_ERR(req)) goto out; @@ -657,7 +657,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc); + data->req = req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); @@ -683,7 +683,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc); + data.req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); err = PTR_ERR(data.req); if (IS_ERR(data.req)) goto out; @@ -890,7 +890,7 @@ static ssize_t fuse_perform_write(struct file *file, struct fuse_req *req; ssize_t count; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { err = PTR_ERR(req); break; @@ -1072,7 +1072,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, ssize_t res = 0; struct fuse_req *req; - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) return PTR_ERR(req); @@ -1108,7 +1108,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) break; } @@ -1471,7 +1471,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) struct fuse_lk_out outarg; int err; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1506,7 +1506,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) if (fl->fl_flags & FL_CLOSE) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1575,7 +1575,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) if (!inode->i_sb->s_bdev || fc->no_bmap) return 0; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return 0; @@ -1873,7 +1873,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, num_pages++; } - req = fuse_get_req(fc); + req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; @@ -2076,7 +2076,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) fuse_register_polled_file(fc, ff); } - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return POLLERR; @@ -2194,7 +2194,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fc->no_fallocate) return -EOPNOTSUPP; - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0c5b931..5b21e6a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -683,14 +683,25 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages); void fuse_request_free(struct fuse_req *req); /** - * Get a request, may fail with -ENOMEM + * Get a request, may fail with -ENOMEM, + * caller should specify # elements in req->pages[] explicitly */ -struct fuse_req *fuse_get_req(struct fuse_conn *fc); +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); + +/** + * Get a request, may fail with -ENOMEM, + * useful for callers who doesn't use req->pages[] + */ +static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc) +{ + return fuse_get_req(fc, 0); +} /** * Gets a requests for a file operation, always succeeds */ -struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); +struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, + struct file *file); /** * Decrement reference count of a request. If count goes to zero free diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a937f0..9d95a5a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -413,7 +413,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - req = fuse_get_req(fc); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v0.10.2 From 4d53dc99baf139e4fa0d395f7658032cc2eb3297 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:42 +0400 Subject: fuse: rework fuse_retrieve() The patch reworks fuse_retrieve() to allocate only so many page pointers as needed. The core part of the patch is the following calculation: num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; (thanks Miklos for formula). All other changes are mostly shuffling lines. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ff5e8be..28d9792 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1577,13 +1577,24 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, unsigned int num; unsigned int offset; size_t total_len = 0; + int num_pages; - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + offset = outarg->offset & ~PAGE_CACHE_MASK; + file_size = i_size_read(inode); + + num = outarg->size; + if (outarg->offset > file_size) + num = 0; + else if (outarg->offset + num > file_size) + num = file_size - outarg->offset; + + num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ); + + req = fuse_get_req(fc, num_pages); if (IS_ERR(req)) return PTR_ERR(req); - offset = outarg->offset & ~PAGE_CACHE_MASK; - req->in.h.opcode = FUSE_NOTIFY_REPLY; req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; @@ -1592,14 +1603,8 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_CACHE_SHIFT; - file_size = i_size_read(inode); - num = outarg->size; - if (outarg->offset > file_size) - num = 0; - else if (outarg->offset + num > file_size) - num = file_size - outarg->offset; - while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + while (num && req->num_pages < num_pages) { struct page *page; unsigned int this_num; -- cgit v0.10.2 From f8dbdf81821b5ab4c5e86e7b2bd7edb892c159c2 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:48:51 +0400 Subject: fuse: rework fuse_readpages() The patch uses 'nr_pages' argument of fuse_readpages() as heuristics for the number of page pointers to allocate. This can be improved further by taking in consideration fc->max_read and gaps between page indices, but it's not clear whether it's worthy or not. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8828771..5fd06ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -641,6 +641,7 @@ struct fuse_fill_data { struct fuse_req *req; struct file *file; struct inode *inode; + unsigned nr_pages; }; static int fuse_readpages_fill(void *_data, struct page *page) @@ -656,16 +657,25 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { + int nr_alloc = min_t(unsigned, data->nr_pages, + FUSE_MAX_PAGES_PER_REQ); fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + data->req = req = fuse_get_req(fc, nr_alloc); if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); } } + + if (WARN_ON(req->num_pages >= req->max_pages)) { + fuse_put_request(fc, req); + return -EIO; + } + page_cache_get(page); req->pages[req->num_pages] = page; req->num_pages++; + data->nr_pages--; return 0; } @@ -676,6 +686,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_fill_data data; int err; + int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ); err = -EIO; if (is_bad_inode(inode)) @@ -683,7 +694,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + data.req = fuse_get_req(fc, nr_alloc); + data.nr_pages = nr_pages; err = PTR_ERR(data.req); if (IS_ERR(data.req)) goto out; -- cgit v0.10.2 From d07f09f509fb21482096e1975f160b694c0edf84 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:00 +0400 Subject: fuse: rework fuse_perform_write() The patch allocates as many page pointers in fuse_req as needed to cover interval [pos .. pos+len-1]. Inline helper fuse_wr_pages() is introduced to hide this cumbersome arithmetic. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5fd06ba..b997250 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -881,11 +881,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!fc->big_writes) break; } while (iov_iter_count(ii) && count < fc->max_write && - req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); + req->num_pages < req->max_pages && offset == 0); return count > 0 ? count : err; } +static inline unsigned fuse_wr_pages(loff_t pos, size_t len) +{ + return min_t(unsigned, + ((pos + len - 1) >> PAGE_CACHE_SHIFT) - + (pos >> PAGE_CACHE_SHIFT) + 1, + FUSE_MAX_PAGES_PER_REQ); +} + static ssize_t fuse_perform_write(struct file *file, struct address_space *mapping, struct iov_iter *ii, loff_t pos) @@ -901,8 +909,9 @@ static ssize_t fuse_perform_write(struct file *file, do { struct fuse_req *req; ssize_t count; + unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii)); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, nr_pages); if (IS_ERR(req)) { err = PTR_ERR(req); break; -- cgit v0.10.2 From 54b966702dafe396b6f4e609f222b8e0fdb4d7a4 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:13 +0400 Subject: fuse: rework fuse_do_ioctl() fuse_do_ioctl() already calculates the number of pages it's going to use. It is stored in 'num_pages' variable. So the patch simply uses it for allocating fuse_req. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b997250..fdb5b33 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1894,7 +1894,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, num_pages++; } - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, num_pages); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; -- cgit v0.10.2 From b2430d7567a376b3685627ca7e9d712f6f27d49b Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:24 +0400 Subject: fuse: add per-page descriptor to fuse_req The ability to save page pointers along with lengths and offsets in fuse_req will be useful to cover several iovec-s with a single fuse_req. Per-request page_offset is removed because anybody who need it can use req->page_descs[0].offset instead. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 28d9792..db4af8f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -35,15 +35,18 @@ static struct fuse_conn *fuse_get_conn(struct file *file) } static void fuse_request_init(struct fuse_req *req, struct page **pages, + struct fuse_page_desc *page_descs, unsigned npages) { memset(req, 0, sizeof(*req)); memset(pages, 0, sizeof(*pages) * npages); + memset(page_descs, 0, sizeof(*page_descs) * npages); INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); req->pages = pages; + req->page_descs = page_descs; req->max_pages = npages; } @@ -52,18 +55,25 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags); if (req) { struct page **pages; + struct fuse_page_desc *page_descs; - if (npages <= FUSE_REQ_INLINE_PAGES) + if (npages <= FUSE_REQ_INLINE_PAGES) { pages = req->inline_pages; - else + page_descs = req->inline_page_descs; + } else { pages = kmalloc(sizeof(struct page *) * npages, flags); + page_descs = kmalloc(sizeof(struct fuse_page_desc) * + npages, flags); + } - if (!pages) { + if (!pages || !page_descs) { + kfree(pages); + kfree(page_descs); kmem_cache_free(fuse_req_cachep, req); return NULL; } - fuse_request_init(req, pages, npages); + fuse_request_init(req, pages, page_descs, npages); } return req; } @@ -81,8 +91,10 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages) void fuse_request_free(struct fuse_req *req) { - if (req->pages != req->inline_pages) + if (req->pages != req->inline_pages) { kfree(req->pages); + kfree(req->page_descs); + } kmem_cache_free(fuse_req_cachep, req); } @@ -186,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_file *ff = file->private_data; spin_lock(&fc->lock); - fuse_request_init(req, req->pages, req->max_pages); + fuse_request_init(req, req->pages, req->page_descs, req->max_pages); BUG_ON(ff->reserved_req); ff->reserved_req = req; wake_up_all(&fc->reserved_req_waitq); @@ -891,7 +903,7 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, { unsigned i; struct fuse_req *req = cs->req; - unsigned offset = req->page_offset; + unsigned offset = req->page_descs[0].offset; unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { @@ -1599,7 +1611,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_offset = offset; + req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_CACHE_SHIFT; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fdb5b33..2b6f08a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -798,7 +798,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, res = fuse_send_write(req, file, pos, count, NULL); - offset = req->page_offset; + offset = req->page_descs[0].offset; count = res; for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; @@ -829,7 +829,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, int err; req->in.argpages = 1; - req->page_offset = offset; + req->page_descs[0].offset = offset; do { size_t tmp; @@ -1070,14 +1070,14 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return npages; req->num_pages = npages; - req->page_offset = offset; + req->page_descs[0].offset = offset; if (write) req->in.argpages = 1; else req->out.argpages = 1; - nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; + nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; *nbytesp = min(*nbytesp, nbytes); return 0; @@ -1314,7 +1314,7 @@ static int fuse_writepage_locked(struct page *page) req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; - req->page_offset = 0; + req->page_descs[0].offset = 0; req->end = fuse_writepage_end; req->inode = inode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5b21e6a..70cef60 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -203,6 +203,12 @@ struct fuse_out { struct fuse_arg args[3]; }; +/** FUSE page descriptor */ +struct fuse_page_desc { + unsigned int length; + unsigned int offset; +}; + /** The request state */ enum fuse_req_state { FUSE_REQ_INIT = 0, @@ -296,18 +302,21 @@ struct fuse_req { /** page vector */ struct page **pages; + /** page-descriptor vector */ + struct fuse_page_desc *page_descs; + /** size of the 'pages' array */ unsigned max_pages; /** inline page vector */ struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; + /** inline page-descriptor vector */ + struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES]; + /** number of pages in vector */ unsigned num_pages; - /** offset of data on first page */ - unsigned page_offset; - /** File used in the request (or NULL) */ struct fuse_file *ff; -- cgit v0.10.2 From 85f40aec887110ae6bbefa87988def4606a3d583 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:49:33 +0400 Subject: fuse: use req->page_descs[] for argpages cases Previously, anyone who set flag 'argpages' only filled req->pages[] and set per-request page_offset. This patch re-works all cases where argpages=1 to fill req->page_descs[] properly. Having req->page_descs[] filled properly allows to re-work fuse_copy_pages() to copy page fragments described by req->page_descs[]. This will be useful for next patches optimizing direct_IO. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 5cc838f..fb9dcfd 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -449,6 +449,7 @@ static int cuse_send_init(struct cuse_conn *cc) req->out.argvar = 1; req->out.argpages = 1; req->pages[0] = page; + req->page_descs[0].length = req->out.args[1].size; req->num_pages = 1; req->end = cuse_process_init_reply; fuse_request_send_background(fc, req); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index db4af8f..cbae09e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -903,11 +903,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, { unsigned i; struct fuse_req *req = cs->req; - unsigned offset = req->page_descs[0].offset; - unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { int err; + unsigned offset = req->page_descs[i].offset; + unsigned count = min(nbytes, req->page_descs[i].length); err = fuse_copy_page(cs, &req->pages[i], offset, count, zeroing); @@ -915,8 +915,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, return err; nbytes -= count; - count = min(nbytes, (unsigned) PAGE_SIZE); - offset = 0; } return 0; } @@ -1626,6 +1624,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = this_num; req->num_pages++; offset = 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d04bcc5..ed8f8c5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1317,6 +1317,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; + req->page_descs[0].length = PAGE_SIZE; if (fc->do_readdirplus) { attr_version = fuse_get_attr_version(fc); fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2b6f08a..3384a20 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; + req->page_descs[0].length = count; num_read = fuse_send_read(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -674,6 +675,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) page_cache_get(page); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = PAGE_SIZE; req->num_pages++; data->nr_pages--; return 0; @@ -869,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, err = 0; req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = tmp; req->num_pages++; iov_iter_advance(ii, tmp); @@ -1044,6 +1047,15 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static inline void fuse_page_descs_length_init(struct fuse_req *req) +{ + int i; + + for (i = 0; i < req->num_pages; i++) + req->page_descs[i].length = PAGE_SIZE - + req->page_descs[i].offset; +} + static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, size_t *nbytesp, int write) { @@ -1071,6 +1083,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, req->num_pages = npages; req->page_descs[0].offset = offset; + fuse_page_descs_length_init(req); if (write) req->in.argpages = 1; @@ -1078,6 +1091,11 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, req->out.argpages = 1; nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; + + if (*nbytesp < nbytes) + req->page_descs[req->num_pages - 1].length -= + nbytes - *nbytesp; + *nbytesp = min(*nbytesp, nbytes); return 0; @@ -1315,6 +1333,7 @@ static int fuse_writepage_locked(struct page *page) req->num_pages = 1; req->pages[0] = tmp_page; req->page_descs[0].offset = 0; + req->page_descs[0].length = PAGE_SIZE; req->end = fuse_writepage_end; req->inode = inode; @@ -1902,6 +1921,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); req->num_pages = num_pages; + fuse_page_descs_length_init(req); /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; -- cgit v0.10.2 From d28574e043e8b7cb35482de6e9a553118a32803d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:04 +0400 Subject: mm: minor cleanup of iov_iter_single_seg_count() The function does not modify iov_iter which 'i' points to. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/include/linux/fs.h b/include/linux/fs.h index 7617ee0..7d2e893 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -301,7 +301,7 @@ size_t iov_iter_copy_from_user(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -size_t iov_iter_single_seg_count(struct iov_iter *i); +size_t iov_iter_single_seg_count(const struct iov_iter *i); static inline void iov_iter_init(struct iov_iter *i, const struct iovec *iov, unsigned long nr_segs, diff --git a/mm/filemap.c b/mm/filemap.c index 83efee7..24a7ea5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable); /* * Return the count of just the current iov_iter segment. */ -size_t iov_iter_single_seg_count(struct iov_iter *i) +size_t iov_iter_single_seg_count(const struct iov_iter *i) { const struct iovec *iov = i->iov; if (i->nr_segs == 1) -- cgit v0.10.2 From b98d023a24496bf7d538c549e5426b1173c6f55d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:15 +0400 Subject: fuse: pass iov[] to fuse_get_user_pages() The patch makes preliminary work for the next patch optimizing scatter-gather direct IO. The idea is to allow fuse_get_user_pages() to pack as many iov-s to each fuse request as possible. So, here we only rework all related call-paths to carry iov[] from fuse_direct_IO() to fuse_get_user_pages(). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3384a20..542ad97 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1056,14 +1056,18 @@ static inline void fuse_page_descs_length_init(struct fuse_req *req) req->page_descs[i].offset; } -static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, +static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t *nbytesp, int write) { size_t nbytes = *nbytesp; - unsigned long user_addr = (unsigned long) buf; - unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = min(iov_iter_single_seg_count(ii), nbytes); + unsigned long user_addr; + unsigned offset; int npages; + user_addr = (unsigned long)ii->iov->iov_base + ii->iov_offset; + offset = user_addr & ~PAGE_MASK; + /* Special case for kernel I/O: can copy directly into the buffer */ if (segment_eq(get_fs(), KERNEL_DS)) { if (write) @@ -1071,10 +1075,12 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, else req->out.args[0].value = (void *) user_addr; + iov_iter_advance(ii, frag_size); + *nbytesp = frag_size; return 0; } - nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); + nbytes = min_t(size_t, frag_size, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); npages = get_user_pages_fast(user_addr, npages, !write, req->pages); @@ -1092,17 +1098,19 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; - if (*nbytesp < nbytes) + if (frag_size < nbytes) req->page_descs[req->num_pages - 1].length -= - nbytes - *nbytesp; + nbytes - frag_size; - *nbytesp = min(*nbytesp, nbytes); + *nbytesp = min(frag_size, nbytes); + iov_iter_advance(ii, *nbytesp); return 0; } -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) +static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, + loff_t *ppos, int write) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1110,6 +1118,9 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, loff_t pos = *ppos; ssize_t res = 0; struct fuse_req *req; + struct iov_iter ii; + + iov_iter_init(&ii, iov, nr_segs, count, 0); req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); if (IS_ERR(req)) @@ -1119,7 +1130,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, buf, &nbytes, write); + int err = fuse_get_user_pages(req, &ii, &nbytes, write); if (err) { res = err; break; @@ -1142,7 +1153,6 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, count -= nres; res += nres; pos += nres; - buf += nres; if (nres != nbytes) break; if (count) { @@ -1159,10 +1169,17 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, return res; } + +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) +{ + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + return __fuse_direct_io(file, &iov, 1, count, ppos, write); +} EXPORT_SYMBOL_GPL(fuse_direct_io); -static ssize_t fuse_direct_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { ssize_t res; struct inode *inode = file->f_path.dentry->d_inode; @@ -1170,22 +1187,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(file, buf, count, ppos, 0); + res = __fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + ppos, 0); fuse_invalidate_attr(inode); return res; } -static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t fuse_direct_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + return __fuse_direct_read(file, &iov, 1, ppos); +} + +static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { struct inode *inode = file->f_path.dentry->d_inode; + size_t count = iov_length(iov, nr_segs); ssize_t res; res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = fuse_direct_io(file, buf, count, ppos, 1); + res = __fuse_direct_io(file, iov, nr_segs, count, ppos, 1); if (res > 0) fuse_write_update_size(inode, *ppos); } @@ -1198,6 +1224,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, static ssize_t fuse_direct_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; @@ -1206,7 +1233,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(file, buf, count, ppos); + res = __fuse_direct_write(file, &iov, 1, ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2167,41 +2194,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, return 0; } -static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, int rw) -{ - const struct iovec *vector = iov; - ssize_t ret = 0; - - while (nr_segs > 0) { - void __user *base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - if (rw == WRITE) - nr = __fuse_direct_write(filp, base, len, ppos); - else - nr = fuse_direct_read(filp, base, len, ppos); - - if (nr < 0) { - if (!ret) - ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } - - return ret; -} - - static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2213,7 +2205,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, file = iocb->ki_filp; pos = offset; - ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); + if (rw == WRITE) + ret = __fuse_direct_write(file, iov, nr_segs, &pos); + else + ret = __fuse_direct_read(file, iov, nr_segs, &pos); return ret; } -- cgit v0.10.2 From 7c190c8b9c0dd373cdd4d96e63306ec6e1a7115d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:29 +0400 Subject: fuse: optimize fuse_get_user_pages() Let fuse_get_user_pages() pack as many iov-s to a single fuse_req as possible. This is very beneficial in case of iov[] consisting of many iov-s of relatively small sizes (e.g. PAGE_SIZE). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 542ad97..b2aa6c2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1047,29 +1047,37 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } -static inline void fuse_page_descs_length_init(struct fuse_req *req) +static inline void fuse_page_descs_length_init(struct fuse_req *req, + unsigned index, unsigned nr_pages) { int i; - for (i = 0; i < req->num_pages; i++) + for (i = index; i < index + nr_pages; i++) req->page_descs[i].length = PAGE_SIZE - req->page_descs[i].offset; } +static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) +{ + return (unsigned long)ii->iov->iov_base + ii->iov_offset; +} + +static inline size_t fuse_get_frag_size(const struct iov_iter *ii, + size_t max_size) +{ + return min(iov_iter_single_seg_count(ii), max_size); +} + static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t *nbytesp, int write) { - size_t nbytes = *nbytesp; - size_t frag_size = min(iov_iter_single_seg_count(ii), nbytes); - unsigned long user_addr; - unsigned offset; - int npages; - - user_addr = (unsigned long)ii->iov->iov_base + ii->iov_offset; - offset = user_addr & ~PAGE_MASK; + size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ if (segment_eq(get_fs(), KERNEL_DS)) { + unsigned long user_addr = fuse_get_user_addr(ii); + size_t frag_size = fuse_get_frag_size(ii, *nbytesp); + if (write) req->in.args[1].value = (void *) user_addr; else @@ -1080,30 +1088,45 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } - nbytes = min_t(size_t, frag_size, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); - npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); - npages = get_user_pages_fast(user_addr, npages, !write, req->pages); - if (npages < 0) - return npages; + while (nbytes < *nbytesp && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + unsigned npages; + unsigned long user_addr = fuse_get_user_addr(ii); + unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); + int ret; - req->num_pages = npages; - req->page_descs[0].offset = offset; - fuse_page_descs_length_init(req); + unsigned n = FUSE_MAX_PAGES_PER_REQ - req->num_pages; + frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); + + npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + npages = clamp(npages, 1U, n); + + ret = get_user_pages_fast(user_addr, npages, !write, + &req->pages[req->num_pages]); + if (ret < 0) + return ret; + + npages = ret; + frag_size = min_t(size_t, frag_size, + (npages << PAGE_SHIFT) - offset); + iov_iter_advance(ii, frag_size); + + req->page_descs[req->num_pages].offset = offset; + fuse_page_descs_length_init(req, req->num_pages, npages); + + req->num_pages += npages; + req->page_descs[req->num_pages - 1].length -= + (npages << PAGE_SHIFT) - offset - frag_size; + + nbytes += frag_size; + } if (write) req->in.argpages = 1; else req->out.argpages = 1; - nbytes = (req->num_pages << PAGE_SHIFT) - req->page_descs[0].offset; - - if (frag_size < nbytes) - req->page_descs[req->num_pages - 1].length -= - nbytes - frag_size; - - *nbytesp = min(frag_size, nbytes); - iov_iter_advance(ii, *nbytesp); + *nbytesp = nbytes; return 0; } @@ -1948,7 +1971,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); req->num_pages = num_pages; - fuse_page_descs_length_init(req); + fuse_page_descs_length_init(req, 0, req->num_pages); /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; -- cgit v0.10.2 From 5565a9d884327ac45d49041f1b846dac273e110c Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 26 Oct 2012 19:50:36 +0400 Subject: fuse: optimize __fuse_direct_io() __fuse_direct_io() allocates fuse-requests by calling fuse_get_req(fc, n). The patch calculates 'n' based on iov[] array. This is useful because allocating FUSE_MAX_PAGES_PER_REQ page pointers and descriptors for each fuse request would be waste of memory in case of iov-s of smaller size. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b2aa6c2..68e10d4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1088,14 +1088,14 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } - while (nbytes < *nbytesp && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; unsigned long user_addr = fuse_get_user_addr(ii); unsigned offset = user_addr & ~PAGE_MASK; size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); int ret; - unsigned n = FUSE_MAX_PAGES_PER_REQ - req->num_pages; + unsigned n = req->max_pages - req->num_pages; frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1131,6 +1131,23 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, return 0; } +static inline int fuse_iter_npages(const struct iov_iter *ii_p) +{ + struct iov_iter ii = *ii_p; + int npages = 0; + + while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { + unsigned long user_addr = fuse_get_user_addr(&ii); + unsigned offset = user_addr & ~PAGE_MASK; + size_t frag_size = iov_iter_single_seg_count(&ii); + + npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + iov_iter_advance(&ii, frag_size); + } + + return min(npages, FUSE_MAX_PAGES_PER_REQ); +} + static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write) @@ -1145,7 +1162,7 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, iov_iter_init(&ii, iov, nr_segs, count, 0); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) return PTR_ERR(req); @@ -1180,7 +1197,7 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc, FUSE_MAX_PAGES_PER_REQ); + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) break; } -- cgit v0.10.2 From fb05f41f5f96f7423c53da4d87913fb44fd0565d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 10 Nov 2012 16:55:56 +0100 Subject: fuse: cleanup fuse_direct_io() Fix the following sparse warnings: fs/fuse/file.c:1216:43: warning: cast removes address space of expression fs/fuse/file.c:1216:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1216:43: expected void [noderef] *iov_base fs/fuse/file.c:1216:43: got void * fs/fuse/file.c:1241:43: warning: cast removes address space of expression fs/fuse/file.c:1241:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1241:43: expected void [noderef] *iov_base fs/fuse/file.c:1241:43: got void * fs/fuse/file.c:1267:43: warning: cast removes address space of expression fs/fuse/file.c:1267:43: warning: incorrect type in initializer (different address spaces) fs/fuse/file.c:1267:43: expected void [noderef] *iov_base fs/fuse/file.c:1267:43: got void * Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index fb9dcfd..6f96a8d 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -91,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { loff_t pos = 0; + struct iovec iov = { .iov_base = buf, .iov_len = count }; - return fuse_direct_io(file, buf, count, &pos, 0); + return fuse_direct_io(file, &iov, 1, count, &pos, 0); } static ssize_t cuse_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { loff_t pos = 0; + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(file, buf, count, &pos, 1); + return fuse_direct_io(file, &iov, 1, count, &pos, 1); } static int cuse_open(struct inode *inode, struct file *file) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 68e10d4..28bc9c6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1148,9 +1148,9 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) return min(npages, FUSE_MAX_PAGES_PER_REQ); } -static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, - unsigned long nr_segs, size_t count, - loff_t *ppos, int write) +ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, loff_t *ppos, + int write) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1209,13 +1209,6 @@ static ssize_t __fuse_direct_io(struct file *file, const struct iovec *iov, return res; } - -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) -{ - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; - return __fuse_direct_io(file, &iov, 1, count, ppos, write); -} EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, @@ -1227,8 +1220,8 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, if (is_bad_inode(inode)) return -EIO; - res = __fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), - ppos, 0); + res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + ppos, 0); fuse_invalidate_attr(inode); @@ -1238,7 +1231,7 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + struct iovec iov = { .iov_base = buf, .iov_len = count }; return __fuse_direct_read(file, &iov, 1, ppos); } @@ -1251,7 +1244,7 @@ static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = __fuse_direct_io(file, iov, nr_segs, count, ppos, 1); + res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1); if (res > 0) fuse_write_update_size(inode, *ppos); } @@ -1264,7 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct iovec iov = { .iov_base = (void *)buf, .iov_len = count }; + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 70cef60..13befcd 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -811,8 +811,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); -ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write); +ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, + unsigned long nr_segs, size_t count, loff_t *ppos, + int write); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); long fuse_ioctl_common(struct file *file, unsigned int cmd, -- cgit v0.10.2 From c2132c1bc73d9a279cec148f74ea709c960b3d89 Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Mon, 14 Jan 2013 22:30:00 -0800 Subject: Do not use RCU for current process credentials Commit c69e8d9c0 added rcu lock to fuse/dir.c It was assuming that 'task' is some other process but in fact this parameter always equals to 'current'. Inline this parameter to make it more readable and remove RCU lock as it is not needed when access current process credentials. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ed8f8c5..aa0b6ad 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -985,7 +985,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, /* * Calling into a user-controlled filesystem gives the filesystem - * daemon ptrace-like capabilities over the requester process. This + * daemon ptrace-like capabilities over the current process. This * means, that the filesystem daemon is able to record the exact * filesystem operations performed, and can also control the behavior * of the requester process in otherwise impossible ways. For example @@ -996,27 +996,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, * for which the owner of the mount has ptrace privilege. This * excludes processes started by other users, suid or sgid processes. */ -int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) +int fuse_allow_current_process(struct fuse_conn *fc) { const struct cred *cred; - int ret; if (fc->flags & FUSE_ALLOW_OTHER) return 1; - rcu_read_lock(); - ret = 0; - cred = __task_cred(task); + cred = current_cred(); if (uid_eq(cred->euid, fc->user_id) && uid_eq(cred->suid, fc->user_id) && uid_eq(cred->uid, fc->user_id) && gid_eq(cred->egid, fc->group_id) && gid_eq(cred->sgid, fc->group_id) && gid_eq(cred->gid, fc->group_id)) - ret = 1; - rcu_read_unlock(); + return 1; - return ret; + return 0; } static int fuse_access(struct inode *inode, int mask) @@ -1077,7 +1073,7 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; /* @@ -1544,7 +1540,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, loff_t oldsize; int err; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) @@ -1653,7 +1649,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; return fuse_update_attributes(inode, stat, NULL, NULL); @@ -1756,7 +1752,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (fc->no_listxattr) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 28bc9c6..a010585 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2082,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - if (!fuse_allow_task(fc, current)) + if (!fuse_allow_current_process(fc)) return -EACCES; if (is_bad_inode(inode)) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 13befcd..af51c14 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -774,9 +774,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc); int fuse_valid_type(int m); /** - * Is task allowed to perform filesystem operation? + * Is current process allowed to perform filesystem operation? */ -int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); +int fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9d95a5a..79b70de 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -408,7 +408,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) struct fuse_statfs_out outarg; int err; - if (!fuse_allow_task(fc, current)) { + if (!fuse_allow_current_process(fc)) { buf->f_type = FUSE_SUPER_MAGIC; return 0; } -- cgit v0.10.2 From 4582a4ab2a0e7218449fb2e895d0aae9ea753c94 Mon Sep 17 00:00:00 2001 From: Feng Shuo Date: Tue, 15 Jan 2013 11:23:28 +0800 Subject: FUSE: Adapt readdirplus to application usage patterns Use the same adaptive readdirplus mechanism as NFS: http://permalink.gmane.org/gmane.linux.nfs/49299 If the user space implementation wants to disable readdirplus temporarily, it could just return ENOTSUPP. Then kernel will recall it with readdir. Signed-off-by: Feng Shuo Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index aa0b6ad..dc5e648 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -14,6 +14,27 @@ #include #include +static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_inode *fi = get_fuse_inode(dir); + + if (!fc->do_readdirplus) + return false; + if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) + return true; + if (filp->f_pos == 0) + return true; + return false; +} + +static void fuse_advise_use_readdirplus(struct inode *dir) +{ + struct fuse_inode *fi = get_fuse_inode(dir); + + set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); +} + #if BITS_PER_LONG >= 64 static inline void fuse_dentry_settime(struct dentry *entry, u64 time) { @@ -219,6 +240,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version); fuse_change_entry_timeout(entry, &outarg); } + fuse_advise_use_readdirplus(inode); return 1; } @@ -355,6 +377,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, else fuse_invalidate_entry_cache(entry); + fuse_advise_use_readdirplus(dir); return newent; out_iput: @@ -1290,7 +1313,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { - int err; + int plus, err; size_t nbytes; struct page *page; struct inode *inode = file->f_path.dentry->d_inode; @@ -1310,11 +1333,13 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) fuse_put_request(fc, req); return -ENOMEM; } + + plus = fuse_use_readdirplus(inode, file); req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; req->page_descs[0].length = PAGE_SIZE; - if (fc->do_readdirplus) { + if (plus) { attr_version = fuse_get_attr_version(fc); fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIRPLUS); @@ -1327,7 +1352,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - if (fc->do_readdirplus) { + if (plus) { err = parse_dirplusfile(page_address(page), nbytes, file, dstbuf, filldir, attr_version); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index af51c14..fc55dd3 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -106,6 +106,15 @@ struct fuse_inode { /** List of writepage requestst (pending or sent) */ struct list_head writepages; + + /** Miscellaneous bits describing inode state */ + unsigned long state; +}; + +/** FUSE inode state bits */ +enum { + /** Advise readdirplus */ + FUSE_I_ADVISE_RDPLUS, }; struct fuse_conn; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79b70de..9876a87 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->attr_version = 0; fi->writectr = 0; fi->orig_ino = 0; + fi->state = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); -- cgit v0.10.2 From 23c153e54197171f30b889d9654929d74b6599d5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 31 Jan 2013 17:08:11 +0100 Subject: fuse: bump version for READDIRPLUS Yeah, we have a capability flag for this as well, so this is not strictly necessary, but it doesn't hurt either. Signed-off-by: Miklos Szeredi diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5dc1fea..3451b60 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -60,6 +60,9 @@ * * 7.20 * - add FUSE_AUTO_INVAL_DATA + * + * 7.21 + * - add FUSE_READDIRPLUS */ #ifndef _LINUX_FUSE_H @@ -91,7 +94,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 20 +#define FUSE_KERNEL_MINOR_VERSION 21 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -- cgit v0.10.2 From 6a4e922c3db06f7da27e072729e047185c2fba66 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 4 Feb 2013 13:04:44 +0000 Subject: fuse: avoid out-of-scope stack access The all pointers within fuse_req must point to valid memory once fuse_force_forget() returns. This bug appeared in "fuse: implement NFS-like readdirplus support" and was never in any official Linux release. I tested the fuse_force_forget() code path by injecting to fake -ENOMEM and verified the FORGET operation was called properly in userspace. Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cbae09e..e9bdec0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -440,9 +440,8 @@ __acquires(fc->lock) } } -void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) +static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - req->isreply = 1; spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; @@ -459,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } spin_unlock(&fc->lock); } + +void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + __fuse_request_send(fc, req); +} EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, @@ -541,7 +546,9 @@ void fuse_force_forget(struct file *file, u64 nodeid) req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->isreply = 0; - fuse_request_send_nowait(fc, req); + __fuse_request_send(fc, req); + /* ignore errors */ + fuse_put_request(fc, req); } /* -- cgit v0.10.2 From dfca7cebc2679f3d129f8e680a8f199a7ad16e38 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 4 Feb 2013 15:57:42 +0100 Subject: fuse: don't WARN when nlink is zero drop_nlink() warns if nlink is already zero. This is triggerable by a buggy userspace filesystem. The cure, I think, is worse than the disease so disable the warning. Reported-by: Tero Roponen Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dc5e648..2b112d9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -705,7 +705,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - drop_nlink(inode); + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (inode->i_nlink > 0) + drop_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); -- cgit v0.10.2 From 0415d291022543d83ee799e9ffee08d856bca6e8 Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Mon, 4 Feb 2013 16:14:32 +0100 Subject: fuse: send poll events commit 626cf23660 "poll: add poll_requested_events()..." enabled us to send the requested events to the filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a010585..c807176 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2167,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) return DEFAULT_POLLMASK; poll_wait(file, &ff->poll_wait, wait); + inarg.events = (__u32)poll_requested_events(wait); /* * Ask for notification iff there's someone waiting for it. diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 3451b60..68619e9 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -63,6 +63,7 @@ * * 7.21 * - add FUSE_READDIRPLUS + * - send the requested events in POLL request */ #ifndef _LINUX_FUSE_H @@ -585,7 +586,7 @@ struct fuse_poll_in { __u64 fh; __u64 kh; __u32 flags; - __u32 padding; + __u32 events; }; struct fuse_poll_out { -- cgit v0.10.2 From 7e98d53086d18c877cb44e9065219335184024de Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Feb 2013 11:58:12 +0100 Subject: Synchronize fuse header with one used in library The library one has provisions for use in *BSD, add them to the kernel one too. They don't hurt and ease maintenance. Signed-off-by: Miklos Szeredi diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 68619e9..baee03e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1,9 +1,35 @@ /* - FUSE: Filesystem in Userspace + This file defines the kernel interface of FUSE Copyright (C) 2001-2008 Miklos Szeredi This program can be distributed under the terms of the GNU GPL. See the file COPYING. + + This -- and only this -- header file may also be distributed under + the terms of the BSD Licence as follows: + + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. */ /* @@ -69,7 +95,16 @@ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H +#ifdef __linux__ #include +#else +#include +#define __u64 uint64_t +#define __s64 int64_t +#define __u32 uint32_t +#define __s32 int32_t +#define __u16 uint16_t +#endif /* * Version negotiation: -- cgit v0.10.2 From 634734b63ac39e137a1c623ba74f3e062b6577db Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 6 Feb 2013 22:29:01 +0000 Subject: fuse: allow control of adaptive readdirplus use For some filesystems (e.g. GlusterFS), the cost of performing a normal readdir and readdirplus are identical. Since adaptively using readdirplus has no benefit for those systems, give users/filesystems the option to control adaptive readdirplus use. v2 of this patch incorporates Miklos's suggestion to simplify the code, as well as improving consistency of macro names and documentation. Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b112d9..8506522 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -21,6 +21,8 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) if (!fc->do_readdirplus) return false; + if (!fc->readdirplus_auto) + return true; if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) return true; if (filp->f_pos == 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc55dd3..6aeba86 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -514,9 +514,12 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Does the filesystem support readdir-plus? */ + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; + /** Does the filesystem want adaptive readdirplus? */ + unsigned readdirplus_auto:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9876a87..01353ed 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -866,6 +866,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->auto_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) fc->do_readdirplus = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -893,7 +895,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index baee03e..4c43b44 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -218,6 +218,8 @@ struct fuse_file_lock { * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -233,6 +235,7 @@ struct fuse_file_lock { #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) #define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) /** * CUSE INIT request/reply flags -- cgit v0.10.2