From adcadfa8f373f301e9f622fbf45957adf2d98622 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 29 Jun 2013 21:42:20 +0400 Subject: fuse: Getting file for writeback helper There will be a .writepageS callback implementation which will need to get a fuse_file out of a fuse_inode, thus make a helper for this. Signed-off-by: Maxim Patlasov Signed-off-by: Pavel Emelyanov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4598345..aab6d05 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1494,6 +1494,20 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) fuse_writepage_free(fc, req); } +static struct fuse_file *fuse_write_file(struct fuse_conn *fc, + struct fuse_inode *fi) +{ + struct fuse_file *ff; + + spin_lock(&fc->lock); + BUG_ON(list_empty(&fi->write_files)); + ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); + fuse_file_get(ff); + spin_unlock(&fc->lock); + + return ff; +} + static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; @@ -1501,7 +1515,6 @@ static int fuse_writepage_locked(struct page *page) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; - struct fuse_file *ff; struct page *tmp_page; set_page_writeback(page); @@ -1515,13 +1528,8 @@ static int fuse_writepage_locked(struct page *page) if (!tmp_page) goto err_free; - spin_lock(&fc->lock); - BUG_ON(list_empty(&fi->write_files)); - ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); - req->ff = fuse_file_get(ff); - spin_unlock(&fc->lock); - - fuse_write_fill(req, ff, page_offset(page), 0); + req->ff = fuse_write_file(fc, fi); + fuse_write_fill(req, req->ff, page_offset(page), 0); copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; -- cgit v0.10.2 From 385b126815d9278cf347504cf17ba62fb57e00ca Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 29 Jun 2013 21:42:48 +0400 Subject: fuse: Prepare to handle multiple pages in writeback The .writepages callback will issue writeback requests with more than one page aboard. Make existing end/check code be aware of this. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index aab6d05..944acc0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) BUG_ON(req->inode != inode); curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (curr_index == index) { + if (curr_index <= index && + index < curr_index + req->num_pages) { found = true; break; } @@ -1409,7 +1410,10 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { - __free_page(req->pages[0]); + int i; + + for (i = 0; i < req->num_pages; i++) + __free_page(req->pages[i]); fuse_file_put(req->ff, false); } @@ -1418,11 +1422,14 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) struct inode *inode = req->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + int i; list_del(&req->writepages_entry); - dec_bdi_stat(bdi, BDI_WRITEBACK); - dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); - bdi_writeout_inc(bdi); + for (i = 0; i < req->num_pages; i++) { + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); + } wake_up(&fi->page_waitq); } @@ -1434,14 +1441,15 @@ __acquires(fc->lock) struct fuse_inode *fi = get_fuse_inode(req->inode); loff_t size = i_size_read(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; + __u64 data_size = req->num_pages * PAGE_CACHE_SIZE; if (!fc->connected) goto out_free; - if (inarg->offset + PAGE_CACHE_SIZE <= size) { - inarg->size = PAGE_CACHE_SIZE; + if (inarg->offset + data_size <= size) { + inarg->size = data_size; } else if (inarg->offset < size) { - inarg->size = size & (PAGE_CACHE_SIZE - 1); + inarg->size = size - inarg->offset; } else { /* Got truncated off completely */ goto out_free; -- cgit v0.10.2 From cca2437045dda994d23bd65891b71e091fa35b5a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:51 +0200 Subject: fuse: lock page in mkwrite Lock the page in fuse_page_mkwrite() to protect against a race with fuse_writepage() where the page is redirtied before the actual writeback begins. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 944acc0..6fb9425 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1618,14 +1618,17 @@ static void fuse_vma_close(struct vm_area_struct *vma) static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - /* - * Don't use page->mapping as it may become NULL from a - * concurrent truncate. - */ - struct inode *inode = vma->vm_file->f_mapping->host; + struct inode *inode = file_inode(vma->vm_file); + + file_update_time(vma->vm_file); + lock_page(page); + if (page->mapping != inode->i_mapping) { + unlock_page(page); + return VM_FAULT_NOPAGE; + } fuse_wait_on_page_writeback(inode, page->index); - return 0; + return VM_FAULT_LOCKED; } static const struct vm_operations_struct fuse_file_vm_ops = { -- cgit v0.10.2 From 72523425fb434e81c0c9f611bd880ce339c4e06b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:52 +0200 Subject: fuse: don't BUG on no write file Don't bug if there's no writable files found for page writeback. If ever this is triggered, a WARN_ON helps debugging it much better then a BUG_ON. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6fb9425..6ce0066 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1505,12 +1505,14 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) static struct fuse_file *fuse_write_file(struct fuse_conn *fc, struct fuse_inode *fi) { - struct fuse_file *ff; + struct fuse_file *ff = NULL; spin_lock(&fc->lock); - BUG_ON(list_empty(&fi->write_files)); - ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); - fuse_file_get(ff); + if (!WARN_ON(list_empty(&fi->write_files))) { + ff = list_entry(fi->write_files.next, struct fuse_file, + write_entry); + fuse_file_get(ff); + } spin_unlock(&fc->lock); return ff; @@ -1524,6 +1526,7 @@ static int fuse_writepage_locked(struct page *page) struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct page *tmp_page; + int error = -ENOMEM; set_page_writeback(page); @@ -1536,7 +1539,11 @@ static int fuse_writepage_locked(struct page *page) if (!tmp_page) goto err_free; + error = -EIO; req->ff = fuse_write_file(fc, fi); + if (!req->ff) + goto err_free; + fuse_write_fill(req, req->ff, page_offset(page), 0); copy_highpage(tmp_page, page); @@ -1566,7 +1573,7 @@ err_free: fuse_request_free(req); err: end_page_writeback(page); - return -ENOMEM; + return error; } static int fuse_writepage(struct page *page, struct writeback_control *wbc) -- cgit v0.10.2 From 26d614df1da9d7d255686af5d6d4508f77853c01 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 29 Jun 2013 21:45:29 +0400 Subject: fuse: Implement writepages callback The .writepages one is required to make each writeback request carry more than one page on it. The patch enables optimized behaviour unconditionally, i.e. mmap-ed writes will benefit from the patch even if fc->writeback_cache=0. [SzM: simplify, add comments] Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6ce0066..0bd349d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1502,8 +1502,8 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) fuse_writepage_free(fc, req); } -static struct fuse_file *fuse_write_file(struct fuse_conn *fc, - struct fuse_inode *fi) +static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) { struct fuse_file *ff = NULL; @@ -1540,7 +1540,7 @@ static int fuse_writepage_locked(struct page *page) goto err_free; error = -EIO; - req->ff = fuse_write_file(fc, fi); + req->ff = fuse_write_file_get(fc, fi); if (!req->ff) goto err_free; @@ -1586,6 +1586,149 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) return err; } +struct fuse_fill_wb_data { + struct fuse_req *req; + struct fuse_file *ff; + struct inode *inode; +}; + +static void fuse_writepages_send(struct fuse_fill_wb_data *data) +{ + struct fuse_req *req = data->req; + struct inode *inode = data->inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + req->ff = fuse_file_get(data->ff); + spin_lock(&fc->lock); + list_add_tail(&req->list, &fi->queued_writes); + fuse_flush_writepages(inode); + spin_unlock(&fc->lock); +} + +static int fuse_writepages_fill(struct page *page, + struct writeback_control *wbc, void *_data) +{ + struct fuse_fill_wb_data *data = _data; + struct fuse_req *req = data->req; + struct inode *inode = data->inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct page *tmp_page; + int err; + + if (!data->ff) { + err = -EIO; + data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); + if (!data->ff) + goto out_unlock; + } + + if (req) { + BUG_ON(!req->num_pages); + if (req->num_pages == FUSE_MAX_PAGES_PER_REQ || + (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || + req->pages[req->num_pages - 1]->index + 1 != page->index) { + + fuse_writepages_send(data); + data->req = NULL; + } + } + err = -ENOMEM; + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto out_unlock; + + /* + * The page must not be redirtied until the writeout is completed + * (i.e. userspace has sent a reply to the write request). Otherwise + * there could be more than one temporary page instance for each real + * page. + * + * This is ensured by holding the page lock in page_mkwrite() while + * checking fuse_page_is_writeback(). We already hold the page lock + * since clear_page_dirty_for_io() and keep it held until we add the + * request to the fi->writepages list and increment req->num_pages. + * After this fuse_page_is_writeback() will indicate that the page is + * under writeback, so we can release the page lock. + */ + if (data->req == NULL) { + struct fuse_inode *fi = get_fuse_inode(inode); + + err = -ENOMEM; + req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ); + if (!req) { + __free_page(tmp_page); + goto out_unlock; + } + + fuse_write_fill(req, data->ff, page_offset(page), 0); + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; + req->in.argpages = 1; + req->background = 1; + req->num_pages = 0; + req->end = fuse_writepage_end; + req->inode = inode; + + spin_lock(&fc->lock); + list_add(&req->writepages_entry, &fi->writepages); + spin_unlock(&fc->lock); + + data->req = req; + } + set_page_writeback(page); + + copy_highpage(tmp_page, page); + req->pages[req->num_pages] = tmp_page; + req->page_descs[req->num_pages].offset = 0; + req->page_descs[req->num_pages].length = PAGE_SIZE; + + inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); + inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); + end_page_writeback(page); + + /* + * Protected by fc->lock against concurrent access by + * fuse_page_is_writeback(). + */ + spin_lock(&fc->lock); + req->num_pages++; + spin_unlock(&fc->lock); + + err = 0; +out_unlock: + unlock_page(page); + + return err; +} + +static int fuse_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct fuse_fill_wb_data data; + int err; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + data.inode = inode; + data.req = NULL; + data.ff = NULL; + + err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); + if (data.req) { + /* Ignore errors if we can write at least one page */ + BUG_ON(!data.req->num_pages); + fuse_writepages_send(&data); + err = 0; + } + if (data.ff) + fuse_file_put(data.ff, false); +out: + return err; +} + static int fuse_launder_page(struct page *page) { int err = 0; @@ -2607,6 +2750,7 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, + .writepages = fuse_writepages, .launder_page = fuse_launder_page, .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, -- cgit v0.10.2 From 2d033eaa0073d276ee6c324dd0ade0c5074a5542 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 16 Aug 2013 15:51:41 +0400 Subject: fuse: fix race in fuse_writepages() The patch fixes a race between ftruncate(2), mmap-ed write and write(2): 1) An user makes a page dirty via mmap-ed write. 2) The user performs shrinking truncate(2) intended to purge the page. 3) Before fuse_do_setattr calls truncate_pagecache, the page goes to writeback. fuse_writepages_fill attaches a new page to FUSE_WRITE request, then releases the original page by end_page_writeback and unlock it. 4) fuse_do_setattr completes and successfully returns. Since now, i_mutex is free. 5) Ordinary write(2) extends i_size back to cover the page. Note that fuse_send_write_pages do wait for fuse writeback, but for another page->index. 6) fuse_writepages_fill attaches more pages to the request (if any), then fuse_writepages_send is eventually called. It is supposed to crop inarg->size of the request, but it doesn't because i_size has already been extended back. Moving end_page_writeback behind fuse_writepages_send guarantees that __fuse_release_nowrite (called from fuse_do_setattr) will crop inarg->size of the request before write(2) gets the chance to extend i_size. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0bd349d..cc3a6c4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1590,6 +1590,7 @@ struct fuse_fill_wb_data { struct fuse_req *req; struct fuse_file *ff; struct inode *inode; + struct page **orig_pages; }; static void fuse_writepages_send(struct fuse_fill_wb_data *data) @@ -1598,12 +1599,17 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + int num_pages = req->num_pages; + int i; req->ff = fuse_file_get(data->ff); spin_lock(&fc->lock); list_add_tail(&req->list, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fc->lock); + + for (i = 0; i < num_pages; i++) + end_page_writeback(data->orig_pages[i]); } static int fuse_writepages_fill(struct page *page, @@ -1684,7 +1690,7 @@ static int fuse_writepages_fill(struct page *page, inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - end_page_writeback(page); + data->orig_pages[req->num_pages] = page; /* * Protected by fc->lock against concurrent access by @@ -1716,6 +1722,13 @@ static int fuse_writepages(struct address_space *mapping, data.req = NULL; data.ff = NULL; + err = -ENOMEM; + data.orig_pages = kzalloc(sizeof(struct page *) * + FUSE_MAX_PAGES_PER_REQ, + GFP_NOFS); + if (!data.orig_pages) + goto out; + err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); if (data.req) { /* Ignore errors if we can write at least one page */ @@ -1725,6 +1738,8 @@ static int fuse_writepages(struct address_space *mapping, } if (data.ff) fuse_file_put(data.ff, false); + + kfree(data.orig_pages); out: return err; } -- cgit v0.10.2 From 1e112a484e58cd13e6c90a3dfd9e54ea5859b795 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:53 +0200 Subject: fuse: writepages: fix aggregation Checking against tmp-page indexes is not very useful, and results in one (or rarely two) page requests. Which is not much of an improvement... Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cc3a6c4..bf765cf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1633,7 +1633,7 @@ static int fuse_writepages_fill(struct page *page, BUG_ON(!req->num_pages); if (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || - req->pages[req->num_pages - 1]->index + 1 != page->index) { + data->orig_pages[req->num_pages - 1]->index + 1 != page->index) { fuse_writepages_send(data); data->req = NULL; -- cgit v0.10.2 From 8b284dc47291daf72fe300e1138a2e7ed56f38ab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:53 +0200 Subject: fuse: writepages: handle same page rewrites As Maxim Patlasov pointed out, it's possible to get a dirty page while it's copy is still under writeback, despite fuse_page_mkwrite() doing its thing (direct IO). This could result in two concurrent write request for the same offset, with data corruption if they get mixed up. To prevent this, fuse needs to check and delay such writes. This implementation does this by: 1. check if page is still under writeout, if so create a new, single page secondary request for it 2. chain this secondary request onto the in-flight request 2/a. if a seconday request for the same offset was already chained to the in-flight request, then just copy the contents of the page and discard the new secondary request. This makes sure that for each page will have at most two requests associated with it 3. when the in-flight request finished, send off all secondary requests chained onto it Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bf765cf..f8ff019 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1414,7 +1414,9 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) for (i = 0; i < req->num_pages; i++) __free_page(req->pages[i]); - fuse_file_put(req->ff, false); + + if (req->ff) + fuse_file_put(req->ff, false); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1496,6 +1498,14 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) mapping_set_error(inode->i_mapping, req->out.h.error); spin_lock(&fc->lock); + while (req->misc.write.next) { + struct fuse_req *next = req->misc.write.next; + req->misc.write.next = next->misc.write.next; + next->misc.write.next = NULL; + list_add(&next->writepages_entry, &fi->writepages); + list_add_tail(&next->list, &fi->queued_writes); + fuse_flush_writepages(inode); + } fi->writectr--; fuse_writepage_finish(fc, req); spin_unlock(&fc->lock); @@ -1548,6 +1558,7 @@ static int fuse_writepage_locked(struct page *page) copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; + req->misc.write.next = NULL; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; @@ -1612,6 +1623,62 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) end_page_writeback(data->orig_pages[i]); } +static bool fuse_writepage_in_flight(struct fuse_req *new_req, + struct page *page) +{ + struct fuse_conn *fc = get_fuse_conn(new_req->inode); + struct fuse_inode *fi = get_fuse_inode(new_req->inode); + struct fuse_req *tmp; + struct fuse_req *old_req; + bool found = false; + pgoff_t curr_index; + + BUG_ON(new_req->num_pages != 0); + + spin_lock(&fc->lock); + list_del(&new_req->writepages_entry); + new_req->num_pages = 1; + list_for_each_entry(old_req, &fi->writepages, writepages_entry) { + BUG_ON(old_req->inode != new_req->inode); + curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (curr_index <= page->index && + page->index < curr_index + old_req->num_pages) { + found = true; + break; + } + } + if (!found) + goto out_unlock; + + for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { + BUG_ON(tmp->inode != new_req->inode); + curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (tmp->num_pages == 1 && + curr_index == page->index) { + old_req = tmp; + } + } + + if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || + old_req->state == FUSE_REQ_PENDING)) { + copy_highpage(old_req->pages[0], page); + spin_unlock(&fc->lock); + + dec_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); + dec_zone_page_state(page, NR_WRITEBACK_TEMP); + fuse_writepage_free(fc, new_req); + fuse_request_free(new_req); + goto out; + } else { + new_req->misc.write.next = old_req->misc.write.next; + old_req->misc.write.next = new_req; + } +out_unlock: + spin_unlock(&fc->lock); +out: + return found; +} + static int fuse_writepages_fill(struct page *page, struct writeback_control *wbc, void *_data) { @@ -1620,6 +1687,7 @@ static int fuse_writepages_fill(struct page *page, struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode); struct page *tmp_page; + bool is_writeback; int err; if (!data->ff) { @@ -1629,15 +1697,20 @@ static int fuse_writepages_fill(struct page *page, goto out_unlock; } - if (req) { - BUG_ON(!req->num_pages); - if (req->num_pages == FUSE_MAX_PAGES_PER_REQ || - (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || - data->orig_pages[req->num_pages - 1]->index + 1 != page->index) { + /* + * Being under writeback is unlikely but possible. For example direct + * read to an mmaped fuse file will set the page dirty twice; once when + * the pages are faulted with get_user_pages(), and then after the read + * completed. + */ + is_writeback = fuse_page_is_writeback(inode, page->index); - fuse_writepages_send(data); - data->req = NULL; - } + if (req && req->num_pages && + (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || + (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || + data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { + fuse_writepages_send(data); + data->req = NULL; } err = -ENOMEM; tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); @@ -1669,6 +1742,7 @@ static int fuse_writepages_fill(struct page *page, fuse_write_fill(req, data->ff, page_offset(page), 0); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; + req->misc.write.next = NULL; req->in.argpages = 1; req->background = 1; req->num_pages = 0; @@ -1690,6 +1764,13 @@ static int fuse_writepages_fill(struct page *page, inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); + + err = 0; + if (is_writeback && fuse_writepage_in_flight(req, page)) { + end_page_writeback(page); + data->req = NULL; + goto out_unlock; + } data->orig_pages[req->num_pages] = page; /* @@ -1700,7 +1781,6 @@ static int fuse_writepages_fill(struct page *page, req->num_pages++; spin_unlock(&fc->lock); - err = 0; out_unlock: unlock_page(page); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5b9e6f3..6432748 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -321,6 +321,7 @@ struct fuse_req { struct { struct fuse_write_in in; struct fuse_write_out out; + struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; struct fuse_lk_in lk_in; -- cgit v0.10.2 From ff17be0864777fe0dfb0a477868a8cb95c1ff90e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:44:53 +0200 Subject: fuse: writepage: skip already in flight If ->writepage() tries to write back a page whose copy is still in flight, then just skip by calling redirty_page_for_writepage(). This is OK, since now ->writepage() should never be called for data integrity sync. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f8ff019..135360e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1591,6 +1591,18 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) { int err; + if (fuse_page_is_writeback(page->mapping->host, page->index)) { + /* + * ->writepages() should be called for sync() and friends. We + * should only get here on direct reclaim and then we are + * allowed to skip a page which is already in flight + */ + WARN_ON(wbc->sync_mode == WB_SYNC_ALL); + + redirty_page_for_writepage(wbc, page); + return 0; + } + err = fuse_writepage_locked(page); unlock_page(page); -- cgit v0.10.2 From cb2ffb26e67ef89c44f46e971440cda2f83ae236 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 9 Sep 2013 20:18:27 +0200 Subject: cuse: add fix minor number to /dev/cuse This allows udev (or more recently systemd-tmpfiles) to create /dev/cuse on boot, in the same way as /dev/fuse is currently created, and the corresponding module to be loaded on first access. The corresponding functionalty was introduced for fuse in commit 578454f. Signed-off-by: Tom Gundersen Cc: Kay Sievers Signed-off-by: Miklos Szeredi diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 23721d3..80b7241 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -414,6 +414,7 @@ Your cooperation is appreciated. 200 = /dev/net/tun TAP/TUN network device 201 = /dev/button/gulpb Transmeta GULP-B buttons 202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control + 203 = /dev/cuse Cuse (character device in user-space) 204 = /dev/video/em8300 EM8300 DVD decoder control 205 = /dev/video/em8300_mv EM8300 DVD decoder video 206 = /dev/video/em8300_ma EM8300 DVD decoder audio diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index adbfd66..24da581 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = { ATTRIBUTE_GROUPS(cuse_class_dev); static struct miscdevice cuse_miscdev = { - .minor = MISC_DYNAMIC_MINOR, + .minor = CUSE_MINOR, .name = "cuse", .fops = &cuse_channel_fops, }; +MODULE_ALIAS_MISCDEV(CUSE_MINOR); +MODULE_ALIAS("devname:cuse"); + static int __init cuse_init(void) { int i, rc; diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 09c2300..c8cf093 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -31,6 +31,7 @@ #define I2O_MINOR 166 #define MICROCODE_MINOR 184 #define TUN_MINOR 200 +#define CUSE_MINOR 203 #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 #define MPT2SAS_MINOR 221 -- cgit v0.10.2 From f6011081f5e290756bd90fe96f1e86d3eac76f77 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Wed, 2 Oct 2013 15:01:07 +0400 Subject: fuse: writepages: roll back changes if request not found fuse_writepage_in_flight() returns false if it fails to find request with given index in fi->writepages. Then the caller proceeds with populating data->orig_pages[] and incrementing req->num_pages. Hence, fuse_writepage_in_flight() must revert changes it made in request before returning false. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 135360e..077b038 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1649,7 +1649,6 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, spin_lock(&fc->lock); list_del(&new_req->writepages_entry); - new_req->num_pages = 1; list_for_each_entry(old_req, &fi->writepages, writepages_entry) { BUG_ON(old_req->inode != new_req->inode); curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT; @@ -1659,9 +1658,12 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, break; } } - if (!found) + if (!found) { + list_add(&new_req->writepages_entry, &fi->writepages); goto out_unlock; + } + new_req->num_pages = 1; for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { BUG_ON(tmp->inode != new_req->inode); curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT; -- cgit v0.10.2 From 6eaf4782eb09e28dbd13d23b9ce0fb7646daf37e Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Wed, 2 Oct 2013 21:38:32 +0400 Subject: fuse: writepages: crop secondary requests If writeback happens while fuse is in FUSE_NOWRITE condition, the request will be queued but not processed immediately (see fuse_flush_writepages()). Until FUSE_NOWRITE becomes relaxed, more writebacks can happen. They will be queued as "secondary" requests to that first ("primary") request. Existing implementation crops only primary request. This is not correct because a subsequent extending write(2) may increase i_size and then secondary requests won't be cropped properly. The result would be stale data written to the server to a file offset where zeros must be. Similar problem may happen if secondary requests are attached to an in-flight request that was already cropped. The patch solves the issue by cropping all secondary requests in fuse_writepage_end(). Thanks to Miklos for idea. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 077b038..1cb303e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1436,12 +1436,12 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) } /* Called under fc->lock, may release and reacquire it */ -static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, + loff_t size) __releases(fc->lock) __acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); - loff_t size = i_size_read(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; __u64 data_size = req->num_pages * PAGE_CACHE_SIZE; @@ -1482,12 +1482,13 @@ __acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + size_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { req = list_entry(fi->queued_writes.next, struct fuse_req, list); list_del_init(&req->list); - fuse_send_writepage(fc, req); + fuse_send_writepage(fc, req, crop); } } @@ -1499,12 +1500,37 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) mapping_set_error(inode->i_mapping, req->out.h.error); spin_lock(&fc->lock); while (req->misc.write.next) { + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_req *next = req->misc.write.next; req->misc.write.next = next->misc.write.next; next->misc.write.next = NULL; list_add(&next->writepages_entry, &fi->writepages); - list_add_tail(&next->list, &fi->queued_writes); - fuse_flush_writepages(inode); + + /* + * Skip fuse_flush_writepages() to make it easy to crop requests + * based on primary request size. + * + * 1st case (trivial): there are no concurrent activities using + * fuse_set/release_nowrite. Then we're on safe side because + * fuse_flush_writepages() would call fuse_send_writepage() + * anyway. + * + * 2nd case: someone called fuse_set_nowrite and it is waiting + * now for completion of all in-flight requests. This happens + * rarely and no more than once per page, so this should be + * okay. + * + * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle + * of fuse_set_nowrite..fuse_release_nowrite section. The fact + * that fuse_set_nowrite returned implies that all in-flight + * requests were completed along with all of their secondary + * requests. Further primary requests are blocked by negative + * writectr. Hence there cannot be any in-flight requests and + * no invocations of fuse_writepage_end() while we're in + * fuse_set_nowrite..fuse_release_nowrite section. + */ + fuse_send_writepage(fc, next, inarg->offset + inarg->size); } fi->writectr--; fuse_writepage_finish(fc, req); -- cgit v0.10.2 From 41b6e41fc609753a9386d24295f7ed03b28c4601 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Wed, 2 Oct 2013 21:38:43 +0400 Subject: fuse: writepages: update bdi writeout when deleting secondary request BDI_WRITTEN counter is used to estimate bdi bandwidth. It must be incremented every time as bdi ends page writeback. No matter whether it was fulfilled by actual write or by discarding the request (e.g. due to shrunk i_size). Note that even before writepages patches, the case "Got truncated off completely" was handled in fuse_send_writepage() by calling fuse_writepage_finish() which updated BDI_WRITTEN unconditionally. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1cb303e..e6fdd59 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1701,11 +1701,14 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || old_req->state == FUSE_REQ_PENDING)) { + struct backing_dev_info *bdi = page->mapping->backing_dev_info; + copy_highpage(old_req->pages[0], page); spin_unlock(&fc->lock); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); + dec_bdi_stat(bdi, BDI_WRITEBACK); dec_zone_page_state(page, NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); goto out; -- cgit v0.10.2 From ce128de6260f86a990ed44a697f26d0859684f28 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Wed, 2 Oct 2013 21:38:54 +0400 Subject: fuse: writepages: protect secondary requests from fuse file release All async fuse requests must be supplied with extra reference to a fuse file. This is necessary to ensure that the fuse file is not released until all in-flight requests are completed. Fuse secondary writeback requests must obey this rule as well. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e6fdd59..7e70506 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1505,6 +1505,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_req *next = req->misc.write.next; req->misc.write.next = next->misc.write.next; next->misc.write.next = NULL; + next->ff = fuse_file_get(req->ff); list_add(&next->writepages_entry, &fi->writepages); /* -- cgit v0.10.2