From 691a7c6f28ac90cccd0dbcf81348ea90b211bdd0 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Wed, 30 Apr 2014 14:06:06 +0800 Subject: UBIFS: fix an mmap and fsync race condition There is a race condition in UBIFS: Thread A (mmap) Thread B (fsync) ->__do_fault ->write_cache_pages -> ubifs_vm_page_mkwrite -> budget_space -> lock_page -> release/convert_page_budget -> SetPagePrivate -> TestSetPageDirty -> unlock_page -> lock_page -> TestClearPageDirty -> ubifs_writepage -> do_writepage -> release_budget -> ClearPagePrivate -> unlock_page -> !(ret & VM_FAULT_LOCKED) -> lock_page -> set_page_dirty -> ubifs_set_page_dirty -> TestSetPageDirty (set page dirty without budgeting) -> unlock_page This leads to a situation where we have a dirty page but no budget allocated for this page, so further write-back may fail with -ENOSPC. In this fix we return from page_mkwrite without performing unlock_page. We return VM_FAULT_LOCKED instead. After doing this, the race above will not happen. Signed-off-by: hujianyang Tested-by: Laurence Withers Cc: stable@vger.kernel.org Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4f34dba..f7d48a0 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1525,8 +1525,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, } wait_for_stable_page(page); - unlock_page(page); - return 0; + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); -- cgit v0.10.2 From 778c7eb82f29e0f09748290b380c1ed646ce9620 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 16 Apr 2014 17:47:33 -0300 Subject: UBI: weaken the 'exclusive' constraint when opening volumes to rename The UBI volume rename ioctl (UBI_IOCRNVOL) opens the volumes in exclusive mode. The volumes are opened for two reasons: to build a volume rename list, and a volume remove list. However, the first open constraint is excessive and can be replaced by a 'read-write' open mode. 
The second open constraint is properly set as 'exclusive' given the volume is opened for removal and we don't want any users around. By weakening the former 'exclusive' mode, we allow 'read-only' users to keep the volume open, while a rename is taking place. This is useful to perform an atomic rename, in a firmware upgrade scenario, while keeping the volume in read-only use (for instance, if a ubiblock is mounted as rootfs). It's worth mentioning that this is not the case of UBIFS, which keeps the volume opened as 'read-write' despite mounted as read-write or read-only mode. This change was suggested at least twice by Artem: http://lists.infradead.org/pipermail/linux-mtd/2012-September/044175.html http://permalink.gmane.org/gmane.linux.drivers.mtd/39866 Signed-off-by: Ezequiel Garcia Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index f54562a..7646220 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -731,7 +731,7 @@ static int rename_volumes(struct ubi_device *ubi, goto out_free; } - re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); + re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_READWRITE); if (IS_ERR(re->desc)) { err = PTR_ERR(re->desc); ubi_err("cannot open volume %d, error %d", vol_id, err); -- cgit v0.10.2 From 0da846f42ffa0b6597484d1d9ba92755bfc03018 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 29 Apr 2014 09:44:44 +0800 Subject: UBIFS: Remove unused variables in ubifs_budget_space I found two variables in ubifs_budget_space declared but not used. This state remains since the first commit 1e5176. So just remove them. 
Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index e8e01d7..eb997e9 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -437,7 +437,6 @@ static int calc_dd_growth(const struct ubifs_info *c, */ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) { - int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); int err, idx_growth, data_growth, dd_growth, retried = 0; ubifs_assert(req->new_page <= 1); -- cgit v0.10.2 From 604b592e6fd3c98f21435e1181ba7723ffc24715 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Mar 2014 15:54:03 -0400 Subject: UBI: fix rb_tree node comparison in add_map The comparisons used in add_vol() shouldn't be identical. Pretty sure the following is correct but it is completely untested. Signed-off-by: Mike Snitzer Acked-by: Richard Weinberger Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index c5dad65..b04e7d0 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -125,9 +125,9 @@ static struct ubi_ainf_volume *add_vol(struct ubi_attach_info *ai, int vol_id, parent = *p; av = rb_entry(parent, struct ubi_ainf_volume, rb); - if (vol_id > av->vol_id) + if (vol_id < av->vol_id) p = &(*p)->rb_left; - else if (vol_id > av->vol_id) + else p = &(*p)->rb_right; } -- cgit v0.10.2 From dac3698147655aba4d71a8e67d6dd46d7a86154f Mon Sep 17 00:00:00 2001 From: hujianyang Date: Wed, 21 May 2014 17:19:45 +0800 Subject: UBIFS: Fix dump messages in ubifs_dump_lprops Function ubifs_read_one_lp will not set @lp and returns an error when ubifs_read_one_lp failed. We should not perform ubifs_dump_lprop in this case because @lp is not initialized as we wanted. 
Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5157b86..177b015 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -745,8 +745,10 @@ void ubifs_dump_lprops(struct ubifs_info *c) for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { err = ubifs_read_one_lp(c, lnum, &lp); - if (err) + if (err) { ubifs_err("cannot read lprops for LEB %d", lnum); + continue; + } ubifs_dump_lprop(c, &lp); } -- cgit v0.10.2 From 151d6b21f973c585efa052b0ff0fab473ef47831 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Tue, 20 May 2014 11:13:48 +0200 Subject: UBI: block: Fix error path on alloc_workqueue failure Otherwise we'd return a random value if allocation of the workqueue fails. Signed-off-by: Helmut Schaa Acked-by: Brian Norris Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 8d659e6..389e5f0 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -432,8 +432,10 @@ int ubiblock_create(struct ubi_volume_info *vi) * Rembember workqueues are cheap, they're not threads. */ dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name); - if (!dev->wq) + if (!dev->wq) { + ret = -ENOMEM; goto out_free_queue; + } INIT_WORK(&dev->work, ubiblock_do_work); mutex_lock(&devices_mutex); -- cgit v0.10.2 From a0fd59511e0a0514d24044a29da0f6144f8600e5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 13 May 2014 22:27:58 +0200 Subject: UBIFS: add missing ui pointer in debugging code If UBIFS_DEBUG is defined an additional assertion of the ui_lock spinlock in do_writepage cannot compile because the ui pointer has not been previously declared. Fix this by declaring and initializing the ui pointer in case UBIFS_DEBUG is defined. 
Signed-off-by: Daniel Golle Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f7d48a0..727506b 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -903,6 +903,7 @@ static int do_writepage(struct page *page, int len) struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG + struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); spin_unlock(&ui->ui_lock); -- cgit v0.10.2 From ba6a7d55634b9ddf119216faef55f2463b17d60b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 27 May 2014 15:24:39 +0300 Subject: UBIFS: fix debugging check The debugging check which verifies that we never write outside of the file length was incorrect, since it was multiplying file length by the page size, instead of dividing. Fix this. Spotted-by: hujianyang Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 727506b..0ab7f7d 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -905,7 +905,7 @@ static int do_writepage(struct page *page, int len) #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT); spin_unlock(&ui->ui_lock); #endif -- cgit v0.10.2 From 72abc8f4b4e8574318189886de627a2bfe6cd0da Mon Sep 17 00:00:00 2001 From: hujianyang Date: Sat, 31 May 2014 11:39:32 +0800 Subject: UBIFS: Remove incorrect assertion in shrink_tnc() I hit the same assert failed as Dolev Raviv reported in Kernel v3.10 shows like this: [ 9641.164028] UBIFS assert failed in shrink_tnc at 131 (pid 13297) [ 9641.234078] CPU: 1 PID: 13297 Comm: mmap.test Tainted: G O 3.10.40 #1 [ 9641.234116] [] (unwind_backtrace+0x0/0x12c) from [] (show_stack+0x20/0x24) [ 9641.234137] [] (show_stack+0x20/0x24) from [] (dump_stack+0x20/0x28) [ 9641.234188] [] (dump_stack+0x20/0x28) from [] 
(shrink_tnc_trees+0x25c/0x350 [ubifs]) [ 9641.234265] [] (shrink_tnc_trees+0x25c/0x350 [ubifs]) from [] (ubifs_shrinker+0x25c/0x310 [ubifs]) [ 9641.234307] [] (ubifs_shrinker+0x25c/0x310 [ubifs]) from [] (shrink_slab+0x1d4/0x2f8) [ 9641.234327] [] (shrink_slab+0x1d4/0x2f8) from [] (do_try_to_free_pages+0x300/0x544) [ 9641.234344] [] (do_try_to_free_pages+0x300/0x544) from [] (try_to_free_pages+0x2d0/0x398) [ 9641.234363] [] (try_to_free_pages+0x2d0/0x398) from [] (__alloc_pages_nodemask+0x494/0x7e8) [ 9641.234382] [] (__alloc_pages_nodemask+0x494/0x7e8) from [] (new_slab+0x78/0x238) [ 9641.234400] [] (new_slab+0x78/0x238) from [] (__slab_alloc.constprop.42+0x1a4/0x50c) [ 9641.234419] [] (__slab_alloc.constprop.42+0x1a4/0x50c) from [] (kmem_cache_alloc_trace+0x54/0x188) [ 9641.234459] [] (kmem_cache_alloc_trace+0x54/0x188) from [] (do_readpage+0x168/0x468 [ubifs]) [ 9641.234553] [] (do_readpage+0x168/0x468 [ubifs]) from [] (ubifs_readpage+0x424/0x464 [ubifs]) [ 9641.234606] [] (ubifs_readpage+0x424/0x464 [ubifs]) from [] (filemap_fault+0x304/0x418) [ 9641.234638] [] (filemap_fault+0x304/0x418) from [] (__do_fault+0xd4/0x530) [ 9641.234665] [] (__do_fault+0xd4/0x530) from [] (handle_pte_fault+0x480/0xf54) [ 9641.234690] [] (handle_pte_fault+0x480/0xf54) from [] (handle_mm_fault+0x140/0x184) [ 9641.234716] [] (handle_mm_fault+0x140/0x184) from [] (do_page_fault+0x150/0x3ac) [ 9641.234737] [] (do_page_fault+0x150/0x3ac) from [] (do_DataAbort+0x3c/0xa0) [ 9641.234759] [] (do_DataAbort+0x3c/0xa0) from [] (__dabt_usr+0x38/0x40) After analyzing the code, I found a condition that may cause this failed in correct operations. Thus, I think this assertion is wrong and should be removed. Suppose there are two clean znodes and one dirty znode in TNC. So the per-filesystem atomic_t @clean_zn_cnt is (2). If commit start, dirty_znode is set to COW_ZNODE in get_znodes_to_commit() in case of potentially ops on this znode. 
We clear COW bit and DIRTY bit in write_index() without @tnc_mutex locked. We don't increase @clean_zn_cnt in this place. As the comments in write_index() show, if another process hold @tnc_mutex and dirty this znode after we clean it, @clean_zn_cnt would be decreased to (1). We will increase @clean_zn_cnt to (2) with @tnc_mutex locked in free_obsolete_znodes() to keep it right. If shrink_tnc() performs between decrease and increase, it will release other 2 clean znodes it holds and find @clean_zn_cnt is less than zero (1 - 2 = -1), then hit the assertion. Because free_obsolete_znodes() will soon correct @clean_zn_cnt and no harm to fs in this case, I think this assertion could be removed. 2 clean znodes and 1 dirty znode, @clean_zn_cnt == 2 Thread A (commit) Thread B (write or others) Thread C (shrinker) ->write_index ->clear_bit(DIRTY_NODE) ->clear_bit(COW_ZNODE) @clean_zn_cnt == 2 ->mutex_locked(&tnc_mutex) ->dirty_cow_znode ->!ubifs_zn_cow(znode) ->!test_and_set_bit(DIRTY_NODE) ->atomic_dec(&clean_zn_cnt) ->mutex_unlocked(&tnc_mutex) @clean_zn_cnt == 1 ->mutex_locked(&tnc_mutex) ->shrink_tnc ->destroy_tnc_subtree ->atomic_sub(&clean_zn_cnt, 2) ->ubifs_assert <- hit ->mutex_unlocked(&tnc_mutex) @clean_zn_cnt == -1 ->mutex_lock(&tnc_mutex) ->free_obsolete_znodes ->atomic_inc(&clean_zn_cnt) ->mutex_unlock(&tnc_mutex) @clean_zn_cnt == 0 (correct after shrink) Signed-off-by: hujianyang Cc: stable@vger.kernel.org Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index f35135e..9a9fb94 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) freed = ubifs_destroy_tnc_subtree(znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); total_freed += freed; znode = zprev; } -- cgit v0.10.2 From 90bea5a3f0bf680b87b90516f3c231997f4b8f3b Mon Sep 
17 00:00:00 2001 From: Daniel Golle Date: Mon, 2 Jun 2014 15:51:10 +0200 Subject: UBIFS: respect MS_SILENT mount flag When attempting to mount a non-ubifs formatted volume, lots of error messages (including a stack dump) are thrown to the kernel log even if the MS_SILENT mount flag is set. Fix this by adding an additional state variable in struct ubifs_info and suppress error messages in ubifs_read_node if MS_SILENT is set. Signed-off-by: Daniel Golle Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index e18b988..2290d58 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -988,30 +988,32 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, return err; if (type != ch->node_type) { - ubifs_err("bad node type (%d but expected %d)", - ch->node_type, type); + ubifs_errc(c, "bad node type (%d but expected %d)", + ch->node_type, type); goto out; } err = ubifs_check_node(c, buf, lnum, offs, 0, 0); if (err) { - ubifs_err("expected node type %d", type); + ubifs_errc(c, "expected node type %d", type); return err; } l = le32_to_cpu(ch->len); if (l != len) { - ubifs_err("bad node length %d, expected %d", l, len); + ubifs_errc(c, "bad node length %d, expected %d", l, len); goto out; } return 0; out: - ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, - ubi_is_mapped(c->ubi, lnum)); - ubifs_dump_node(c, buf); - dump_stack(); + ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, + offs, ubi_is_mapped(c->ubi, lnum)); + if (!c->probing) { + ubifs_dump_node(c, buf); + dump_stack(); + } return -EINVAL; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a81c7b5..3904c85 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1149,6 +1149,9 @@ static int mount_ubifs(struct ubifs_info *c) size_t sz; c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); + /* Suppress error messages while probing if MS_SILENT is set */ + c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); + err = 
init_constants_early(c); if (err) return err; @@ -1214,6 +1217,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_free; + c->probing = 0; + /* * Make sure the compressor which is set as default in the superblock * or overridden by mount options is actually compiled in. diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e8c8cfe..c1f71fe 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -51,6 +51,15 @@ #define ubifs_warn(fmt, ...) \ pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ current->pid, __func__, ##__VA_ARGS__) +/* + * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description + * object as an argument. + */ +#define ubifs_errc(c, fmt, ...) \ + do { \ + if (!(c)->probing) \ + ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 @@ -1209,6 +1218,7 @@ struct ubifs_debug_info; * @need_recovery: %1 if the file-system needs recovery * @replaying: %1 during journal replay * @mounting: %1 while mounting + * @probing: %1 while attempting to mount if MS_SILENT mount flag is set * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay @@ -1441,6 +1451,7 @@ struct ubifs_info { unsigned int replaying:1; unsigned int mounting:1; unsigned int remounting_rw:1; + unsigned int probing:1; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; -- cgit v0.10.2 From 380347e9ca76828ee9bac63cfc338ca99cdee4f3 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 3 Jun 2014 14:49:11 +0800 Subject: UBIFS: Add an assertion for clean_zn_cnt This patch adds a new ubifs_assert() in ubifs_tnc_close() to check if there are any leaks of per-filesystem @clean_zn_cnt. This new assert inspects whether the return value of ubifs_destroy_tnc_subtree() is equal to @clean_zn_cnt or not while umount. 
Artem: a minor amendment Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 9083bc7..8a40cf9 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2859,10 +2859,11 @@ void ubifs_tnc_close(struct ubifs_info *c) { tnc_destroy_cnext(c); if (c->zroot.znode) { - long n; + long n, freed; - ubifs_destroy_tnc_subtree(c->zroot.znode); n = atomic_long_read(&c->clean_zn_cnt); + freed = ubifs_destroy_tnc_subtree(c->zroot.znode); + ubifs_assert(freed == n); atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); -- cgit v0.10.2