From 2673b4cf5d59c3ee5e0c12f6d734d38770324dc4 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sun, 29 Jan 2012 12:17:33 -0600 Subject: backing-dev: fix wakeup timer races with bdi_unregister() While 7a401a972df8e18 ("backing-dev: ensure wakeup_timer is deleted") addressed the problem of the bdi being freed with a queued wakeup timer, there are other races that could happen if the wakeup timer expires after/during bdi_unregister(), before bdi_destroy() is called. wakeup_timer_fn() could attempt to wakeup a task which has already has been freed, or could access a NULL bdi->dev via the wake_forker_thread tracepoint. Cc: Cc: Jens Axboe Reported-by: Chanho Min Reviewed-by: Namjae Jeon Signed-off-by: Rabin Vincent Signed-off-by: Wu Fengguang diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7ba8fea..dd8e2aa 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -318,7 +318,7 @@ static void wakeup_timer_fn(unsigned long data) if (bdi->wb.task) { trace_writeback_wake_thread(bdi); wake_up_process(bdi->wb.task); - } else { + } else if (bdi->dev) { /* * When bdi tasks are inactive for long time, they are killed. * In this case we have to wake-up the forker thread which @@ -584,6 +584,8 @@ EXPORT_SYMBOL(bdi_register_dev); */ static void bdi_wb_shutdown(struct backing_dev_info *bdi) { + struct task_struct *task; + if (!bdi_cap_writeback_dirty(bdi)) return; @@ -602,8 +604,13 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) * Finally, kill the kernel thread. We don't need to be RCU * safe anymore, since the bdi is gone from visibility. */ - if (bdi->wb.task) - kthread_stop(bdi->wb.task); + spin_lock_bh(&bdi->wb_lock); + task = bdi->wb.task; + bdi->wb.task = NULL; + spin_unlock_bh(&bdi->wb_lock); + + if (task) + kthread_stop(task); } /* @@ -623,7 +630,9 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { - if (bdi->dev) { + struct device *dev = bdi->dev; + + if (dev) { bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); @@ -632,8 +641,12 @@ void bdi_unregister(struct backing_dev_info *bdi) if (!bdi_cap_flush_forker(bdi)) bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); - device_unregister(bdi->dev); + + spin_lock_bh(&bdi->wb_lock); bdi->dev = NULL; + spin_unlock_bh(&bdi->wb_lock); + + device_unregister(dev); } } EXPORT_SYMBOL(bdi_unregister); -- cgit v0.10.2 From 15eb77a07c714ac80201abd0a9568888bcee6276 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 17 Jan 2012 11:18:56 -0600 Subject: writeback: fix NULL bdi->dev in trace writeback_single_inode bdi_prune_sb() resets sb->s_bdi to default_backing_dev_info when the tearing down the original bdi. Fix trace_writeback_single_inode to use sb->s_bdi=default_backing_dev_info rather than bdi->dev=NULL for a teared down bdi. Cc: Reported-by: Rabin Vincent Tested-by: Rabin Vincent Signed-off-by: Wu Fengguang diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916..5b4a936 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,14 +53,6 @@ struct wb_writeback_work { }; /* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include - -/* * We don't actually have pdflush, but this one is exported though /proc... */ int nr_pdflush_threads; @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 8588a89..06d302e 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -426,7 +426,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strncpy(__entry->name, - dev_name(inode->i_mapping->backing_dev_info->dev), 32); + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; -- cgit v0.10.2 From 3310225dfc71a35a2cc9340c15c0e08b14b3c754 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 9 Jan 2012 11:53:50 -0600 Subject: lib: proportion: lower PROP_MAX_SHIFT to 32 on 64-bit kernel PROP_MAX_SHIFT should be set to <=32 on 64-bit box. This fixes two bugs in the below lines of bdi_dirty_limit(): bdi_dirty *= numerator; do_div(bdi_dirty, denominator); 1) divide error: do_div() only uses the lower 32 bit of the denominator, which may trimmed to be 0 when PROP_MAX_SHIFT > 32. 2) overflow: (bdi_dirty * numerator) could easily overflow if numerator used up to 48 bits, leaving only 16 bits to bdi_dirty Cc: Cc: Peter Zijlstra Reported-by: Ilya Tumaykin Tested-by: Ilya Tumaykin Signed-off-by: Wu Fengguang diff --git a/include/linux/proportions.h b/include/linux/proportions.h index ef35bb7..26a8a4e 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -81,7 +81,11 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) * Limit the time part in order to ensure there are some bits left for the * cycle counter and fraction multiply. */ +#if BITS_PER_LONG == 32 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) +#else +#define PROP_MAX_SHIFT (BITS_PER_LONG/2) +#endif #define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) #define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) -- cgit v0.10.2 From 977b7e3a52a7421ad33a393a38ece59f3d41c2fa Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 4 Feb 2012 20:54:03 -0600 Subject: writeback: fix dereferencing NULL bdi->dev on trace_writeback_queue When a SD card is hot removed without umount, del_gendisk() will call bdi_unregister() without destroying/freeing it. This leaves the bdi in the bdi->dev = NULL, bdi->wb.task = NULL, bdi->bdi_list removed state. When sync(2) gets the bdi before bdi_unregister() and calls bdi_queue_work() after the unregister, trace_writeback_queue will be dereferencing the NULL bdi->dev. Fix it with a simple test for NULL. LKML-reference: http://lkml.org/lkml/2012/1/18/346 Cc: stable@kernel.org Reported-by: Rabin Vincent Tested-by: Namjae Jeon Signed-off-by: Wu Fengguang diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 06d302e..5973410 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -47,7 +47,10 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(int, reason) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + struct device *dev = bdi->dev; + if (!dev) + dev = default_backing_dev_info.dev; + strncpy(__entry->name, dev_name(dev), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; -- cgit v0.10.2