From 499d05ecf990a7a7bbf9e0a273f9969f8ec69efc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Nov 2011 19:34:48 +0800 Subject: mm: Make task in balance_dirty_pages() killable There is no reason why task in balance_dirty_pages() shouldn't be killable and it helps in recovering from some error conditions (like when filesystem goes in error state and cannot accept writeback anymore but we still want to kill processes using it to be able to unmount it). There will be follow up patches to further abort the generic_perform_write() and other filesystem write loops, to avoid large write + SIGKILL combination exceeding the dirty limit and possibly strange OOM. Reported-by: Kazuya Mio Tested-by: Kazuya Mio Reviewed-by: Neil Brown Reviewed-by: KOSAKI Motohiro Signed-off-by: Jan Kara Signed-off-by: Wu Fengguang diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a3278f0..79c3441 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1133,7 +1133,7 @@ pause: pages_dirtied, pause, start_time); - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_KILLABLE); io_schedule_timeout(pause); dirty_thresh = hard_dirty_limit(dirty_thresh); @@ -1145,6 +1145,9 @@ pause: */ if (nr_dirty < dirty_thresh) break; + + if (fatal_signal_pending(current)) + break; } if (!dirty_exceeded && bdi->dirty_exceeded) -- cgit v0.10.2 From 1df647197c5b8aacaeb58592cba9a1df322c9000 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 13 Nov 2011 19:47:32 -0600 Subject: writeback: hard throttle 1000+ dd on a slow USB stick The sleep based balance_dirty_pages() can pause at most MAX_PAUSE=200ms on every 1 4KB-page, which means it cannot throttle a task under 4KB/200ms=20KB/s. So when there are more than 512 dd writing to a 10MB/s USB stick, its bdi dirty pages could grow out of control. Even if we can increase MAX_PAUSE, the minimal (task_ratelimit = 1) means a limit of 4KB/s. They can eventually be safeguarded by the global limit check (nr_dirty < dirty_thresh). However if someone is also writing to an HDD at the same time, it'll get poor HDD write performance. We at least want to maintain good write performance for other devices when one device is attacked by some "massive parallel" workload, or suffers from slow write bandwidth, or somehow get stalled due to some error condition (eg. NFS server not responding). For a stalled device, we need to completely block its dirtiers, too, before its bdi dirty pages grow all the way up to the global limit and leave no space for the other functional devices. So change the loop exit condition to /* * Always enforce global dirty limit; also enforce bdi dirty limit * if the normal max_pause sleeps cannot keep things under control. */ if (nr_dirty < dirty_thresh && (bdi_dirty < bdi_thresh || bdi->dirty_ratelimit > 1)) break; which can be further simplified to if (task_ratelimit) break; Signed-off-by: Wu Fengguang diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 79c3441..e7cb5ff 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1136,14 +1136,11 @@ pause: __set_current_state(TASK_KILLABLE); io_schedule_timeout(pause); - dirty_thresh = hard_dirty_limit(dirty_thresh); /* - * max-pause area. If dirty exceeded but still within this - * area, no need to sleep for more than 200ms: (a) 8 pages per - * 200ms is typically more than enough to curb heavy dirtiers; - * (b) the pause time limit makes the dirtiers more responsive. + * This is typically equal to (nr_dirty < dirty_thresh) and can + * also keep "1000+ dd on a slow USB stick" under control. */ - if (nr_dirty < dirty_thresh) + if (task_ratelimit) break; if (fatal_signal_pending(current)) -- cgit v0.10.2 From 468e6a20afaccb67e2a7d7f60d301f90e1c6f301 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 7 Sep 2011 10:41:32 -0600 Subject: writeback: remove vm_dirties and task->dirties They are not used any more. Signed-off-by: Wu Fengguang diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08ffab0..94b1e35 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -184,7 +184,6 @@ extern struct cred init_cred; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ - .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f..1c4f3e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1521,7 +1521,6 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif - struct prop_local_single dirties; /* * when (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for some dirty throttling pause diff --git a/kernel/fork.c b/kernel/fork.c index ba0d172..da4a6a1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -162,7 +162,6 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { - prop_local_destroy_single(&tsk->dirties); account_kernel_stack(tsk->stack, -1); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); @@ -274,10 +273,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->stack = ti; - err = prop_local_init_single(&tsk->dirties); - if (err) - goto out; - setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7cb5ff..7125248 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -128,7 +128,6 @@ unsigned long global_dirty_limit; * */ static struct prop_descriptor vm_completions; -static struct prop_descriptor vm_dirties; /* * couple the period to the dirty_ratio: @@ -154,7 +153,6 @@ static void update_completion_period(void) { int shift = calc_period_shift(); prop_change_shift(&vm_completions, shift); - prop_change_shift(&vm_dirties, shift); writeback_set_ratelimit(); } @@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) } EXPORT_SYMBOL_GPL(bdi_writeout_inc); -void task_dirty_inc(struct task_struct *tsk) -{ - prop_inc_single(&vm_dirties, &tsk->dirties); -} - /* * Obtain an accurate fraction of the BDI's portion. */ @@ -1395,7 +1388,6 @@ void __init page_writeback_init(void) shift = calc_period_shift(); prop_descriptor_init(&vm_completions, shift); - prop_descriptor_init(&vm_dirties, shift); } /** @@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) __inc_zone_page_state(page, NR_DIRTIED); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); - task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } } -- cgit v0.10.2