From 21e52e15666323078b8517a4312712579176b56f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 Apr 2012 14:16:19 -0700 Subject: rcu: Make RCU_FAST_NO_HZ handle timer migration The current RCU_FAST_NO_HZ assumes that timers do not migrate unless a CPU goes offline, in which case it assumes that the CPU will have to come out of dyntick-idle mode (cancelling the timer) in order to go offline. This is important because when RCU_FAST_NO_HZ permits a CPU to enter dyntick-idle mode despite having RCU callbacks pending, it posts a timer on that CPU to force a wakeup on that CPU. This wakeup ensures that the CPU will eventually handle the end of the grace period, including invoking its RCU callbacks. However, Pascal Chapperon's test setup shows that the timer handler rcu_idle_gp_timer_func() really does get invoked in some cases. This is problematic because this can cause the CPU that entered dyntick-idle mode despite still having RCU callbacks pending to remain in dyntick-idle mode indefinitely, which means that its RCU callbacks might never be invoked. This situation can result in grace-period delays or even system hangs, which matches Pascal's observations of slow boot-up and shutdown (https://lkml.org/lkml/2012/4/5/142). See also the bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=806548 This commit therefore causes the "should never be invoked" timer handler rcu_idle_gp_timer_func() to use smp_call_function_single() to wake up the CPU for which the timer was intended, allowing that CPU to invoke its RCU callbacks in a timely manner. Reported-by: Pascal Chapperon Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index aaa55e1..1480900 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -292,6 +292,7 @@ TRACE_EVENT(rcu_dyntick, * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! * "Timer": Timer fired to cause CPU to continue processing callbacks. + * "Demigrate": Timer fired on wrong CPU, woke up correct CPU. * "Cleanup after idle": Idle exited, timer canceled. */ TRACE_EVENT(rcu_prep_idle, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d01e26d..bbb43ca 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2057,16 +2057,34 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) } /* + * Handler for smp_call_function_single(). The only point of this + * handler is to wake the CPU up, so the handler does only tracing. + */ +void rcu_idle_demigrate(void *unused) +{ + trace_rcu_prep_idle("Demigrate"); +} + +/* * Timer handler used to force CPU to start pushing its remaining RCU * callbacks in the case where it entered dyntick-idle mode with callbacks * pending. The hander doesn't really need to do anything because the * real work is done upon re-entry to idle, or by the next scheduling-clock * interrupt should idle not be re-entered. + * + * One special case: the timer gets migrated without awakening the CPU + * on which the timer was scheduled on. In this case, we must wake up + * that CPU. We do so with smp_call_function_single(). */ -static void rcu_idle_gp_timer_func(unsigned long unused) +static void rcu_idle_gp_timer_func(unsigned long cpu_in) { - WARN_ON_ONCE(1); /* Getting here can hang the system... */ + int cpu = (int)cpu_in; + trace_rcu_prep_idle("Timer"); + if (cpu != smp_processor_id()) + smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); + else + WARN_ON_ONCE(1); /* Getting here can hang the system... */ } /* @@ -2075,7 +2093,7 @@ static void rcu_idle_gp_timer_func(unsigned long unused) static void rcu_prepare_for_idle_init(int cpu) { setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), - rcu_idle_gp_timer_func, 0); + rcu_idle_gp_timer_func, cpu); } /* -- cgit v0.10.2