From c13f3d378f77ce3176628ade452b0e461242faf3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 15 Feb 2010 11:33:04 +0900 Subject: x86/gart: Unexport gart_iommu_aperture I wrongly exported gart_iommu_aperture in the commit 42590a75019a50012f25a962246498dead428433. It's not necessary so let's unexport it. Signed-off-by: FUJITA Tomonori Cc: Joerg Roedel LKML-Reference: <20100215113241P.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index f147a95..3704997 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -31,7 +31,6 @@ #include int gart_iommu_aperture; -EXPORT_SYMBOL_GPL(gart_iommu_aperture); int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; -- cgit v0.10.2 From 622ea685f1fafdf84d612440535c84341f0860b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 27 Feb 2010 14:53:07 -0800 Subject: rcu: Fix holdoff for accelerated GPs for last non-dynticked CPU Make the holdoff only happen when the full number of attempts have been made. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267311188-16603-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 464ad2c..79b53bd 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu) int c = 0; int thatcpu; + /* Check for being in the holdoff period. */ + if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) + return rcu_needs_cpu_quick_check(cpu); + /* Don't bother unless we are the last non-dyntick-idle CPU. */ for_each_cpu_not(thatcpu, nohz_cpu_mask) if (thatcpu != cpu) { @@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu) } /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) { + if (c) raise_softirq(RCU_SOFTIRQ); - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - } return c; } -- cgit v0.10.2 From 1883c79a57a5fe25309007590cccb1b2782c41b2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 27 Feb 2010 14:53:08 -0800 Subject: rcu: Make task_subsys_state() RCU-lockdep checks handle boot-time use It is apparently legal to invoke task_subsys_state() without RCU protection during early boot time. After all, there are no concurrent tasks, so there can be no grace periods completing concurrently. But this does need an Acked-by from the cgroups folks. Located-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267311188-16603-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c9bbcb2..a73e1ce 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -489,6 +489,7 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || + !rcu_scheduler_active || cgroup_lock_is_held()); } -- cgit v0.10.2 From 90a6501f94aedd7fb40f5556334843194fb598be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 28 Feb 2010 08:32:18 -0800 Subject: sched, rcu: Fix rcu_dereference() for RCU-lockdep Make rcu_dereference() of runqueue data structures be rcu_dereference_sched(). Located-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <20100228163218.GD6846@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3e1fd96..5a5ea2c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3476,7 +3476,7 @@ static void run_rebalance_domains(struct softirq_action *h) static inline int on_null_domain(int cpu) { - return !rcu_dereference(cpu_rq(cpu)->sd); + return !rcu_dereference_sched(cpu_rq(cpu)->sd); } /* -- cgit v0.10.2 From db1466b3e1bd1727375cdbfcbea4bcce2f860f61 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:56 -0800 Subject: rcu: Use wrapper function instead of exporting tasklist_lock Lockdep-RCU commit d11c563d exported tasklist_lock, which is not a good thing. This patch instead exports a function that uses lockdep to check whether tasklist_lock is held. Suggested-by: Christoph Hellwig Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: Christoph Hellwig LKML-Reference: <1267631219-8713-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/cred.h b/include/linux/cred.h index 4db09f8..52507c3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred) * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)))) + ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) /** * get_task_cred - Get another task's objective credentials diff --git a/include/linux/sched.h b/include/linux/sched.h index 0eef87b..a47af20 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -258,6 +258,10 @@ extern spinlock_t mmlist_lock; struct task_struct; +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + extern void sched_init(void); extern void sched_init_smp(void); extern asmlinkage void schedule_tail(struct task_struct *prev); diff --git a/kernel/exit.c b/kernel/exit.c index 45ed043..fed3a4d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference_check(tsk->sighand, rcu_read_lock_held() || - lockdep_is_held(&tasklist_lock)); + lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 17bbf09..8691c54 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -EXPORT_SYMBOL_GPL(tasklist_lock); + +#ifdef CONFIG_PROVE_RCU +int lockdep_tasklist_lock_is_held(void) +{ + return lockdep_is_held(&tasklist_lock); +} +EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); +#endif /* #ifdef CONFIG_PROVE_RCU */ int nr_processes(void) { diff --git a/kernel/pid.c b/kernel/pid.c index b08e697..b606440 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); + first = rcu_dereference_check(pid->tasks[type].first, + rcu_read_lock_held() || + lockdep_tasklist_lock_is_held()); if (first) result = hlist_entry(first, struct task_struct, pids[(type)].node); } -- cgit v0.10.2 From 5ed42b8113667c06a6ff2c72717395b5044d30a1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:58 -0800 Subject: rcu, cgroup: Relax the check in task_subsys_state() as early boot is now handled by lockdep-RCU This patch removes the check for !rcu_scheduler_active because this check has been incorporated into rcu_dereference_check(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267631219-8713-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a73e1ce..c9bbcb2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -489,7 +489,6 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || - !rcu_scheduler_active || cgroup_lock_is_held()); } -- cgit v0.10.2 From cc5b83a9f884fe8722a275069a5a6fde39988455 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:59 -0800 Subject: rcu: Add control variables to lockdep_rcu_dereference() diagnostics Add the values of rcu_scheduler_active() and debug_locks() to the lockdep_rcu_dereference() output to help diagnose RCU lockdep splats that occur shortly after the scheduler starts. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267631219-8713-4-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0c30d04..681bc2e 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3822,6 +3822,7 @@ void lockdep_rcu_dereference(const char *file, const int line) printk("%s:%d invoked rcu_dereference_check() without protection!\n", file, line); printk("\nother info that might help us debug this:\n\n"); + printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); dump_stack(); -- cgit v0.10.2 From e6033e3b307fcfae08408e0673266db38392bda4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 17:50:16 -0800 Subject: rcu: Make rcu_read_lock_sched_held() handle !PREEMPT The rcu_read_lock_sched_held() needs to unconditionally return the value "1" in a !PREEMPT kernel, because under !PREEMPT, -all- kernel code is implicitly preempt-disabled. This patch makes this happen. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267667418-32233-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c843736..e22960ec 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -136,6 +136,7 @@ static inline int rcu_read_lock_bh_held(void) * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. */ +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; @@ -144,6 +145,12 @@ static inline int rcu_read_lock_sched_held(void) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -164,10 +171,17 @@ static inline int rcu_read_lock_bh_held(void) return 1; } +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { return preempt_count() != 0 || !rcu_scheduler_active; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -- cgit v0.10.2 From 99ee4ca746dda71326db7645463b4075ac1d665c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 17:50:17 -0800 Subject: rcu: Suppress __mpol_dup() false positive from RCU lockdep Common code is used during task creation and after the task has started running. RCU protection is not needed during task creation because no other CPU has access to the under-construction task. Provide the RCU protection anyway to suppress the false positive, as there does not appear to be a good way for the common code to recognize that the task is only accessible to the CPU creating it. Signed-off-by: Paul E. McKenney Cc: Paul Menage Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267667418-32233-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 290fb5b..3cec080 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1730,10 +1730,12 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) if (!new) return ERR_PTR(-ENOMEM); + rcu_read_lock(); if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(old, &mems); } + rcu_read_unlock(); *new = *old; atomic_set(&new->refcnt, 1); return new; -- cgit v0.10.2 From 8d53dd546f36073e0d29b0cfc24c665db301e3e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 17:50:18 -0800 Subject: rcu, ftrace: Fix RCU lockdep splat in ftrace_perf_buf_prepare() Change the pair of rcu_dereference() calls in ftrace_perf_buf_prepare() to rcu_dereference_sched(). Signed-off-by: Paul E. McKenney Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: Frederic Weisbecker LKML-Reference: <1267667418-32233-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0804cd5..601ad77 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -699,9 +699,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * trace_buf = rcu_dereference(perf_trace_buf_nmi); + * trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); * else - * trace_buf = rcu_dereference(perf_trace_buf); + * trace_buf = rcu_dereference_sched(perf_trace_buf); * * if (!trace_buf) * goto end; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index f0d6930..c1cc3ab 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -138,9 +138,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, cpu = smp_processor_id(); if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); + trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); else - trace_buf = rcu_dereference(perf_trace_buf); + trace_buf = rcu_dereference_sched(perf_trace_buf); if (!trace_buf) goto err; -- cgit v0.10.2 From 54dbf96c921513bf98484a20ef366d51944a4c4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:57 -0800 Subject: rcu: Suppress RCU lockdep warnings during early boot RCU is used during very early boot, before RCU and lockdep have been initialized. So make the underlying primitives (rcu_read_lock_held(), rcu_read_lock_bh_held(), rcu_read_lock_sched_held(), and rcu_dereference_check()) check for early boot via the rcu_scheduler_active flag. This will suppress false positives. Also introduce a debug_lockdep_rcu_enabled() static inline helper function, which tags the CONTINUE_PROVE_RCU case as likely(), as suggested by Ingo Molnar. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267631219-8713-2-git-send-email-paulmck@linux.vnet.ibm.com> [ v2: removed incomplete debug_lockdep_rcu_update() bits ] Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e22960ec..75921b8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -97,6 +97,11 @@ extern struct lockdep_map rcu_sched_lock_map; # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) +static inline int debug_lockdep_rcu_enabled(void) +{ + return likely(rcu_scheduler_active && debug_locks); +} + /** * rcu_read_lock_held - might we be in RCU read-side critical section? * @@ -104,12 +109,14 @@ extern struct lockdep_map rcu_sched_lock_map; * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_held(void) { - if (debug_locks) - return lock_is_held(&rcu_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_lock_map); } /** @@ -119,12 +126,14 @@ static inline int rcu_read_lock_held(void) * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU-bh read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_bh_held(void) { - if (debug_locks) - return lock_is_held(&rcu_bh_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_bh_lock_map); } /** @@ -135,15 +144,19 @@ static inline int rcu_read_lock_bh_held(void) * this assumes we are in an RCU-sched read-side critical section unless it * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; + if (!debug_lockdep_rcu_enabled()) + return 1; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; + return lockdep_opinion || preempt_count() != 0; } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) @@ -174,7 +187,7 @@ static inline int rcu_read_lock_bh_held(void) #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || !rcu_scheduler_active; + return !rcu_scheduler_active || preempt_count() != 0; } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) @@ -198,7 +211,7 @@ static inline int rcu_read_lock_sched_held(void) */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_locks && !(c)) \ + if (debug_lockdep_rcu_enabled() && !(c)) \ lockdep_rcu_dereference(__FILE__, __LINE__); \ rcu_dereference_raw(p); \ }) -- cgit v0.10.2 From 3f379b03fbfddd20536389a85c6456f8233d1f8d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Mar 2010 15:03:25 -0800 Subject: ftrace: Replace read_barrier_depends() with rcu_dereference_raw() Replace the calls to read_barrier_depends() in ftrace_list_func() with rcu_dereference_raw() to improve readability. The reason that we use rcu_dereference_raw() here is that removed entries are never freed, instead they are simply leaked. This is one of a very few cases where use of rcu_dereference_raw() is the long-term right answer. And I don't yet know of any others. ;-) Signed-off-by: Paul E. McKenney Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267830207-9474-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8378357..8c5adc0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -88,18 +89,22 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); #endif +/* + * Traverse the ftrace_list, invoking all entries. The reason that we + * can use rcu_dereference_raw() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw() calls are needed to handle + * concurrent insertions into the ftrace_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { - struct ftrace_ops *op = ftrace_list; - - /* in case someone actually ports this to alpha! */ - read_barrier_depends(); + struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ while (op != &ftrace_list_end) { - /* silly alpha */ - read_barrier_depends(); op->func(ip, parent_ip); - op = op->next; + op = rcu_dereference_raw(op->next); /*see above*/ }; } @@ -154,8 +159,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) * the ops->next pointer is valid before another CPU sees * the ops pointer included into the ftrace_list. */ - smp_wmb(); - ftrace_list = ops; + rcu_assign_pointer(ftrace_list, ops); if (ftrace_enabled) { ftrace_func_t func; -- cgit v0.10.2 From 007b09243b099811124f69d492adeebe9e439f96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Mar 2010 15:03:26 -0800 Subject: rcu: Increase RCU CPU stall timeouts if PROVE_RCU CONFIG_PROVE_RCU imposes additional overhead on the kernel, so increase the RCU CPU stall timeouts in an attempt to allow for this effect. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267830207-9474-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1439eb5..4a525a3 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -246,12 +246,21 @@ struct rcu_data { #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -- cgit v0.10.2 From f56e8a0765cc4374e02f4e3a79e2427b5096b075 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Mar 2010 15:03:27 -0800 Subject: x86/mce: Fix RCU lockdep splats Create an rcu_dereference_check_mce() that checks for RCU-sched read side and mce_read_mutex being held on update side. Replace uses of rcu_dereference() in arch/x86/kernel/cpu/mcheck/mce.c with this new macro. Signed-off-by: Paul E. McKenney Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267830207-9474-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a8aacd4..4442e9e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,11 @@ #include "mce-internal.h" +#define rcu_dereference_check_mce(p) \ + rcu_dereference_check((p), \ + rcu_read_lock_sched_held() || \ + lockdep_is_held(&mce_read_mutex)) + #define CREATE_TRACE_POINTS #include @@ -158,7 +163,7 @@ void mce_log(struct mce *mce) mce->finished = 0; wmb(); for (;;) { - entry = rcu_dereference(mcelog.next); + entry = rcu_dereference_check_mce(mcelog.next); for (;;) { /* * When the buffer fills up discard new entries. @@ -1500,7 +1505,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, return -ENOMEM; mutex_lock(&mce_read_mutex); - next = rcu_dereference(mcelog.next); + next = rcu_dereference_check_mce(mcelog.next); /* Only supports full reads right now */ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { @@ -1565,7 +1570,7 @@ timeout: static unsigned int mce_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_wait, wait); - if (rcu_dereference(mcelog.next)) + if (rcu_dereference_check_mce(mcelog.next)) return POLLIN | POLLRDNORM; return 0; } -- cgit v0.10.2 From b97c4bc16734a2e597dac7f91ee9eb78f4aeef9a Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Thu, 11 Mar 2010 14:08:45 -0800 Subject: locking: Make sparse work with inline spinlocks and rwlocks Currently sparse does not work with inline spinlock and rwlock functions. The problem is that they do not use the __acquires/__releases out-of-line functions, but use inline functions with no sparse annotations. This patch adds the appropriate annotations to make it work properly. Signed-off-by: Luca Barbieri Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 71e0b00..bc2994e 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -29,25 +29,25 @@ do { \ #endif #ifdef CONFIG_DEBUG_SPINLOCK - extern void do_raw_read_lock(rwlock_t *lock); + extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock); #define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock) extern int do_raw_read_trylock(rwlock_t *lock); - extern void do_raw_read_unlock(rwlock_t *lock); - extern void do_raw_write_lock(rwlock_t *lock); + extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock); + extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); #define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock) extern int do_raw_write_trylock(rwlock_t *lock); - extern void do_raw_write_unlock(rwlock_t *lock); + extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else -# define do_raw_read_lock(rwlock) arch_read_lock(&(rwlock)->raw_lock) +# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_read_lock_flags(lock, flags) \ - arch_read_lock_flags(&(lock)->raw_lock, *(flags)) + do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) -# define do_raw_read_unlock(rwlock) arch_read_unlock(&(rwlock)->raw_lock) -# define do_raw_write_lock(rwlock) arch_write_lock(&(rwlock)->raw_lock) +# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) +# define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_write_lock_flags(lock, flags) \ - arch_write_lock_flags(&(lock)->raw_lock, *(flags)) + do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) -# define do_raw_write_unlock(rwlock) arch_write_unlock(&(rwlock)->raw_lock) +# define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif #define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 8608821..89fac6a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -128,19 +128,21 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define raw_spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock) #ifdef CONFIG_DEBUG_SPINLOCK - extern void do_raw_spin_lock(raw_spinlock_t *lock); + extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) extern int do_raw_spin_trylock(raw_spinlock_t *lock); - extern void do_raw_spin_unlock(raw_spinlock_t *lock); + extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else -static inline void do_raw_spin_lock(raw_spinlock_t *lock) +static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) { + __acquire(lock); arch_spin_lock(&lock->raw_lock); } static inline void -do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) +do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock) { + __acquire(lock); arch_spin_lock_flags(&lock->raw_lock, *flags); } @@ -149,9 +151,10 @@ static inline int do_raw_spin_trylock(raw_spinlock_t *lock) return arch_spin_trylock(&(lock)->raw_lock); } -static inline void do_raw_spin_unlock(raw_spinlock_t *lock) +static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) { arch_spin_unlock(&lock->raw_lock); + __release(lock); } #endif -- cgit v0.10.2