From 2e636d5e66c35dfcbaf617aa8fa963f6847478fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 May 2016 15:01:11 +0200 Subject: sched/preempt: Fix preempt_count manipulations Vikram reported that his ARM64 compiler managed to 'optimize' away the preempt_count manipulations in code like: preempt_enable_no_resched(); put_user(); preempt_disable(); Irrespective of that fact that that is horrible code that should be fixed for many reasons, it does highlight a deficiency in the generic preempt_count manipulators. As it is never right to combine/elide preempt_count manipulations like this. Therefore sprinkle some volatile in the two generic accessors to ensure the compiler is aware of the fact that the preempt_count is observed outside of the regular program-order view and thus cannot be optimized away like this. x86; the only arch not using the generic code is not affected as we do all this in asm in order to use the segment base per-cpu stuff. Reported-by: Vikram Mulukutla Tested-by: Vikram Mulukutla Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: a787870924db ("sched, arch: Create asm/preempt.h") Link: http://lkml.kernel.org/r/20160516131751.GH3205@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 5d8ffa3..c1cde35 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -7,10 +7,10 @@ static __always_inline int preempt_count(void) { - return current_thread_info()->preempt_count; + return READ_ONCE(current_thread_info()->preempt_count); } -static __always_inline int *preempt_count_ptr(void) +static __always_inline volatile int *preempt_count_ptr(void) { return ¤t_thread_info()->preempt_count; } -- cgit v0.10.2 From b7e7ade34e6188bee2e3b0d42b51d25137d9e2a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 May 2016 11:19:07 +0200 Subject: sched/core: Fix remote wakeups Commit: b5179ac70de8 ("sched/fair: Prepare to fix fairness problems on migration") ... introduced a bug: Mike Galbraith found that it introduced a performance regression, while Paul E. McKenney reported lost wakeups and bisected it to this commit. The reason is that I mis-read ttwu_queue() such that I assumed any wakeup that got a remote queue must have had the task migrated. Since this is not so; we need to transfer this information between queueing the wakeup and actually doing the wakeup. Use a new task_struct::sched_flag for this, we already write to sched_contributes_to_load in the wakeup path so this is a hot and modified cacheline. Reported-by: Paul E. McKenney Reported-by: Mike Galbraith Tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Hunter Cc: Andy Lutomirski Cc: Ben Segall Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matt Fleming Cc: Morten Rasmussen Cc: Oleg Nesterov Cc: Paul Turner Cc: Pavan Kondeti Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: byungchul.park@lge.com Fixes: b5179ac70de8 ("sched/fair: Prepare to fix fairness problems on migration") Link: http://lkml.kernel.org/r/20160523091907.GD15728@worktop.ger.corp.intel.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 6cc0df9..e053517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1533,6 +1533,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; unsigned :0; /* force alignment to the next boundary */ /* unserialized, strictly 'current' */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 404c078..7f2cae4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1768,13 +1768,15 @@ void sched_ttwu_pending(void) cookie = lockdep_pin_lock(&rq->lock); while (llist) { + int wake_flags = 0; + p = llist_entry(llist, struct task_struct, wake_entry); llist = llist_next(llist); - /* - * See ttwu_queue(); we only call ttwu_queue_remote() when - * its a x-cpu wakeup. - */ - ttwu_do_activate(rq, p, WF_MIGRATED, cookie); + + if (p->sched_remote_wakeup) + wake_flags = WF_MIGRATED; + + ttwu_do_activate(rq, p, wake_flags, cookie); } lockdep_unpin_lock(&rq->lock, cookie); @@ -1819,10 +1821,12 @@ void scheduler_ipi(void) irq_exit(); } -static void ttwu_queue_remote(struct task_struct *p, int cpu) +static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) { struct rq *rq = cpu_rq(cpu); + p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); + if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { if (!set_nr_if_polling(rq->idle)) smp_send_reschedule(cpu); @@ -1869,7 +1873,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) #if defined(CONFIG_SMP) if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* sync clocks x-cpu */ - ttwu_queue_remote(p, cpu); + ttwu_queue_remote(p, cpu, wake_flags); return; } #endif -- cgit v0.10.2