diff options
-rw-r--r-- | include/linux/ftrace_event.h | 1 | ||||
-rw-r--r-- | include/linux/preempt.h | 38 | ||||
-rw-r--r-- | include/linux/sched.h | 51 | ||||
-rw-r--r-- | kernel/Kconfig.preempt | 6 | ||||
-rw-r--r-- | kernel/sched/core.c | 60 | ||||
-rw-r--r-- | kernel/sched/fair.c | 16 | ||||
-rw-r--r-- | kernel/sched/features.h | 3 | ||||
-rw-r--r-- | kernel/sched/sched.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace.c | 41 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 13 |
11 files changed, 206 insertions, 34 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4515232..ab58c33 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -58,6 +58,7 @@ struct trace_entry { int pid; unsigned short migrate_disable; unsigned short padding; + unsigned char preempt_lazy_count; }; #define FTRACE_MAX_EVENT \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d08aab..c153cf2 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -23,15 +23,38 @@ #define preempt_count() (current_thread_info()->preempt_count) +#ifdef CONFIG_PREEMPT_LAZY +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) +#else +#define add_preempt_lazy_count(val) do { } while (0) +#define sub_preempt_lazy_count(val) do { } while (0) +#define inc_preempt_lazy_count() do { } while (0) +#define dec_preempt_lazy_count() do { } while (0) +#define preempt_lazy_count() (0) +#endif + #ifdef CONFIG_PREEMPT asmlinkage void preempt_schedule(void); +# ifdef CONFIG_PREEMPT_LAZY #define preempt_check_resched() \ do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY))) \ preempt_schedule(); \ } while (0) +# else +#define preempt_check_resched() \ +do { \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + preempt_schedule(); \ +} while (0) +# endif #ifdef CONFIG_CONTEXT_TRACKING @@ -64,6 +87,12 @@ do { \ barrier(); \ } while (0) +#define preempt_lazy_disable() \ +do { \ + inc_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ @@ -85,6 +114,13 @@ do { \ preempt_check_resched(); \ } while (0) +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + /* For debugging and tracer internals only! */ #define add_preempt_count_notrace(val) \ do { preempt_count() += (val); } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0035c94..625a41f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2451,6 +2451,52 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +#ifdef CONFIG_PREEMPT_LAZY +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); +} + +static inline int need_resched_lazy(void) +{ + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +static inline int need_resched(void) +{ + return test_thread_flag(TIF_NEED_RESCHED) || + test_thread_flag(TIF_NEED_RESCHED_LAZY); +} +#else +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } +static inline int need_resched_lazy(void) { return 0; } + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +static inline int need_resched(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} +#endif + static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); @@ -2482,11 +2528,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -static inline int need_resched(void) -{ - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); -} - static inline bool __task_is_stopped_or_traced(struct task_struct *task) { if (task->state & (__TASK_STOPPED | __TASK_TRACED)) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index f8a2982..11dbe26 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -6,6 +6,12 @@ config PREEMPT_RT_BASE bool select PREEMPT +config HAVE_PREEMPT_LAZY + bool + +config PREEMPT_LAZY + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL + choice prompt "Preemption Model" default PREEMPT_NONE diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 387a4c6..01aec90 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -540,6 +540,37 @@ void resched_task(struct task_struct *p) smp_send_reschedule(cpu); } +#ifdef CONFIG_PREEMPT_LAZY +void resched_task_lazy(struct task_struct *p) +{ + int cpu; + + if (!sched_feat(PREEMPT_LAZY)) { + resched_task(p); + return; + } + + assert_raw_spin_locked(&task_rq(p)->lock); + + if (test_tsk_need_resched(p)) + return; + + if (test_tsk_need_resched_lazy(p)) + return; + + set_tsk_need_resched_lazy(p); + + cpu = task_cpu(p); + if (cpu == smp_processor_id()) + return; + + /* NEED_RESCHED_LAZY must be visible before we test polling */ + smp_mb(); + if (!tsk_is_polling(p)) + smp_send_reschedule(cpu); +} +#endif + void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -704,6 +735,17 @@ void resched_task(struct task_struct *p) assert_raw_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); } +#ifdef CONFIG_PREEMPT_LAZY +void resched_task_lazy(struct task_struct *p) +{ + if (!sched_feat(PREEMPT_LAZY)) { + resched_task(p); + return; + } + assert_raw_spin_locked(&task_rq(p)->lock); + set_tsk_need_resched_lazy(p); +} +#endif #endif /* CONFIG_SMP */ #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ @@ -1736,6 +1778,9 @@ void sched_fork(struct task_struct *p) /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(p)->preempt_lazy_count = 0; +#endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); #endif @@ -2388,6 +2433,7 @@ void migrate_disable(void) } preempt_disable(); + preempt_lazy_disable(); pin_current_cpu(); p->migrate_disable = 1; preempt_enable(); @@ -2442,6 +2488,7 @@ void migrate_enable(void) unpin_current_cpu(); preempt_enable(); + preempt_lazy_enable(); } EXPORT_SYMBOL(migrate_enable); #else @@ -2569,6 +2616,7 @@ need_resched: put_prev_task(rq, prev); next = pick_next_task(rq); clear_tsk_need_resched(prev); + clear_tsk_need_resched_lazy(prev); rq->skip_clock_update = 0; if (likely(prev != next)) { @@ -2673,6 +2721,14 @@ asmlinkage void __sched notrace preempt_schedule(void) if (likely(!preemptible())) return; +#ifdef CONFIG_PREEMPT_LAZY + /* + * Check for lazy preemption + */ + if (current_thread_info()->preempt_lazy_count && + !test_thread_flag(TIF_NEED_RESCHED)) + return; +#endif do { add_preempt_count_notrace(PREEMPT_ACTIVE); /* @@ -4417,7 +4473,9 @@ void init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ task_thread_info(idle)->preempt_count = 0; - +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(idle)->preempt_lazy_count = 0; +#endif /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 790e2fc..0af1448 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1902,7 +1902,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. @@ -1926,7 +1926,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); } static void @@ -2047,7 +2047,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); return; } /* @@ -2237,7 +2237,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) - resched_task(rq_of(cfs_rq)->curr); + resched_task_lazy(rq_of(cfs_rq)->curr); } static __always_inline @@ -2837,7 +2837,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (rq->curr == p) - resched_task(p); + resched_task_lazy(p); return; } @@ -3704,7 +3704,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: - resched_task(curr); + resched_task_lazy(curr); /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -5979,7 +5979,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); - resched_task(rq->curr); + resched_task_lazy(rq->curr); } se->vruntime -= cfs_rq->min_vruntime; @@ -6004,7 +6004,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (rq->curr == p) { if (p->prio > oldprio) - resched_task(rq->curr); + resched_task_lazy(rq->curr); } else check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 938274c..4594051 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -58,6 +58,9 @@ SCHED_FEAT(NONTASK_POWER, true) SCHED_FEAT(TTWU_QUEUE, true) #else SCHED_FEAT(TTWU_QUEUE, false) +# ifdef CONFIG_PREEMPT_LAZY +SCHED_FEAT(PREEMPT_LAZY, true) +# endif #endif SCHED_FEAT(FORCE_SD_OVERLAP, false) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ca61374..2843303 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1046,6 +1046,15 @@ extern void init_sched_fair_class(void); extern void resched_task(struct task_struct *p); extern void resched_cpu(int cpu); +#ifdef CONFIG_PREEMPT_LAZY +extern void resched_task_lazy(struct task_struct *tsk); +#else +static inline void resched_task_lazy(struct task_struct *tsk) +{ + resched_task(tsk); +} +#endif + extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 402fcc6..f9401ed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1509,6 +1509,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; + entry->preempt_lazy_count = preempt_lazy_count(); entry->pid = (tsk) ? tsk->pid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -1518,7 +1519,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, #endif ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | - (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); + (need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0); entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; } @@ -2410,15 +2412,17 @@ get_total_entries(struct trace_buffer *buf, static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n"); - seq_puts(m, "# / _-----=> irqs-off \n"); - seq_puts(m, "# | / _----=> need-resched \n"); - seq_puts(m, "# || / _---=> hardirq/softirq \n"); - seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| / _--=> migrate-disable\n"); - seq_puts(m, "# ||||| / delay \n"); - seq_puts(m, "# cmd pid |||||| time | caller \n"); - seq_puts(m, "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _--------=> CPU# \n"); + seq_puts(m, "# / _-------=> irqs-off \n"); + seq_puts(m, "# | / _------=> need-resched \n"); + seq_puts(m, "# || / _-----=> need-resched_lazy \n"); + seq_puts(m, "# ||| / _----=> hardirq/softirq \n"); + seq_puts(m, "# |||| / _---=> preempt-depth \n"); + seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n"); + seq_puts(m, "# |||||| / _-=> migrate-disable \n"); + seq_puts(m, "# ||||||| / delay \n"); + seq_puts(m, "# cmd pid |||||||| time | caller \n"); + seq_puts(m, "# \\ / |||||||| \\ | / \n"); } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) @@ -2442,13 +2446,16 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); - seq_puts(m, "# _-----=> irqs-off\n"); - seq_puts(m, "# / _----=> need-resched\n"); - seq_puts(m, "# | / _---=> hardirq/softirq\n"); - seq_puts(m, "# || / _--=> preempt-depth\n"); - seq_puts(m, "# ||| / delay\n"); - seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); - seq_puts(m, "# | | | |||| | |\n"); + seq_puts(m, "# _-------=> irqs-off \n"); + seq_puts(m, "# / _------=> need-resched \n"); + seq_puts(m, "# |/ _-----=> need-resched_lazy \n"); + seq_puts(m, "# ||/ _----=> hardirq/softirq \n"); + seq_puts(m, "# |||/ _---=> preempt-depth \n"); + seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n"); + seq_puts(m, "# ||||| / _-=> migrate-disable \n"); + seq_puts(m, "# |||||| / delay\n"); + seq_puts(m, "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | |||||| | |\n"); } void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 10c86fb..109291a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -117,6 +117,7 @@ struct kretprobe_trace_entry_head { * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler + * NEED_RESCHED_LAZY - lazy reschedule is requested */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -124,6 +125,7 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, }; #define TRACE_BUF_SIZE 1024 diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b54b3c8..46b6467 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -606,6 +606,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { char hardsoft_irq; char need_resched; + char need_resched_lazy; char irqs_off; int hardirq; int softirq; @@ -620,14 +621,17 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) '.'; need_resched = (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'; + need_resched_lazy = + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; hardsoft_irq = (hardirq && softirq) ? 'H' : hardirq ? 'h' : softirq ? 's' : '.'; - if (!trace_seq_printf(s, "%c%c%c", - irqs_off, need_resched, hardsoft_irq)) + if (!trace_seq_printf(s, "%c%c%c%c", + irqs_off, need_resched, need_resched_lazy, + hardsoft_irq)) return 0; if (entry->preempt_count) @@ -635,6 +639,11 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) else ret = trace_seq_putc(s, '.'); + if (entry->preempt_lazy_count) + ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count); + else + ret = trace_seq_putc(s, '.'); + if (entry->migrate_disable) ret = trace_seq_printf(s, "%x", entry->migrate_disable); else |