From dcbf832e5823156e8f155359b47bd108cac8ad68 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 5 Oct 2012 23:07:19 +0200 Subject: vtime: Gather vtime declarations to their own header file These APIs are scattered around and are going to expand a bit. Let's create a dedicated header file for sanity. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index cab3da3..b083a47 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq); # define synchronize_irq(irq) barrier() #endif -struct task_struct; - -#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) -static inline void vtime_account(struct task_struct *tsk) -{ -} -#else -extern void vtime_account(struct task_struct *tsk); -#endif - #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) static inline void rcu_nmi_enter(void) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 36d12f0..1865b1f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -130,12 +131,4 @@ extern void account_process_tick(struct task_struct *, int user); extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void vtime_task_switch(struct task_struct *prev); -extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_idle(struct task_struct *tsk); -#else -static inline void vtime_task_switch(struct task_struct *prev) { } -#endif - #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/vtime.h b/include/linux/vtime.h new file mode 100644 index 0000000..7199c24 --- /dev/null 
+++ b/include/linux/vtime.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_KERNEL_VTIME_H +#define _LINUX_KERNEL_VTIME_H + +struct task_struct; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +extern void vtime_task_switch(struct task_struct *prev); +extern void vtime_account_system(struct task_struct *tsk); +extern void vtime_account_idle(struct task_struct *tsk); +#else +static inline void vtime_task_switch(struct task_struct *prev) { } +#endif + +#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) +static inline void vtime_account(struct task_struct *tsk) +{ +} +#else +extern void vtime_account(struct task_struct *tsk); +#endif + +#endif /* _LINUX_KERNEL_VTIME_H */ -- cgit v0.10.2 From 11113334d1c5dd5355c86e531c29f1202a855c86 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Oct 2012 18:05:51 +0200 Subject: vtime: Make vtime_account_system() irqsafe vtime_account_system() currently has only one caller, vtime_account(), which is irq safe. Now we are going to call it from other places like kvm where irqs are not always disabled by the time we account the cputime. So let's make it irqsafe. The arch implementation part is now prefixed with "__". vtime_account_idle() arch implementation is prefixed accordingly to stay consistent. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index f638821..5e48503 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -106,9 +106,9 @@ void vtime_task_switch(struct task_struct *prev) struct thread_info *ni = task_thread_info(current); if (idle_task(smp_processor_id()) != prev) - vtime_account_system(prev); + __vtime_account_system(prev); else - vtime_account_idle(prev); + __vtime_account_idle(prev); vtime_account_user(prev); @@ -135,14 +135,14 @@ static cputime_t vtime_delta(struct task_struct *tsk) return delta_stime; } -void vtime_account_system(struct task_struct *tsk) +void __vtime_account_system(struct task_struct *tsk) { cputime_t delta = vtime_delta(tsk); account_system_time(tsk, 0, delta, delta); } -void vtime_account_idle(struct task_struct *tsk) +void __vtime_account_idle(struct task_struct *tsk) { account_idle_time(vtime_delta(tsk)); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ce4cb77..0db456f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -336,7 +336,7 @@ static u64 vtime_delta(struct task_struct *tsk, return delta; } -void vtime_account_system(struct task_struct *tsk) +void __vtime_account_system(struct task_struct *tsk) { u64 delta, sys_scaled, stolen; @@ -346,7 +346,7 @@ void vtime_account_system(struct task_struct *tsk) account_steal_time(stolen); } -void vtime_account_idle(struct task_struct *tsk) +void __vtime_account_idle(struct task_struct *tsk) { u64 delta, sys_scaled, stolen; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7903344..783e988 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -140,6 +140,10 @@ void vtime_account(struct task_struct *tsk) } 
EXPORT_SYMBOL_GPL(vtime_account); +void __vtime_account_system(struct task_struct *tsk) +__attribute__((alias("vtime_account"))); +EXPORT_SYMBOL_GPL(__vtime_account_system); + void __kprobes vtime_stop_cpu(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 7199c24..b9fc4f9 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -5,10 +5,12 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); +extern void __vtime_account_system(struct task_struct *tsk); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_idle(struct task_struct *tsk); +extern void __vtime_account_idle(struct task_struct *tsk); #else static inline void vtime_task_switch(struct task_struct *prev) { } +static inline void vtime_account_system(struct task_struct *tsk) { } #endif #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 81b763b..0359f47 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -433,10 +433,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *st = cputime.stime; } +void vtime_account_system(struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + __vtime_account_system(tsk); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(vtime_account_system); + /* * Archs that account the whole time spent in the idle task * (outside irq) as idle time can rely on this and just implement - * vtime_account_system() and vtime_account_idle(). Archs that + * __vtime_account_system() and __vtime_account_idle(). Archs that * have other meaning of the idle time (s390 only includes the * time spent by the CPU when it's in low power mode) must override * vtime_account(). 
@@ -449,9 +459,9 @@ void vtime_account(struct task_struct *tsk) local_irq_save(flags); if (in_interrupt() || !is_idle_task(tsk)) - vtime_account_system(tsk); + __vtime_account_system(tsk); else - vtime_account_idle(tsk); + __vtime_account_idle(tsk); local_irq_restore(flags); } -- cgit v0.10.2 From b080935c8638e08134629d0a9ebdf35669bec14d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 5 Oct 2012 23:07:19 +0200 Subject: kvm: Directly account vtime to system on guest switch Switching to or from guest context is done in ioctl context. So by the time we call kvm_guest_enter() or kvm_guest_exit() we know we are not running the idle task. As a result, we can directly account the cputime using vtime_account_system(). There are three good reasons to do this: * We avoid some useless checks on guest switch. This slightly optimizes the fast path. * In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account() checks for irq time to account. This is pointless since we know we are not in an irq on guest switch. This is wasting cpu cycles for no good reason. vtime_account_system() OTOH is a no-op in this config option. * We can remove the irq disable/enable around kvm guest switch in s390. A further optimization may consist in introducing a vtime_account_guest() that directly calls account_guest_time(). 
Signed-off-by: Frederic Weisbecker Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Avi Kivity Cc: Marcelo Tosatti Cc: Joerg Roedel Cc: Alexander Graf Cc: Xiantao Zhang Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ecced9d..d91a955 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -608,9 +608,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_s390_deliver_pending_interrupts(vcpu); vcpu->arch.sie_block->icptcode = 0; - local_irq_disable(); kvm_guest_enter(); - local_irq_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); trace_kvm_s390_sie_enter(vcpu, @@ -629,9 +627,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); - local_irq_disable(); kvm_guest_exit(); - local_irq_enable(); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); return rc; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f..0e2212f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -737,7 +737,11 @@ static inline int kvm_deassign_device(struct kvm *kvm, static inline void kvm_guest_enter(void) { BUG_ON(preemptible()); - vtime_account(current); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); current->flags |= PF_VCPU; /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. 
In fact switching to a guest mode @@ -751,7 +755,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - vtime_account(current); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); current->flags &= ~PF_VCPU; } -- cgit v0.10.2 From fa5058f3b63153e0147ef65bcdb3a4ee63581346 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Oct 2012 04:07:19 +0200 Subject: cputime: Specialize irq vtime hooks With CONFIG_VIRT_CPU_ACCOUNTING, when vtime_account() is called in irq entry/exit, we perform a check on the context: if we are interrupting the idle task we account the pending cputime to idle, otherwise account to system time or its sub-areas: tsk->stime, hardirq time, softirq time, ... However this check for idle only concerns the hardirq entry and softirq entry: * Hardirq may directly interrupt the idle task, in which case we need to flush the pending CPU time to idle. * The idle task may be directly interrupted by a softirq if it calls local_bh_enable(). There is probably no such call in any idle task but we need to cover every case. Ksoftirqd is not concerned because the idle time is flushed on context switch and softirq in the end of hardirq have the idle time already flushed from the hardirq entry. In the other cases we always account to system/irq time: * On hardirq exit we account the time to hardirq time. * On softirq exit we account the time to softirq time. To optimize this and avoid the indirect call to vtime_account() and the checks it performs, specialize the vtime irq APIs and only perform the check on irq entry. Irq exit can directly call vtime_account_system(). CONFIG_IRQ_TIME_ACCOUNTING behaviour doesn't change and directly maps to its own vtime_account() implementation. One may want to take benefits from the new APIs to optimize irq time accounting as well in the future. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index b083a47..624ef3f 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account(current); \ + vtime_account_irq_enter(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account(current); \ + vtime_account_irq_exit(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index b9fc4f9..c35c022 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -21,4 +21,29 @@ static inline void vtime_account(struct task_struct *tsk) extern void vtime_account(struct task_struct *tsk); #endif +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + /* + * Hardirq can interrupt idle task anytime. So we need vtime_account() + * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. + * Softirq can also interrupt idle task directly if it calls + * local_bh_enable(). Such case probably don't exist but we never know. + * Ksoftirqd is not concerned because idle time is flushed on context + * switch. Softirqs in the end of hardirqs are also not a problem because + * the idle time is flushed on hardirq time already. 
+ */ + vtime_account(tsk); +} + +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + /* On hard|softirq exit we always account to hard|softirq cputime */ + __vtime_account_system(tsk); +#endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + vtime_account(tsk); +#endif +} + #endif /* _LINUX_KERNEL_VTIME_H */ diff --git a/kernel/softirq.c b/kernel/softirq.c index cc96bdc..ed567ba 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - vtime_account(current); + vtime_account_irq_enter(current); __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET); @@ -272,7 +272,7 @@ restart: lockdep_softirq_exit(); - vtime_account(current); + vtime_account_irq_exit(current); __local_bh_enable(SOFTIRQ_OFFSET); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } @@ -341,7 +341,7 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { - vtime_account(current); + vtime_account_irq_exit(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) -- cgit v0.10.2 From 3e1df4f506836e6bea1ab61cf88c75c8b1840643 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Oct 2012 05:23:22 +0200 Subject: cputime: Separate irqtime accounting from generic vtime vtime_account() doesn't have the same role in CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_IRQ_TIME_ACCOUNTING. In the first case it handles time accounting in any context. In the second case it only handles irq time accounting. So when vtime_account() is called from outside vtime_account_irq_*() this call is pointless for CONFIG_IRQ_TIME_ACCOUNTING. To fix the confusion, change vtime_account() to irqtime_account_irq() in CONFIG_IRQ_TIME_ACCOUNTING. This way we ensure future vtime_account() calls won't waste cycles in the irqtime APIs. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Paul Gortmaker diff --git a/include/linux/vtime.h b/include/linux/vtime.h index c35c022..0c2a2d3 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -8,17 +8,18 @@ extern void vtime_task_switch(struct task_struct *prev); extern void __vtime_account_system(struct task_struct *tsk); extern void vtime_account_system(struct task_struct *tsk); extern void __vtime_account_idle(struct task_struct *tsk); +extern void vtime_account(struct task_struct *tsk); #else static inline void vtime_task_switch(struct task_struct *prev) { } +static inline void __vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system(struct task_struct *tsk) { } +static inline void vtime_account(struct task_struct *tsk) { } #endif -#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) -static inline void vtime_account(struct task_struct *tsk) -{ -} +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +extern void irqtime_account_irq(struct task_struct *tsk); #else -extern void vtime_account(struct task_struct *tsk); +static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif static inline void vtime_account_irq_enter(struct task_struct *tsk) @@ -33,17 +34,14 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) * the idle time is flushed on hardirq time already. 
*/ vtime_account(tsk); + irqtime_account_irq(tsk); } static inline void vtime_account_irq_exit(struct task_struct *tsk) { -#ifdef CONFIG_VIRT_CPU_ACCOUNTING /* On hard|softirq exit we always account to hard|softirq cputime */ __vtime_account_system(tsk); -#endif -#ifdef CONFIG_IRQ_TIME_ACCOUNTING - vtime_account(tsk); -#endif + irqtime_account_irq(tsk); } #endif /* _LINUX_KERNEL_VTIME_H */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 0359f47..8d859da 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -43,7 +43,7 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq); * Called before incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. */ -void vtime_account(struct task_struct *curr) +void irqtime_account_irq(struct task_struct *curr) { unsigned long flags; s64 delta; @@ -73,7 +73,7 @@ void vtime_account(struct task_struct *curr) irq_time_write_end(); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(irqtime_account_irq); static int irqtime_account_hi_update(void) { -- cgit v0.10.2