From 95a79fd458b85132c25e351d45037ec9643312b2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Jan 2013 18:12:14 +0100 Subject: context_tracking: Export context state for generic vtime Export the context state: whether we run in user / kernel from the context tracking subsystem point of view. This is going to be used by the generic virtual cputime accounting subsystem that is needed to implement the full dynticks. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e24339c..b28d161 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -3,12 +3,40 @@ #ifdef CONFIG_CONTEXT_TRACKING #include +#include + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} extern void user_enter(void); extern void user_exit(void); extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else +static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void context_tracking_task_switch(struct task_struct *prev, diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index e0e07fd..54f471e 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -1,24 +1,10 @@ #include #include #include -#include #include -struct context_tracking { - /* - * When active is false, hooks are not set to - * minimize overhead: TIF flags are cleared - * and calls to user_enter/exit are ignored. This - * may be further optimized using static keys. - */ - bool active; - enum { - IN_KERNEL = 0, - IN_USER, - } state; -}; -static DEFINE_PER_CPU(struct context_tracking, context_tracking) = { +DEFINE_PER_CPU(struct context_tracking, context_tracking) = { #ifdef CONFIG_CONTEXT_TRACKING_FORCE .active = true, #endif -- cgit v0.10.2 From 39613766e8826756a4bf8c74274eb971c3c1d929 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 15 Jan 2013 13:25:07 +0100 Subject: cputime: Librarize per nsecs resolution cputime definitions The full dynticks cputime accounting that we'll soon introduce will rely on sched_clock(). And its clock can have a per nanosecond granularity. To prepare for this, we need to have a cputime_t implementation that has this precision. ia64 virtual cputime accounting already uses that granularity so all we need is to librarize its implementation in the asm generic headers. Also librarize the default per jiffy granularity cputime_t as well so that we can easily pick either implementation depending on the cputime accounting config we choose. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Fenghua Yu diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fcf7f0..040b316 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -19,91 +19,11 @@ #define __IA64_CPUTIME_H #ifndef CONFIG_VIRT_CPU_ACCOUNTING -#include +# include #else - -#include -#include -#include - -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) - -/* - * Convert cputime <-> jiffies (HZ) - */ -#define cputime_to_jiffies(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies_to_cputime(__jif) \ - (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) -#define cputime64_to_jiffies64(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies64_to_cputime64(__jif) \ - (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) - -/* - * Convert cputime <-> microseconds - */ -#define cputime_to_usecs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_USEC) -#define usecs_to_cputime(__usecs) \ - (__force cputime_t)((__usecs) * NSEC_PER_USEC) -#define usecs_to_cputime64(__usecs) \ - (__force cputime64_t)((__usecs) * NSEC_PER_USEC) - -/* - * Convert cputime <-> seconds - */ -#define cputime_to_secs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_SEC) -#define secs_to_cputime(__secs) \ - (__force cputime_t)((__secs) * NSEC_PER_SEC) - -/* - * Convert cputime <-> timespec (nsec) - */ -static inline cputime_t timespec_to_cputime(const struct timespec *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; - return (__force cputime_t) ret; -} -static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; -} - -/* - * Convert cputime <-> timeval (msec) - */ -static inline cputime_t timeval_to_cputime(struct timeval *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; - return (__force cputime_t) ret; -} -static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; -} - -/* - * Convert cputime <-> clock (USER_HZ) - */ -#define cputime_to_clock_t(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) -#define clock_t_to_cputime(__x) \ - (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) - -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - cputime_to_clock_t((__force cputime_t)__ct) - +# include +# include extern void arch_vtime_task_switch(struct task_struct *tsk); - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + #endif /* __IA64_CPUTIME_H */ diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 9a62937..c6eddf5 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,66 +4,6 @@ #include #include -typedef unsigned long __nocast cputime_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) - -typedef u64 __nocast cputime64_t; - -#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) -#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) - -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) - - -/* - * Convert cputime to microseconds and back. - */ -#define cputime_to_usecs(__ct) \ - jiffies_to_usecs(cputime_to_jiffies(__ct)) -#define usecs_to_cputime(__usec) \ - jiffies_to_cputime(usecs_to_jiffies(__usec)) -#define usecs_to_cputime64(__usec) \ - jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) - -/* - * Convert cputime to seconds and back. - */ -#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) -#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) - -/* - * Convert cputime to timespec and back. - */ -#define timespec_to_cputime(__val) \ - jiffies_to_cputime(timespec_to_jiffies(__val)) -#define cputime_to_timespec(__ct,__val) \ - jiffies_to_timespec(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to timeval and back. - */ -#define timeval_to_cputime(__val) \ - jiffies_to_cputime(timeval_to_jiffies(__val)) -#define cputime_to_timeval(__ct,__val) \ - jiffies_to_timeval(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to clock and back. - */ -#define cputime_to_clock_t(__ct) \ - jiffies_to_clock_t(cputime_to_jiffies(__ct)) -#define clock_t_to_cputime(__x) \ - jiffies_to_cputime(clock_t_to_jiffies(__x)) - -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) +#include #endif diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h new file mode 100644 index 0000000..b747f1f --- /dev/null +++ b/include/asm-generic/cputime_jiffies.h @@ -0,0 +1,66 @@ +#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H +#define _ASM_GENERIC_CPUTIME_JIFFIES_H + +typedef unsigned long __nocast cputime_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) + +typedef u64 __nocast cputime64_t; + +#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) +#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) + +#define nsecs_to_cputime64(__ct) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) + + +/* + * Convert cputime to microseconds and back. + */ +#define cputime_to_usecs(__ct) \ + jiffies_to_usecs(cputime_to_jiffies(__ct)) +#define usecs_to_cputime(__usec) \ + jiffies_to_cputime(usecs_to_jiffies(__usec)) +#define usecs_to_cputime64(__usec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) + +/* + * Convert cputime to seconds and back. + */ +#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) +#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) + +/* + * Convert cputime to timespec and back. + */ +#define timespec_to_cputime(__val) \ + jiffies_to_cputime(timespec_to_jiffies(__val)) +#define cputime_to_timespec(__ct,__val) \ + jiffies_to_timespec(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to timeval and back. + */ +#define timeval_to_cputime(__val) \ + jiffies_to_cputime(timeval_to_jiffies(__val)) +#define cputime_to_timeval(__ct,__val) \ + jiffies_to_timeval(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to clock and back. + */ +#define cputime_to_clock_t(__ct) \ + jiffies_to_clock_t(cputime_to_jiffies(__ct)) +#define clock_t_to_cputime(__x) \ + jiffies_to_cputime(clock_t_to_jiffies(__x)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) + +#endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h new file mode 100644 index 0000000..c73d182 --- /dev/null +++ b/include/asm-generic/cputime_nsecs.h @@ -0,0 +1,96 @@ +/* + * Definitions for measuring cputime in nsecs resolution. + * + * Based on + * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _ASM_GENERIC_CPUTIME_NSECS_H +#define _ASM_GENERIC_CPUTIME_NSECS_H + +typedef u64 __nocast cputime_t; +typedef u64 __nocast cputime64_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime <-> jiffies (HZ) + */ +#define cputime_to_jiffies(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies_to_cputime(__jif) \ + (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) +#define cputime64_to_jiffies64(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies64_to_cputime64(__jif) \ + (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + +/* + * Convert cputime <-> microseconds + */ +#define cputime_to_usecs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) \ + (__force cputime_t)((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) \ + (__force cputime64_t)((__usecs) * NSEC_PER_USEC) + +/* + * Convert cputime <-> seconds + */ +#define cputime_to_secs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_SEC) +#define secs_to_cputime(__secs) \ + (__force cputime_t)((__secs) * NSEC_PER_SEC) + +/* + * Convert cputime <-> timespec (nsec) + */ +static inline cputime_t timespec_to_cputime(const struct timespec *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + return (__force cputime_t) ret; +} +static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; +} + +/* + * Convert cputime <-> timeval (msec) + */ +static inline cputime_t timeval_to_cputime(struct timeval *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + return (__force cputime_t) ret; +} +static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; +} + +/* + * Convert cputime <-> clock (USER_HZ) + */ +#define cputime_to_clock_t(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) +#define clock_t_to_cputime(__x) \ + (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + cputime_to_clock_t((__force cputime_t)__ct) + +#endif -- cgit v0.10.2 From ae8dda5c473bf1a85913942adcaac449e5754bf3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Jan 2013 18:02:04 +0100 Subject: cputime: Move default nsecs_to_cputime() to jiffies based cputime file If the architecture doesn't provide an implementation of nsecs_to_cputime(), the cputime accounting core uses a default one that converts the nanoseconds to jiffies. However this only makes sense if we use the jiffies based cputime. For now it doesn't matter much because this API is only called on code that uses jiffies based cputime accounting. But the code may evolve and this API may be used more broadly in the future. Keeping this default implementation around is very error prone as it may introduce a bug and hide it on architectures that don't override this API. Fix this by moving this definition to the jiffies based cputime headers as it is the only place where it belongs to. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index b747f1f..272ecba 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -13,8 +13,14 @@ typedef u64 __nocast cputime64_t; #define cputime64_to_jiffies64(__ct) (__force u64)(__ct) #define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) + +/* + * Convert nanoseconds to cputime + */ +#define nsecs_to_cputime64(__nsec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) +#define nsecs_to_cputime(__nsec) \ + jiffies_to_cputime(nsecs_to_jiffies(__nsec)) /* diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 293b202..5849448b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -505,10 +505,6 @@ EXPORT_SYMBOL_GPL(vtime_account); #else -#ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -#endif - static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) { u64 temp = (__force u64) rtime; -- cgit v0.10.2 From abf917cd91cbb73952758f9741e2fa65002a48ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Jul 2012 07:56:04 +0200 Subject: cputime: Generic on-demand virtual cputime accounting If we want to stop the tick further idle, we need to be able to account the cputime without using the tick. Virtual based cputime accounting solves that problem by hooking into kernel/user boundaries. However implementing CONFIG_VIRT_CPU_ACCOUNTING require low level hooks and involves more overhead. But we already have a generic context tracking subsystem that is required for RCU needs by archs which plan to shut down the tick outside idle. This patch implements a generic virtual based cputime accounting that relies on these generic kernel/user hooks. There are some upsides of doing this: - This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING if context tracking is already built (already necessary for RCU in full tickless mode). - We can rely on the generic context tracking subsystem to dynamically (de)activate the hooks, so that we can switch anytime between virtual and tick based accounting. This way we don't have the overhead of the virtual accounting when the tick is running periodically. And one downside: - There is probably more overhead than a native virtual based cputime accounting. But this relies on hooks that are already set anyway. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 040b316..e2d3f5b 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -11,19 +11,19 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec. + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. * Otherwise we measure cpu time in jiffies using the generic definitions. */ #ifndef __IA64_CPUTIME_H #define __IA64_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE # include #else # include # include extern void arch_vtime_task_switch(struct task_struct *tsk); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index ff2ae41..020d655 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -31,7 +31,7 @@ struct thread_info { mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE __u64 ac_stamp; __u64 ac_leave; __u64 ac_stime; @@ -69,7 +69,7 @@ struct thread_info { #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define setup_thread_stack(p, org) \ *task_thread_info(p) = *task_thread_info(org); \ task_thread_info(p)->ac_stime = 0; \ diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h index c57fa91..00cf03e 100644 --- a/arch/ia64/include/asm/xen/minstate.h +++ b/arch/ia64/include/asm/xen/minstate.h @@ -1,5 +1,5 @@ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define XEN_ACCOUNT_GET_STAMP \ MOV_FROM_ITC(pUStk, p6, r20, r2); diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index a48bd9a..46c9e30 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -41,7 +41,7 @@ void foo(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6bfd842..7a53530 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall) #endif .global __paravirt_work_processed_syscall; __paravirt_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE adds r2=PT(LOADRS)+16,r12 MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -762,7 +762,7 @@ __paravirt_work_processed_syscall: ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs @@ -793,7 +793,7 @@ __paravirt_work_processed_syscall: ld8.fill r1=[r3],16 // M0|1 load r1 (pUStk) mov r17=1 // A ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) st1 [r15]=r17 // M2|3 #else (pUStk) st1 [r14]=r17 // M2|3 @@ -813,7 +813,7 @@ __paravirt_work_processed_syscall: shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition COVER // B add current frame into dirty partition & set cr.ifs ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE mov r19=ar.bsp // M2 get new backing store pointer st8 [r14]=r22 // M save time at leave mov f10=f0 // F clear f10 @@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE .pred.rel.mutex pUStk,pKStk MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave @@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 #else (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 @@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8 r20=[r16],16 // ar.fpsr ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred #endif ;; @@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ld8.fill r2=[r17] (pUStk) mov r17=1 ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; // mib : mov add br -> mib : ld8 add br // bbb_ : br nop cover;; mbb_ : mov br cover;; diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index e662f17..c4cd45d 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting #else nop.m 0 @@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 4738ff7..9be4e49 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock) sched_clock = ia64_native_sched_clock #endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE GLOBAL_ENTRY(cycle_to_cputime) alloc r16=ar.pfs,1,0,0,0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 @@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime) shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp END(cycle_to_cputime) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_IA64_BRL_EMU diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index fa25689..689ffca 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -784,7 +784,7 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ;; mov b6=r30 // I0 setup syscall handler branch reg early #else @@ -801,7 +801,7 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting #else mov b6=r30 // I0 setup syscall handler branch reg early @@ -817,7 +817,7 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A @@ -1043,7 +1043,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index d56753a..cc82a7d 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -4,7 +4,7 @@ #include "entry.h" #include "paravirt_inst.h" -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define ACCOUNT_GET_STAMP \ (pUStk) mov.m r20=ar.itc; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 88a7945..a3a3f5a 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include @@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(vtime_delta(tsk)); } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static irqreturn_t timer_interrupt (int irq, void *dev_id) diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index 29bb11e..4f35fc4 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=256 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 88fa5c4..f7df836 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 840a2c2..bcedeea 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_ALTIVEC=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 483733b..607559a 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in * the same units as the timebase. Otherwise we measure cpu time * in jiffies using the generic definitions. */ @@ -16,7 +16,7 @@ #ifndef __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #ifdef __KERNEL__ static inline void setup_cputime_one_jiffy(void) { } @@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } #endif /* __KERNEL__ */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 531fe0c..b1e7f2a 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -145,7 +145,7 @@ struct dtl_entry { extern struct kmem_cache *dtl_cache; /* - * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls * reading from the dispatch trace log. If other code wants to consume * DTL entries, it can set this pointer to a function that will get * called once for each DTL entry that gets processed. diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index ea2a86e..2d0e1f5 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -24,7 +24,7 @@ * user_time and system_time fields in the paca. */ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) #define ACCOUNT_STOLEN_TIME @@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif /* CONFIG_PPC_SPLPAR */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b310a05..a0ca42f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -94,7 +94,7 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f /* if from user, see if there are any DTL entries to process */ @@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION addi r9,r1,STACK_FRAME_OVERHEAD 33: END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ /* * A syscall should always be called with interrupts enabled diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6f6b1cc..22c9b67 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factors for converting from cputime_t (timebase ticks) to * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). @@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk) account_user_time(tsk, utime, utimescaled); } -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a764854..0cc0ac0 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7; */ static int dtl_buf_entries = N_DISPATCH_LOG; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct dtl_ring { u64 write_index; struct dtl_entry *write_ptr; @@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl) return per_cpu(dtl_rings, dtl->cpu).write_index; } -#else /* CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_start(struct dtl *dtl) { @@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl) { return lppaca_of(dtl->cpu).dtl_idx; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_enable(struct dtl *dtl) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ca55882..527e12c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = { struct kmem_cache *dtl_cache; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Allocate space for the dispatch trace log for all possible cpus * and register the buffers with the hypervisor. This is used for @@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void) return 0; } -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static inline int alloc_dispatch_logs(void) { return 0; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int alloc_dispatch_log_kmem_cache(void) { diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index c6eddf5..5196943 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,6 +4,12 @@ #include #include -#include +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +# include +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# include +#endif #endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index c73d182..b6485ca 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t; */ #define cputime_to_jiffies(__ct) \ ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__jif) \ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) #define cputime64_to_jiffies64(__ct) \ @@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t; #define jiffies64_to_cputime64(__jif) \ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + +/* + * Convert cputime <-> nanoseconds + */ +#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) + + /* * Convert cputime <-> microseconds */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 66b7078..ed5f6ed 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { vtime_account_user(tsk); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab5..21ef703 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk); static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static inline void arch_vtime_task_switch(struct task_struct *tsk) { } +static inline void vtime_user_enter(struct task_struct *tsk) +{ + vtime_account_system(tsk); +} +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +#else +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +#endif + #ifdef CONFIG_IRQ_TIME_ACCOUNTING extern void irqtime_account_irq(struct task_struct *tsk); #else diff --git a/init/Kconfig b/init/Kconfig index be8b7f5..a05f843 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -326,6 +326,9 @@ source "kernel/time/Kconfig" menu "CPU/Task time and stats accounting" +config VIRT_CPU_ACCOUNTING + bool + choice prompt "Cputime accounting" default TICK_CPU_ACCOUNTING if !PPC64 @@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING If unsure, say Y. -config VIRT_CPU_ACCOUNTING +config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" depends on HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time accounting. This is done by reading a CPU counter on each @@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING this also enables accounting of stolen time on logically-partitioned systems. +config VIRT_CPU_ACCOUNTING_GEN + bool "Full dynticks CPU time accounting" + depends on HAVE_CONTEXT_TRACKING && 64BIT + select VIRT_CPU_ACCOUNTING + select CONTEXT_TRACKING + help + Select this option to enable task and CPU time accounting on full + dynticks systems. This accounting is implemented by watching every + kernel-user boundaries using the context tracking subsystem. + The accounting is thus performed at the expense of some significant + overhead. + + For now this is only useful if you are working on the full + dynticks subsystem development. + + If unsure, say N. + config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" depends on HAVE_IRQ_TIME_ACCOUNTING diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 54f471e..9002e92 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -30,8 +30,9 @@ void user_enter(void) local_irq_save(flags); if (__this_cpu_read(context_tracking.active) && __this_cpu_read(context_tracking.state) != IN_USER) { - __this_cpu_write(context_tracking.state, IN_USER); + vtime_user_enter(current); rcu_user_enter(); + __this_cpu_write(context_tracking.state, IN_USER); } local_irq_restore(flags); } @@ -53,8 +54,9 @@ void user_exit(void) local_irq_save(flags); if (__this_cpu_read(context_tracking.state) == IN_USER) { - __this_cpu_write(context_tracking.state, IN_KERNEL); rcu_user_exit(); + vtime_user_exit(current); + __this_cpu_write(context_tracking.state, IN_KERNEL); } local_irq_restore(flags); } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5849448b..1c964ec 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "sched.h" @@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev) else vtime_account_system(prev); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE vtime_account_user(prev); +#endif arch_vtime_task_switch(prev); } #endif @@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { - if (in_interrupt() || !is_idle_task(tsk)) - vtime_account_system(tsk); - else - vtime_account_idle(tsk); + if (!in_interrupt()) { + /* + * If we interrupted user, context_tracking_in_user() + * is 1 because the context tracking don't hook + * on irq entry/exit. This way we know if + * we need to flush user time on kernel entry. + */ + if (context_tracking_in_user()) { + vtime_account_user(tsk); + return; + } + + if (is_idle_task(tsk)) { + vtime_account_idle(tsk); + return; + } + } + vtime_account_system(tsk); } EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ @@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } #endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static DEFINE_PER_CPU(unsigned long long, cputime_snap); + +static cputime_t get_vtime_delta(void) +{ + unsigned long long delta; + + delta = sched_clock() - __this_cpu_read(cputime_snap); + __this_cpu_add(cputime_snap, delta); + + /* CHECKME: always safe to convert nsecs to cputime? */ + return nsecs_to_cputime(delta); +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); +} + +void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(); + + account_idle_time(delta_cpu); +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v0.10.2 From 3f4724ea85b7d9055a9976fa8f30b471bdfbca93 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Jul 2012 18:00:34 +0200 Subject: cputime: Allow dynamic switch between tick/virtual based cputime accounting Allow to dynamically switch between tick and virtual based cputime accounting. This way we can provide a kind of "on-demand" virtual based cputime accounting. In this mode, the kernel relies on the context tracking subsystem to dynamically probe on kernel boundaries. This is in preparation for being able to stop the timer tick in more places than just the idle state. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING_GEN which makes it possible to account the cputime without the tick by hooking on kernel/user boundaries. Depending whether the tick is stopped or not, we can switch between tick and vtime based accounting anytime in order to minimize the overhead associated to user hooks. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 21ef703..5368af9 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern bool vtime_accounting_enabled(void); #else +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 1c964ec..e1939d3 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -317,8 +317,6 @@ out: rcu_read_unlock(); } -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat @@ -383,11 +381,12 @@ static void irqtime_account_idle_ticks(int ticks) irqtime_account_process_tick(current, 0, rq); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static void irqtime_account_idle_ticks(int ticks) {} -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, +static inline void irqtime_account_idle_ticks(int ticks) {} +static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Account a single tick of cpu time. * @p: the process that the cpu time gets accounted to @@ -398,6 +397,9 @@ void account_process_tick(struct task_struct *p, int user_tick) cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); + if (vtime_accounting_enabled()) + return; + if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq); return; @@ -439,8 +441,7 @@ void account_idle_ticks(unsigned long ticks) account_idle_time(jiffies_to_cputime(ticks)); } - -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Use precise platform statistics if available: @@ -475,6 +476,9 @@ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { + if (!vtime_accounting_enabled()) + return; + if (is_idle_task(prev)) vtime_account_idle(prev); else @@ -498,6 +502,9 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { + if (!vtime_accounting_enabled()) + return; + if (!in_interrupt()) { /* * If we interrupted user, context_tracking_in_user() @@ -520,7 +527,7 @@ void vtime_account(struct task_struct *tsk) EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -#else +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) { @@ -599,7 +606,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static DEFINE_PER_CPU(unsigned long long, cputime_snap); @@ -617,14 +624,23 @@ static cputime_t get_vtime_delta(void) void vtime_account_system(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + delta_cpu = get_vtime_delta(); account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); } void vtime_account_user(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + + delta_cpu = get_vtime_delta(); account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); } @@ -635,4 +651,9 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(delta_cpu); } + +bool vtime_accounting_enabled(void) +{ + return context_tracking_active(); +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d58e552..46dfb6d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -631,8 +631,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE unsigned long ticks; + + if (vtime_accounting_enabled()) + return; /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick -- cgit v0.10.2 From 6fac4829ce0ef9b7f24369086ce5f0e9f38d37bc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Nov 2012 14:20:55 +0100 Subject: cputime: Use accessors to read task cputime stats This is in preparation for the full dynticks feature. While remotely reading the cputime of a task running in a full dynticks CPU, we'll need to do some extra-computation. This way we can account the time it spent tickless in userspace since its last cputime snapshot. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 14db93e..dbc1760 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1139,6 +1139,7 @@ struct rusage32 { SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) { struct rusage32 r; + cputime_t utime, stime; if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) return -EINVAL; @@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) memset(&r, 0, sizeof(r)); switch (who) { case RUSAGE_SELF: - jiffies_to_timeval32(current->utime, &r.ru_utime); - jiffies_to_timeval32(current->stime, &r.ru_stime); + task_cputime(current, &utime, &stime); + jiffies_to_timeval32(utime, &r.ru_utime); + jiffies_to_timeval32(stime, &r.ru_stime); r.ru_minflt = current->min_flt; r.ru_majflt = current->maj_flt; break; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e..8d7012b 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -899,6 +899,7 @@ static void apm_cpu_idle(void) static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ static unsigned int last_stime; /* = 0 */ + cputime_t stime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; @@ -906,23 +907,23 @@ static void apm_cpu_idle(void) WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: + task_cputime(current, NULL, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = current->stime - last_stime; + idle_percentage = stime - last_stime; idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); if (apm_info.forbid_idle) use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } + last_jiffies = jiffies; + last_stime = stime; + bucket = IDLE_LEAKY_MAX; while (!need_resched()) { diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 5f21f62..deda591 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "core.h" static u_int *debug; @@ -202,6 +203,9 @@ static int mISDNStackd(void *data) { struct mISDNstack *st = data; +#ifdef MISDN_MSG_STATS + cputime_t utime, stime; +#endif int err = 0; sigfillset(¤t->blocked); @@ -303,9 +307,10 @@ mISDNStackd(void *data) "msg %d sleep %d stopped\n", dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, st->stopped_cnt); + task_cputime(st->thread, &utime, &stime); printk(KERN_DEBUG "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n", - dev_name(&st->dev->dev), st->thread->utime, st->thread->stime); + dev_name(&st->dev->dev), utime, stime); printk(KERN_DEBUG "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0c42cdb..49d0b43 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index dc84732..cb240dd 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6a91e6f..f7ed9ee 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime += t->gtime; + gtime += task_gtime(t); t = next_thread(t); } while (t != task); @@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_cputime_adjusted(task, &utime, &stime); - gtime = task->gtime; + gtime = task_gtime(task); } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6fc8f45..a9c608b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1792,6 +1792,29 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} + +static inline void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + if (utime) + *utime = t->utime; + if (stime) + *stime = t->stime; +} + +static inline void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + if (utimescaled) + *utimescaled = t->utimescaled; + if (stimescaled) + *stimescaled = t->stimescaled; +} extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 44893e5..3251965 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns, #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_account_cputime(struct task_struct *tsk); extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} static inline void acct_update_integrals(struct task_struct *tsk) {} +static inline void acct_account_cputime(struct task_struct *tsk) +{} static inline void acct_clear_integrals(struct task_struct *tsk) {} #endif /* CONFIG_TASK_XACCT */ diff --git a/kernel/acct.c b/kernel/acct.c index 051e071..e8b1627 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -566,6 +566,7 @@ out: void acct_collect(long exitcode, int group_dead) { struct pacct_struct *pacct = ¤t->signal->pacct; + cputime_t utime, stime; unsigned long vsize = 0; if (group_dead && current->mm) { @@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; - pacct->ac_utime += current->utime; - pacct->ac_stime += current->stime; + task_cputime(current, &utime, &stime); + pacct->ac_utime += utime; + pacct->ac_stime += stime; pacct->ac_minflt += current->min_flt; pacct->ac_majflt += current->maj_flt; spin_unlock_irq(¤t->sighand->siglock); diff --git a/kernel/cpu.c b/kernel/cpu.c index 3046a50..e5d5e8e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu) static inline void check_for_tasks(int cpu) { struct task_struct *p; + cputime_t utime, stime; write_lock_irq(&tasklist_lock); for_each_process(p) { + task_cputime(p, &utime, &stime); if (task_cpu(p) == cpu && p->state == TASK_RUNNING && - (p->utime || p->stime)) + (utime || stime)) printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " "(state = %ld, flags = %x)\n", p->comm, task_pid_nr(p), cpu, diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 418b3f7..d473988 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) unsigned long long t2, t3; unsigned long flags; struct timespec ts; + cputime_t utime, stime, stimescaled, utimescaled; /* Though tsk->delays accessed later, early exit avoids * unnecessary returning of other data @@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) goto done; tmp = (s64)d->cpu_run_real_total; - cputime_to_timespec(tsk->utime + tsk->stime, &ts); + task_cputime(tsk, &utime, &stime); + cputime_to_timespec(utime + stime, &ts); tmp += timespec_to_ns(&ts); d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; tmp = (s64)d->cpu_scaled_run_real_total; - cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts); + task_cputime_scaled(tsk, &utimescaled, &stimescaled); + cputime_to_timespec(utimescaled + stimescaled, &ts); tmp += timespec_to_ns(&ts); d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; diff --git a/kernel/exit.c b/kernel/exit.c index b4df219..7dd2040 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); + cputime_t utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime += tsk->utime; - sig->stime += tsk->stime; - sig->gtime += tsk->gtime; + task_cputime(tsk, &utime, &stime); + sig->utime += utime; + sig->stime += stime; + sig->gtime += task_gtime(tsk); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) sig = p->signal; psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; - psig->cgtime += p->gtime + sig->gtime + sig->cgtime; + psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; psig->cmin_flt += p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index a278cad..165d476 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer, static inline cputime_t prof_ticks(struct task_struct *p) { - return p->utime + p->stime; + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + + return utime + stime; } static inline cputime_t virt_ticks(struct task_struct *p) { - return p->utime; + cputime_t utime; + + task_cputime(p, &utime, NULL); + + return utime; } static int @@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head, */ void posix_cpu_timers_exit(struct task_struct *tsk) { + cputime_t utime, stime; + add_device_randomness((const void*) &tsk->se.sum_exec_runtime, sizeof(unsigned long long)); + task_cputime(tsk, &utime, &stime); cleanup_timers(tsk->cpu_timers, - tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); + utime, stime, tsk->se.sum_exec_runtime); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { struct signal_struct *const sig = tsk->signal; + cputime_t utime, stime; + task_cputime(tsk, &utime, &stime); cleanup_timers(tsk->signal->cpu_timers, - tsk->utime + sig->utime, tsk->stime + sig->stime, + utime + sig->utime, stime + sig->stime, tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } @@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample, static inline int fastpath_timer_check(struct task_struct *tsk) { struct signal_struct *sig; + cputime_t utime, stime; + + task_cputime(tsk, &utime, &stime); if (!task_cputime_zero(&tsk->cputime_expires)) { struct task_cputime task_sample = { - .utime = tsk->utime, - .stime = tsk->stime, + .utime = utime, + .stime = stime, .sum_exec_runtime = tsk->se.sum_exec_runtime }; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index e1939d3..c533dea 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -164,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime, task_group_account_field(p, index, (__force u64) cputime); /* Account for user time used */ - acct_update_integrals(p); + acct_account_cputime(p); } /* @@ -214,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, task_group_account_field(p, index, (__force u64) cputime); /* Account for system time used */ - acct_update_integrals(p); + acct_account_cputime(p); } /* @@ -296,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; + cputime_t utime, stime; struct task_struct *t; times->utime = sig->utime; @@ -309,8 +310,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) t = tsk; do { - times->utime += t->utime; - times->stime += t->stime; + task_cputime(tsk, &utime, &stime); + times->utime += utime; + times->stime += stime; times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: @@ -588,11 +590,10 @@ static void cputime_adjust(struct task_cputime *curr, void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime = { - .utime = p->utime, - .stime = p->stime, .sum_exec_runtime = p->se.sum_exec_runtime, }; + task_cputime(p, &cputime.utime, &cputime.stime); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } diff --git a/kernel/signal.c b/kernel/signal.c index 372771e..776a45a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1638,6 +1638,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) unsigned long flags; struct sighand_struct *psig; bool autoreap = false; + cputime_t utime, stime; BUG_ON(sig == -1); @@ -1675,8 +1676,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig) task_uid(tsk)); rcu_read_unlock(); - info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); - info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime); + task_cputime(tsk, &utime, &stime); + info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime); + info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) @@ -1740,6 +1742,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; + cputime_t utime, stime; if (for_ptracer) { parent = tsk->parent; @@ -1758,8 +1761,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); - info.si_utime = cputime_to_clock_t(tsk->utime); - info.si_stime = cputime_to_clock_t(tsk->stime); + task_cputime(tsk, &utime, &stime); + info.si_utime = cputime_to_clock_t(utime); + info.si_stime = cputime_to_clock_t(stime); info.si_code = why; switch (why) { diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 625df0b..a1dd9a1 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns, { const struct cred *tcred; struct timespec uptime, ts; + cputime_t utime, stime, utimescaled, stimescaled; u64 ac_etime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); @@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_ppid = pid_alive(tsk) ? task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_usecs(tsk->utime); - stats->ac_stime = cputime_to_usecs(tsk->stime); - stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); - stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); + + task_cputime(tsk, &utime, &stime); + stats->ac_utime = cputime_to_usecs(utime); + stats->ac_stime = cputime_to_usecs(stime); + + task_cputime_scaled(tsk, &utimescaled, &stimescaled); + stats->ac_utimescaled = cputime_to_usecs(utimescaled); + stats->ac_stimescaled = cputime_to_usecs(stimescaled); + stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; @@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) #undef KB #undef MB -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) +static void __acct_update_integrals(struct task_struct *tsk, + cputime_t utime, cputime_t stime) { if (likely(tsk->mm)) { cputime_t time, dtime; @@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk) u64 delta; local_irq_save(flags); - time = tsk->stime + tsk->utime; + time = stime + utime; dtime = time - tsk->acct_timexpd; jiffies_to_timeval(cputime_to_jiffies(dtime), &value); delta = value.tv_sec; @@ -145,6 +148,27 @@ void acct_update_integrals(struct task_struct *tsk) } /** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + cputime_t utime, stime; + + task_cputime(tsk, &utime, &stime); + __acct_update_integrals(tsk, utime, stime); +} + +/** + * acct_account_cputime - update mm integral after cputime update + * @tsk: task_struct for accounting + */ +void acct_account_cputime(struct task_struct *tsk) +{ + __acct_update_integrals(tsk, tsk->utime, tsk->stime); +} + +/** * acct_clear_integrals - clear the mm integral fields in task_struct * @tsk: task_struct whose accounting fields are cleared */ -- cgit v0.10.2 From c11f11fcbdb5be790c565aed46411486a7586afc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 21 Jan 2013 00:50:22 +0100 Subject: kvm: Prepare to add generic guest entry/exit callbacks Do some ground preparatory work before adding guest_enter() and guest_exit() context tracking callbacks. Those will be later used to read the guest cputime safely when we run in full dynticks mode. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Gleb Natapov Cc: Ingo Molnar Cc: Li Zhong Cc: Marcelo Tosatti Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a3a3f5a..fbaac1a 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -136,6 +136,7 @@ void vtime_account_system(struct task_struct *tsk) account_system_time(tsk, 0, delta, delta); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 22c9b67..2e04b37 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk) if (stolen) account_steal_time(stolen); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c497ab..4fe2396 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -740,15 +741,36 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void kvm_guest_enter(void) +static inline void guest_enter(void) { - BUG_ON(preemptible()); /* * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system_irqsafe(current); + vtime_account_system(current); current->flags |= PF_VCPU; +} + +static inline void guest_exit(void) +{ + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; +} + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + BUG_ON(preemptible()); + + local_irq_save(flags); + guest_enter(); + local_irq_restore(flags); + /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspase from rcu point of view. In @@ -761,12 +783,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system_irqsafe(current); - current->flags &= ~PF_VCPU; + unsigned long flags; + + local_irq_save(flags); + guest_exit(); + local_irq_restore(flags); } /* diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 5368af9..bb50c3c 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -6,7 +6,6 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); @@ -20,7 +19,6 @@ static inline bool vtime_accounting_enabled(void) { return true; } #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } static inline bool vtime_accounting_enabled(void) { return false; } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index c533dea..a44ecdf 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -465,16 +465,6 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime *st = cputime.stime; } -void vtime_account_system_irqsafe(struct task_struct *tsk) -{ - unsigned long flags; - - local_irq_save(flags); - vtime_account_system(tsk); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); - #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { -- cgit v0.10.2 From 6a61671bb2f3a1bd12cd17b8fca811a624782632 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Dec 2012 20:00:34 +0100 Subject: cputime: Safely read cputime of full dynticks CPUs While remotely reading the cputime of a task running in a full dynticks CPU, the values stored in utime/stime fields of struct task_struct may be stale. Its values may be those of the last kernel <-> user transition time snapshot and we need to add the tickless time spent since this snapshot. To fix this, flush the cputime of the dynticks CPUs on kernel <-> user transition and record the time / context where we did this. Then on top of this snapshot and the current time, perform the fixup on the reader side from task_times() accessors. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner [fixed kvm module related build errors] Signed-off-by: Sedat Dilek diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index e84b8b6..ce9cc5a 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); void vtime_account_system(struct task_struct *tsk) -__attribute__((alias("vtime_account"))); +__attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f..7105d5c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account_irq_enter(current); \ + account_irq_enter_time(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account_irq_exit(current); \ + account_irq_exit_time(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6d087c5..cc898b8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #ifdef CONFIG_SMP @@ -141,6 +142,15 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# define INIT_VTIME(tsk) \ + .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_snap = 0, \ + .vtime_snap_whence = VTIME_SYS, +#else +# define INIT_VTIME(tsk) +#endif + #define INIT_TASK_COMM "swapper" /* @@ -210,6 +220,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ \ + INIT_VTIME(tsk) \ } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4fe2396..b7996a7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -741,7 +741,7 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void guest_enter(void) +static inline void __guest_enter(void) { /* * This is running in ioctl context so we can avoid @@ -751,7 +751,7 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; } -static inline void guest_exit(void) +static inline void __guest_exit(void) { /* * This is running in ioctl context so we can avoid @@ -761,6 +761,22 @@ static inline void guest_exit(void) current->flags &= ~PF_VCPU; } +#ifdef CONFIG_CONTEXT_TRACKING +extern void guest_enter(void); +extern void guest_exit(void); + +#else /* !CONFIG_CONTEXT_TRACKING */ +static inline void guest_enter(void) +{ + __guest_enter(); +} + +static inline void guest_exit(void) +{ + __guest_exit(); +} +#endif /* !CONFIG_CONTEXT_TRACKING */ + static inline void kvm_guest_enter(void) { unsigned long flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index a9c608b..a9fa514 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1368,6 +1368,15 @@ struct task_struct { #ifndef CONFIG_VIRT_CPU_ACCOUNTING struct cputime prev_cputime; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_t vtime_seqlock; + unsigned long long vtime_snap; + enum { + VTIME_SLEEPING = 0, + VTIME_USER, + VTIME_SYS, + } vtime_snap_whence; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1792,11 +1801,13 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -static inline cputime_t task_gtime(struct task_struct *t) -{ - return t->gtime; -} - +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime); +extern void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled); +extern cputime_t task_gtime(struct task_struct *t); +#else static inline void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) { @@ -1815,6 +1826,12 @@ static inline void task_cputime_scaled(struct task_struct *t, if (stimescaled) *stimescaled = t->stimescaled; } + +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index bb50c3c..71a5782 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -8,35 +8,44 @@ extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account(struct task_struct *tsk); +extern void vtime_account_irq_enter(struct task_struct *tsk); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern bool vtime_accounting_enabled(void); -#else +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline bool vtime_accounting_enabled(void) { return true; } #endif #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } -static inline void vtime_account(struct task_struct *tsk) { } +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } static inline bool vtime_accounting_enabled(void) { return false; } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static inline void arch_vtime_task_switch(struct task_struct *tsk) { } -static inline void vtime_user_enter(struct task_struct *tsk) -{ - vtime_account_system(tsk); -} +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_irq_exit(struct task_struct *tsk); +extern bool vtime_accounting_enabled(void); +extern void vtime_user_enter(struct task_struct *tsk); static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); } +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk); #else +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + /* On hard|softirq exit we always account to hard|softirq cputime */ + vtime_account_system(tsk); +} static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -45,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk); static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif -static inline void vtime_account_irq_enter(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk) { - /* - * Hardirq can interrupt idle task anytime. So we need vtime_account() - * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. - * Softirq can also interrupt idle task directly if it calls - * local_bh_enable(). Such case probably don't exist but we never know. - * Ksoftirqd is not concerned because idle time is flushed on context - * switch. Softirqs in the end of hardirqs are also not a problem because - * the idle time is flushed on hardirq time already. - */ - vtime_account(tsk); + vtime_account_irq_enter(tsk); irqtime_account_irq(tsk); } -static inline void vtime_account_irq_exit(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk) { - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_irq_exit(tsk); irqtime_account_irq(tsk); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 9002e92..74f68f4 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -1,8 +1,9 @@ #include +#include #include #include #include - +#include DEFINE_PER_CPU(struct context_tracking, context_tracking) = { #ifdef CONFIG_CONTEXT_TRACKING_FORCE @@ -61,6 +62,24 @@ void user_exit(void) local_irq_restore(flags); } +void guest_enter(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_enter(current); + else + __guest_enter(); +} +EXPORT_SYMBOL_GPL(guest_enter); + +void guest_exit(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_exit(current); + else + __guest_exit(); +} +EXPORT_SYMBOL_GPL(guest_exit); + void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { diff --git a/kernel/fork.c b/kernel/fork.c index 65ca6d2..e68a95b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_init(&p->vtime_seqlock); + p->vtime_snap = 0; + p->vtime_snap_whence = VTIME_SLEEPING; +#endif + #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c..261022d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); + vtime_init_idle(idle); #if defined(CONFIG_SMP) sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a44ecdf..082e05d 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev) * vtime_account(). */ #ifndef __ARCH_HAS_VTIME_ACCOUNT -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { if (!vtime_accounting_enabled()) return; @@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk) } vtime_account_system(tsk); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ @@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static DEFINE_PER_CPU(unsigned long long, cputime_snap); +static unsigned long long vtime_delta(struct task_struct *tsk) +{ + unsigned long long clock; + + clock = sched_clock(); + if (clock < tsk->vtime_snap) + return 0; -static cputime_t get_vtime_delta(void) + return clock - tsk->vtime_snap; +} + +static cputime_t get_vtime_delta(struct task_struct *tsk) { - unsigned long long delta; + unsigned long long delta = vtime_delta(tsk); - delta = sched_clock() - __this_cpu_read(cputime_snap); - __this_cpu_add(cputime_snap, delta); + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); + tsk->vtime_snap += delta; /* CHECKME: always safe to convert nsecs to cputime? */ return nsecs_to_cputime(delta); } +static void __vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(tsk); + + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); +} + void vtime_account_system(struct task_struct *tsk) { - cputime_t delta_cpu; + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} +void vtime_account_irq_exit(struct task_struct *tsk) +{ if (!vtime_accounting_enabled()) return; - delta_cpu = get_vtime_delta(); - account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); + write_seqlock(&tsk->vtime_seqlock); + if (context_tracking_in_user()) + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); } void vtime_account_user(struct task_struct *tsk) @@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk) if (!vtime_accounting_enabled()) return; - delta_cpu = get_vtime_delta(); + delta_cpu = get_vtime_delta(tsk); + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_SYS; account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_user_enter(struct task_struct *tsk) +{ + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_enter(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags |= PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_exit(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags &= ~PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); } void vtime_account_idle(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu = get_vtime_delta(tsk); account_idle_time(delta_cpu); } @@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void) { return context_tracking_active(); } + +void arch_vtime_task_switch(struct task_struct *prev) +{ + write_seqlock(&prev->vtime_seqlock); + prev->vtime_snap_whence = VTIME_SLEEPING; + write_sequnlock(&prev->vtime_seqlock); + + write_seqlock(¤t->vtime_seqlock); + current->vtime_snap_whence = VTIME_SYS; + current->vtime_snap = sched_clock(); + write_sequnlock(¤t->vtime_seqlock); +} + +void vtime_init_idle(struct task_struct *t) +{ + unsigned long flags; + + write_seqlock_irqsave(&t->vtime_seqlock, flags); + t->vtime_snap_whence = VTIME_SYS; + t->vtime_snap = sched_clock(); + write_sequnlock_irqrestore(&t->vtime_seqlock, flags); +} + +cputime_t task_gtime(struct task_struct *t) +{ + unsigned long flags; + unsigned int seq; + cputime_t gtime; + + do { + seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); + + gtime = t->gtime; + if (t->flags & PF_VCPU) + gtime += vtime_delta(t); + + } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); + + return gtime; +} + +/* + * Fetch cputime raw values from fields of task_struct and + * add up the pending nohz execution time since the last + * cputime snapshot. + */ +static void +fetch_task_cputime(struct task_struct *t, + cputime_t *u_dst, cputime_t *s_dst, + cputime_t *u_src, cputime_t *s_src, + cputime_t *udelta, cputime_t *sdelta) +{ + unsigned long flags; + unsigned int seq; + unsigned long long delta; + + do { + *udelta = 0; + *sdelta = 0; + + seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); + + if (u_dst) + *u_dst = *u_src; + if (s_dst) + *s_dst = *s_src; + + /* Task is sleeping, nothing to add */ + if (t->vtime_snap_whence == VTIME_SLEEPING || + is_idle_task(t)) + continue; + + delta = vtime_delta(t); + + /* + * Task runs either in user or kernel space, add pending nohz time to + * the right place. + */ + if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { + *udelta = delta; + } else { + if (t->vtime_snap_whence == VTIME_SYS) + *sdelta = delta; + } + } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); +} + + +void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utime, stime, &t->utime, + &t->stime, &udelta, &sdelta); + if (utime) + *utime += udelta; + if (stime) + *stime += sdelta; +} + +void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utimescaled, stimescaled, + &t->utimescaled, &t->stimescaled, &udelta, &sdelta); + if (utimescaled) + *utimescaled += cputime_to_scaled(udelta); + if (stimescaled) + *stimescaled += cputime_to_scaled(sdelta); +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/kernel/softirq.c b/kernel/softirq.c index ed567ba..f5cc25f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - vtime_account_irq_enter(current); + account_irq_enter_time(current); __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET); @@ -272,7 +272,7 @@ restart: lockdep_softirq_exit(); - vtime_account_irq_exit(current); + account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } @@ -341,7 +341,7 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { - vtime_account_irq_exit(current); + account_irq_exit_time(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) -- cgit v0.10.2