From 69bb2600c9f8ca450fede9633edf9c2513c9ee6f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 15:33:18 +0100 Subject: cputime: Fix nsecs_to_cputime() return type cast Even though nsec based cputime_t maps to u64, nsecs_to_cputime() must return a cputime_t value. We want to enforce this kind of cast in order to track down buggy manipulations of cputime_t such as direct access of its values under wrong assumptions on its backend type (nsecs, jiffies, etc...) by core code. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 2c9e62c..768294f 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,7 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ -#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) +#define nsecs_to_cputime(__nsecs) \ + (__force cputime_t)(__nsecs) /* -- cgit v0.10.2 From bfc3f0281e08066fa8111c3972cff6edc1049864 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:33:42 +0100 Subject: cputime: Default implementation of nsecs -> cputime conversion The architectures that override cputime_t (s390, ppc) don't provide any version of nsecs_to_cputime(). Indeed this cputime_t implementation by backend only happens when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y under which the core code doesn't make any use of nsecs_to_cputime(). At least for now. We are going to make a broader use of it so lets provide a default version with a per usecs granularity. It should be good enough for most usecases. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5793e14..79911a2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include static spinlock_t cpufreq_stats_lock; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 88e35d8..5154513 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c6..9d231e9 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d..33de567 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include static int uptime_proc_show(struct seq_file *m, void *v) { diff --git a/include/linux/cputime.h b/include/linux/cputime.h new file mode 100644 index 0000000..2842ebe --- /dev/null +++ b/include/linux/cputime.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_CPUTIME_H +#define __LINUX_CPUTIME_H + +#include + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) \ + usecs_to_cputime((__nsecs) / NSEC_PER_USEC) +#endif + +#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be..d7c6131 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include /* * 'kernel_stat.h' contains the definitions needed for doing diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84..1ac566c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,7 +27,7 @@ struct sched_param { #include #include -#include +#include #include #include -- cgit v0.10.2 From d8a9ce3f8ad2b546b9ebaf65de809da0793f11c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:22:37 +0100 Subject: cputime: Bring cputime -> nsecs conversion We already have nsecs_to_cputime(). Now we need to be able to convert the other way around in order to fix a bug on steal time accounting. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index 272ecba..d5cb78f5 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -15,8 +15,10 @@ typedef u64 __nocast cputime64_t; /* - * Convert nanoseconds to cputime + * Convert nanoseconds <-> cputime */ +#define cputime_to_nsecs(__ct) \ + jiffies_to_nsecs(cputime_to_jiffies(__ct)) #define nsecs_to_cputime64(__nsec) \ jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) #define nsecs_to_cputime(__nsec) \ diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 768294f..4e81760 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,6 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ +#define cputime_to_nsecs(__ct) \ + (__force u64)(__ct) #define nsecs_to_cputime(__nsecs) \ (__force cputime_t)(__nsecs) diff --git a/include/linux/cputime.h b/include/linux/cputime.h index 2842ebe..f2eb2ee 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -3,6 +3,11 @@ #include +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif + #ifndef nsecs_to_cputime # define nsecs_to_cputime(__nsecs) \ usecs_to_cputime((__nsecs) / NSEC_PER_USEC) -- cgit v0.10.2 From dee08a72deefac251267ed2717717596aa8b6818 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:02:22 +0100 Subject: cputime: Fix jiffies based cputime assumption on steal accounting The steal guest time accounting code assumes that cputime_t is based on jiffies. So when CONFIG_NO_HZ_FULL=y, which implies that cputime_t is based on nsecs, steal_account_process_tick() passes the delta in jiffies to account_steal_time() which then accounts it as if it's a value in nsecs. As a result, accounting 1 second of steal time (with HZ=100 that would be 100 jiffies) is spuriously accounted as 100 nsecs. As such /proc/stat may report 0 values of steal time even when two guests have run concurrently for a few seconds on the same host and same CPU. In order to fix this, lets convert the nsecs based steal delta to cputime instead of jiffies by using the right conversion API. Given that the steal time is stored in cputime_t and this type can have a smaller granularity than nsecs, we only account the rounded converted value and leave the remaining nsecs for the next deltas. Reported-by: Huiqingding Reported-by: Marcelo Tosatti Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9994791..c91b097 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -258,16 +258,22 @@ static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT if (static_key_false(¶virt_steal_enabled)) { - u64 steal, st = 0; + u64 steal; + cputime_t steal_ct; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - st = steal_ticks(steal); - this_rq()->prev_steal_time += st * TICK_NSEC; + /* + * cputime_t may be less precise than nsecs (eg: if it's + * based on jiffies). Lets cast the result to cputime + * granularity and account the rest on the next rounds. + */ + steal_ct = nsecs_to_cputime(steal); + this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); - account_steal_time(st); - return st; + account_steal_time(steal_ct); + return steal_ct; } #endif return false; -- cgit v0.10.2 From 300a9d887ea221f344962506f724e02101bacc08 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:05:57 +0100 Subject: sched: Remove needless round trip nsecs <-> tick conversion of steal time When update_rq_clock_task() accounts the pending steal time for a task, it converts the steal delta from nsecs to tick then from tick to nsecs. There is no apparent good reason for doing that though because both the task clock and the prev steal delta are u64 and store values in nsecs. So lets remove the needless conversion. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131e..b14a188 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -823,19 +823,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING if (static_key_false((¶virt_steal_rq_enabled))) { - u64 st; - steal = paravirt_steal_clock(cpu_of(rq)); steal -= rq->prev_steal_time_rq; if (unlikely(steal > delta)) steal = delta; - st = steal_ticks(steal); - steal = st * TICK_NSEC; - rq->prev_steal_time_rq += steal; - delta -= steal; } #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd..5ec9910 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1214,16 +1214,6 @@ extern void update_idle_cpu_load(struct rq *this_rq); extern void init_task_runnable_average(struct task_struct *p); -#ifdef CONFIG_PARAVIRT -static inline u64 steal_ticks(u64 steal) -{ - if (unlikely(steal > NSEC_PER_SEC)) - return div_u64(steal, TICK_NSEC); - - return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -} -#endif - static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; -- cgit v0.10.2 From 073d8224d299528778e90773becd1e890953443c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:24:12 +0100 Subject: arch: Remove stub cputime.h headers Many architectures have a stub cputime.h that only include the default cputime.h Lets remove the useless headers, we only need to mention that we want the default headers on the Kbuild files. Cc: Archs Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 7736f42..96e54be 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/alpha/include/asm/cputime.h b/arch/alpha/include/asm/cputime.h deleted file mode 100644 index 19577fd..0000000 --- a/arch/alpha/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ALPHA_CPUTIME_H -#define __ALPHA_CPUTIME_H - -#include - -#endif /* __ALPHA_CPUTIME_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 056027f..afff510 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += arch-v32/ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h diff --git a/arch/cris/include/asm/cputime.h b/arch/cris/include/asm/cputime.h deleted file mode 100644 index 4446a65..0000000 --- a/arch/cris/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CRIS_CPUTIME_H -#define __CRIS_CPUTIME_H - -#include - -#endif /* __CRIS_CPUTIME_H */ diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index babb933..87b95eb 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/cputime.h b/arch/frv/include/asm/cputime.h deleted file mode 100644 index f6c373a..0000000 --- a/arch/frv/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_CPUTIME_H -#define _ASM_CPUTIME_H - -#include - -#endif /* _ASM_CPUTIME_H */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 5825a35..67779a7 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/m32r/include/asm/cputime.h b/arch/m32r/include/asm/cputime.h deleted file mode 100644 index 0a47550..0000000 --- a/arch/m32r/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __M32R_CPUTIME_H -#define __M32R_CPUTIME_H - -#include - -#endif /* __M32R_CPUTIME_H */ diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 1f590ab..c98ed95 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/microblaze/include/asm/cputime.h b/arch/microblaze/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/microblaze/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index cbc6b9b..654d5ba 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/mn10300/include/asm/cputime.h b/arch/mn10300/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/mn10300/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 4630cf2..2f947ab 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,6 +4,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/score/include/asm/cputime.h b/arch/score/include/asm/cputime.h deleted file mode 100644 index 1fced99..0000000 --- a/arch/score/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_CPUTIME_H -#define _ASM_SCORE_CPUTIME_H - -#include - -#endif /* _ASM_SCORE_CPUTIME_H */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a8fee07..4acddc4 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,5 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += cputime.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/x86/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v0.10.2