From c4b4057267f258e6d823080205f47227fb2abfd7 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 16 Aug 2016 15:27:19 +0900 Subject: cpufreq: dt: Add exynos5433 compatible to use generic cpufreq driver This patch adds the exynos5433 compatible string for supporting the generic cpufreq driver on Exynos5433. Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 0bb44d5..470db30 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -40,6 +40,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "samsung,exynos5250", }, #ifndef CONFIG_BL_SWITCHER { .compatible = "samsung,exynos5420", }, + { .compatible = "samsung,exynos5433", }, { .compatible = "samsung,exynos5800", }, #endif -- cgit v0.10.2 From 58919e83c85c3a3c5fb34025dc0e95ddd998c478 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 16 Aug 2016 22:14:55 +0200 Subject: cpufreq / sched: Pass flags to cpufreq_update_util() It is useful to know the reason why cpufreq_update_util() has just been called and that can be passed as flags to cpufreq_update_util() and to the ->func() callback in struct update_util_data. However, doing that in addition to passing the util and max arguments they already take would be clumsy, so avoid it. Instead, use the observation that the schedutil governor is part of the scheduler proper, so it can access scheduler data directly. This allows the util and max arguments of cpufreq_update_util() and the ->func() callback in struct update_util_data to be replaced with a flags one, but schedutil has to be modified to follow. Thus make the schedutil governor obtain the CFS utilization information from the scheduler and use the "RT" and "DL" flags instead of the special utilization value of ULONG_MAX to track updates from the RT and DL sched classes. Make it non-modular too to avoid having to export scheduler variables to modules at large. Next, update all of the other users of cpufreq_update_util() and the ->func() callback in struct update_util_data accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 74919aa..4dc9525 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config CPU_FREQ_GOV_SCHEDUTIL - tristate "'schedutil' cpufreq policy governor" + bool "'schedutil' cpufreq policy governor" depends on CPU_FREQ && SMP select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK @@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL frequency tipping point is at utilization/capacity equal to 80% in both cases. - To compile this driver as a module, choose M here: the module will - be called cpufreq_schedutil. - If in doubt, say N. comment "CPU frequency scaling drivers" diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e415349..642dd0f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work) } static void dbs_update_util_handler(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be9eade..bdbe936 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1329,7 +1329,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) } static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns = time - cpu->sample.time; diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e5..b0fa726 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3469,15 +3469,19 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + #ifdef CONFIG_CPU_FREQ struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); }; void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)); + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 1141954..dbc5144 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); */ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)) + unsigned int flags)) { if (WARN_ON(!data || !func)) return; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a84641b..60d985f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include @@ -53,6 +52,7 @@ struct sugov_cpu { unsigned long util; unsigned long max; u64 last_update; + unsigned int flags; }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -144,24 +144,39 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, return cpufreq_driver_resolve_freq(policy, freq); } +static void sugov_get_util(unsigned long *util, unsigned long *max) +{ + struct rq *rq = this_rq(); + unsigned long cfs_max = rq->cpu_capacity_orig; + + *util = min(rq->cfs.avg.util_avg, cfs_max); + *max = cfs_max; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util, max; unsigned int next_f; if (!sugov_should_update_freq(sg_policy, time)) return; - next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : - get_next_freq(sg_cpu, util, max); + if (flags & SCHED_CPUFREQ_RT_DL) { + next_f = policy->cpuinfo.max_freq; + } else { + sugov_get_util(&util, &max); + next_f = get_next_freq(sg_cpu, util, max); + } sugov_update_commit(sg_policy, time, next_f); } static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max) + unsigned long util, unsigned long max, + unsigned int flags) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; @@ -169,7 +184,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned int j; - if (util == ULONG_MAX) + if (flags & SCHED_CPUFREQ_RT_DL) return max_f; for_each_cpu(j, policy->cpus) { @@ -192,10 +207,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, if (delta_ns > TICK_NSEC) continue; - j_util = j_sg_cpu->util; - if (j_util == ULONG_MAX) + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; + j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; if (j_util * max > j_max * util) { util = j_util; @@ -207,20 +222,24 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, } static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned long util, max; unsigned int next_f; + sugov_get_util(&util, &max); + raw_spin_lock(&sg_policy->update_lock); sg_cpu->util = util; sg_cpu->max = max; + sg_cpu->flags = flags; sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); sugov_update_commit(sg_policy, time, next_f); } @@ -444,8 +463,9 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->sg_policy = sg_policy; if (policy_is_shared(policy)) { - sg_cpu->util = ULONG_MAX; + sg_cpu->util = 0; sg_cpu->max = 0; + sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, @@ -495,28 +515,15 @@ static struct cpufreq_governor schedutil_gov = { .limits = sugov_limits, }; -static int __init sugov_module_init(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} - -static void __exit sugov_module_exit(void) -{ - cpufreq_unregister_governor(&schedutil_gov); -} - -MODULE_AUTHOR("Rafael J. Wysocki "); -MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); -MODULE_LICENSE("GPL"); - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL struct cpufreq_governor *cpufreq_default_governor(void) { return &schedutil_gov; } - -fs_initcall(sugov_module_init); -#else -module_init(sugov_module_init); #endif -module_exit(sugov_module_exit); + +static int __init sugov_register(void) +{ + return cpufreq_register_governor(&schedutil_gov); +} +fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1ce8867..4464cc3 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -735,9 +735,9 @@ static void update_curr_dl(struct rq *rq) return; } - /* kick cpufreq (see the comment in linux/cpufreq.h). */ + /* kick cpufreq (see the comment in kernel/sched/sched.h). */ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 039de34..f91fa57 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2875,11 +2875,8 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); - - if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { - unsigned long max = rq->cpu_capacity_orig; + if (&this_rq()->cfs == cfs_rq) { + struct rq *rq = rq_of(cfs_rq); /* * There are a few boundary cases this might miss but it should @@ -2897,8 +2894,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), - min(cfs_rq->avg.util_avg, max), max); + cpufreq_update_util(rq_clock(rq), 0); } } @@ -3162,7 +3158,7 @@ static inline void update_load_avg(struct sched_entity *se, int not_used) struct cfs_rq *cfs_rq = cfs_rq_of(se); struct rq *rq = rq_of(cfs_rq); - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), 0); } static inline void diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d5690b7..8a9cd9b 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -957,9 +957,9 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec <= 0)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c64fc51..82fc554 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1764,26 +1764,12 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. + * @flags: Update reason flags. * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. + * This function is called by the scheduler on the CPU whose utilization is + * being updated. * * It can only be called from RCU-sched read-side critical sections. - */ -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data) - data->func(data, time, util, max); -} - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. * * The way cpufreq is currently arranged requires it to evaluate the CPU * performance state (frequency/voltage) on a regular basis to prevent it from @@ -1797,13 +1783,16 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. */ -static inline void cpufreq_trigger_update(u64 time) +static inline void cpufreq_update_util(u64 time, unsigned int flags) { - cpufreq_update_util(time, ULONG_MAX, 0); + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} +static inline void cpufreq_update_util(u64 time, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -- cgit v0.10.2 From 12bde33dbb3eadd60343a8a71c39766073c1d752 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Aug 2016 03:11:17 +0200 Subject: cpufreq / sched: Pass runqueue pointer to cpufreq_update_util() All of the callers of cpufreq_update_util() pass rq_clock(rq) to it as the time argument and some of them check whether or not cpu_of(rq) is equal to smp_processor_id() before calling it, so rework it to take a runqueue pointer as the argument and move the rq_clock(rq) evaluation into it. Additionally, provide a wrapper checking cpu_of(rq) against smp_processor_id() for the cpufreq_update_util() callers that need it. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 4464cc3..9747796 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -736,8 +736,7 @@ static void update_curr_dl(struct rq *rq) } /* kick cpufreq (see the comment in kernel/sched/sched.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f91fa57..5d558cc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2876,8 +2876,6 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { if (&this_rq()->cfs == cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be @@ -2894,7 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), 0); + cpufreq_update_util(rq_of(cfs_rq), 0); } } @@ -3155,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) static inline void update_load_avg(struct sched_entity *se, int not_used) { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - struct rq *rq = rq_of(cfs_rq); - - cpufreq_update_util(rq_clock(rq), 0); + cpufreq_update_util(rq_of(cfs_rq_of(se)), 0); } static inline void diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8a9cd9b..2516b8d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -958,8 +958,7 @@ static void update_curr_rt(struct rq *rq) return; /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 82fc554..b7fc1ce 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1763,7 +1763,7 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. + * @rq: Runqueue to carry out the update for. * @flags: Update reason flags. * * This function is called by the scheduler on the CPU whose utilization is @@ -1783,16 +1783,23 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. */ -static inline void cpufreq_update_util(u64 time, unsigned int flags) +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); if (data) - data->func(data, time, flags); + data->func(data, rq_clock(rq), flags); +} + +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) +{ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_update_util(rq, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned int flags) {} +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -- cgit v0.10.2 From b0d8a69d08069f4f4b1d9670d57a7e22da6e6c6a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 16 Aug 2016 23:00:07 +0200 Subject: cpufreq-SCPI: Delete unnecessary assignment for the field "owner" The field "owner" is set by the core. Thus delete an unneeded initialisation. Generated by scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Acked-by: Viresh Kumar Acked-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index e8a7bf5..ea7a4e1 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -105,7 +105,6 @@ static int scpi_cpufreq_remove(struct platform_device *pdev) static struct platform_driver scpi_cpufreq_platdrv = { .driver = { .name = "scpi-cpufreq", - .owner = THIS_MODULE, }, .probe = scpi_cpufreq_probe, .remove = scpi_cpufreq_remove, -- cgit v0.10.2 From 5f16d59cb97d24bafd172b7d935aff0f6c10d8ba Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 19 Aug 2016 11:43:12 -0400 Subject: ARM: exynos_defconfig: Don't attempt to enable schedutil governor as module The schedutil CPUFreq governor could be built as a module but the change "cpufreq / sched: Pass flags to cpufreq_update_util()" made it non-modular so attempting to enable as module leads to a warning: warning: symbol value 'm' invalid for CPU_FREQ_GOV_SCHEDUTIL Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 01986de..36cc7cc 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_CPUFREQ_DT=y CONFIG_CPU_IDLE=y CONFIG_ARM_EXYNOS_CPUIDLE=y -- cgit v0.10.2 From 08023fb9cec03ee2992df51fa728c3f14a1ba0ea Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 19 Aug 2016 11:43:13 -0400 Subject: ARM: multi_v7_defconfig: Don't attempt to enable schedutil governor as module The schedutil CPUFreq governor could be built as a module but the change "cpufreq / sched: Pass flags to cpufreq_update_util()" made it non-modular so attempting to enable as module leads to a warning: warning: symbol value 'm' invalid for CPU_FREQ_GOV_SCHEDUTIL Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 2c8665c..ab5fea7 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_QORIQ_CPUFREQ=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y -- cgit v0.10.2 From 3689ad7ed6a836c4eec5e7bdd17a11a79591bef9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Aug 2016 03:11:31 +0200 Subject: cpufreq: Drop unnecessary check from cpufreq_policy_alloc() Since cpufreq_policy_alloc() doesn't use its dev variable for anything useful, drop that variable from there along with the NULL check against it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3dd4884..13fb589 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1073,13 +1073,9 @@ static void handle_update(struct work_struct *work) static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { - struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; int ret; - if (WARN_ON(!dev)) - return NULL; - policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return NULL; -- cgit v0.10.2 From 8314bc83f6a33958a033955e9bdc48e8dd4d5fb0 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Fri, 26 Aug 2016 11:40:47 -0700 Subject: cpufreq / sched: ignore SMT when determining max cpu capacity PELT does not consider SMT when scaling its utilization values via arch_scale_cpu_capacity(). The value in rq->cpu_capacity_orig does take SMT into consideration though and therefore may be smaller than the utilization reported by PELT. On an Intel i7-3630QM for example rq->cpu_capacity_orig is 589 but util_avg scales up to 1024. This means that a 50% utilized CPU will show up in schedutil as ~86% busy. Fix this by using the same CPU scaling value in schedutil as that which is used by PELT. Signed-off-by: Steve Muckle Signed-off-by: Rafael J. Wysocki diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 60d985f..cb8a77b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -147,7 +147,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, static void sugov_get_util(unsigned long *util, unsigned long *max) { struct rq *rq = this_rq(); - unsigned long cfs_max = rq->cpu_capacity_orig; + unsigned long cfs_max; + + cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); *util = min(rq->cfs.avg.util_avg, cfs_max); *max = cfs_max; -- cgit v0.10.2 From ffdf8b867b922d2d3150ccfa330d1c6d22716562 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 6 Sep 2016 14:18:20 +0200 Subject: cpufreq: dt: Add support for r8a7792 Add the compatible string for supporting the generic cpufreq driver on the Renesas R-Car V2H (r8a7792) SoC. Signed-off-by: Geert Uytterhoeven Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 285ed3e..276378a 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -52,6 +52,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "renesas,r8a7779", }, { .compatible = "renesas,r8a7790", }, { .compatible = "renesas,r8a7791", }, + { .compatible = "renesas,r8a7792", }, { .compatible = "renesas,r8a7793", }, { .compatible = "renesas,r8a7794", }, { .compatible = "renesas,sh73a0", }, -- cgit v0.10.2 From dabe73cbeba463e559741355afce010cea7dacaf Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 9 Sep 2016 13:05:24 +0200 Subject: MAINTAINERS: Add Documentation/cpu-freq/ I am told the cpufreq documentation updates should go to the PM list. Signed-off-by: Jean Delvare Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/MAINTAINERS b/MAINTAINERS index db814a8..f88a50c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3281,6 +3281,7 @@ L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) +F: Documentation/cpu-freq/ F: drivers/cpufreq/ F: include/linux/cpufreq.h -- cgit v0.10.2 From e86eee6bc2aaa6b3637f6497b26beee09a91bde9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Sep 2016 16:48:06 +0530 Subject: cpufreq: dt: Remove unused code This is leftover from an earlier patch which removed the usage of platform data but forgot to remove this line. Remove it now. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 3957de8..2bd2053 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -366,8 +366,6 @@ static int dt_cpufreq_probe(struct platform_device *pdev) if (ret) return ret; - dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); - ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) dev_err(&pdev->dev, "failed register driver: %d\n", ret); -- cgit v0.10.2 From 33cc4fc1b2147ee3cf36d02c1106456cb276c043 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Sep 2016 16:48:07 +0530 Subject: cpufreq: dt: Update kconfig description The cpufreq DT driver also supports systems that have multiple clock/voltage domains for CPUs, i.e. multiple policy systems. The description of the Kconfig entry was never updated after the driver was modified to support such systems, fix it. Reported-by: Juri Lelli Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 74919aa..9be9013 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -225,7 +225,7 @@ config CPUFREQ_DT help This adds a generic DT based cpufreq driver for frequency management. It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) - systems which share clock and voltage across all CPUs. + systems. If in doubt, say N. -- cgit v0.10.2 From 297a66221d2bed61e7d74d11bf071c7b489fc33d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Sep 2016 16:48:08 +0530 Subject: cpufreq: dt: Support governor tunables per policy The cpufreq-dt driver is also used for systems with multiple clock/voltage domains for CPUs, i.e. multiple cpufreq policies in a system. And in such cases the platform users may want to enable "governor tunables per policy". Support that via platform data, as not all users of the driver would want that behavior. Reported-by: Juri Lelli Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 276378a..d2637e1 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -11,6 +11,8 @@ #include #include +#include "cpufreq-dt.h" + static const struct of_device_id machines[] __initconst = { { .compatible = "allwinner,sun4i-a10", }, { .compatible = "allwinner,sun5i-a10s", }, @@ -93,7 +95,8 @@ static int __init cpufreq_dt_platdev_init(void) if (!match) return -ENODEV; - return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, - NULL, 0)); + return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", + -1, match->data, + sizeof(struct cpufreq_dt_platform_data))); } device_initcall(cpufreq_dt_platdev_init); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 2bd2053..5c07ae0 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -25,6 +25,8 @@ #include #include +#include "cpufreq-dt.h" + struct private_data { struct device *cpu_dev; struct thermal_cooling_device *cdev; @@ -353,6 +355,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { static int dt_cpufreq_probe(struct platform_device *pdev) { + struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); int ret; /* @@ -366,6 +369,9 @@ static int dt_cpufreq_probe(struct platform_device *pdev) if (ret) return ret; + if (data && data->have_governor_per_policy) + dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; + ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) dev_err(&pdev->dev, "failed register driver: %d\n", ret); diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h new file mode 100644 index 0000000..54d774e --- /dev/null +++ b/drivers/cpufreq/cpufreq-dt.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2016 Linaro + * Viresh Kumar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CPUFREQ_DT_H__ +#define __CPUFREQ_DT_H__ + +#include + +struct cpufreq_dt_platform_data { + bool have_governor_per_policy; +}; + +#endif /* __CPUFREQ_DT_H__ */ -- cgit v0.10.2 From 42ce8921cca08ae8d9068968e6658cae856a99df Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 11 Sep 2016 15:06:01 +0200 Subject: intel_pstate: constify local structures For structure types defined in the same file or local header files, find top-level static structure declarations that have the following properties: 1. Never reassigned. 2. Address never taken 3. Not passed to a top-level macro call 4. No pointer or array-typed field passed to a function or stored in a variable. Declare structures having all of these properties as const. Done using Coccinelle. Based on a suggestion by Joe Perches . Signed-off-by: Julia Lawall Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be9eade..5095b89 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1029,7 +1029,7 @@ static struct cpu_defaults core_params = { }, }; -static struct cpu_defaults silvermont_params = { +static const struct cpu_defaults silvermont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1050,7 +1050,7 @@ static struct cpu_defaults silvermont_params = { }, }; -static struct cpu_defaults airmont_params = { +static const struct cpu_defaults airmont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1071,7 +1071,7 @@ static struct cpu_defaults airmont_params = { }, }; -static struct cpu_defaults knl_params = { +static const struct cpu_defaults knl_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1091,7 +1091,7 @@ static struct cpu_defaults knl_params = { }, }; -static struct cpu_defaults bxt_params = { +static const struct cpu_defaults bxt_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, -- cgit v0.10.2 From 26619804e733bbe8a01ab8a438f91e230e91373a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 12 Sep 2016 12:07:05 +0530 Subject: cpufreq: create link to policy only for registered CPUs If a cpufreq driver is registered very early in the boot stage (e.g. registered from postcore_initcall()), then cpufreq core may generate kernel warnings for it. In this case, the CPUs are brought online, then the cpufreq driver is registered, and then the CPU topology devices are registered. However, by the time cpufreq_add_dev() gets called, the cpu device isn't stored in the per-cpu variable (cpu_sys_devices,) which is read by get_cpu_device(). So the cpufreq core fails to get device for the CPU, for which cpufreq_add_dev() was called in the first place and we will hit a WARN_ON(!cpu_dev). Even if we reuse the 'dev' parameter passed to cpufreq_add_dev() to avoid that warning, there might be other CPUs online that share the policy with the cpu for which cpufreq_add_dev() is called. Eventually get_cpu_device() will return NULL for them as well, and we will hit the same WARN_ON() again. In order to fix these issues, change cpufreq core to create links to the policy for a cpu only when cpufreq_add_dev() is called for that CPU. Reuse the 'real_cpus' mask to track that as well. Note that cpufreq_remove_dev() already handles removal of the links for individual CPUs and cpufreq_add_dev() has aligned with that now. Reported-by: Russell King Tested-by: Russell King Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 13fb589..3a64136 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -916,58 +916,18 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) +static int add_cpu_dev_symlink(struct cpufreq_policy *policy, + struct device *dev) { - struct device *cpu_dev; - - pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu); - - if (!policy) - return 0; - - cpu_dev = get_cpu_device(cpu); - if (WARN_ON(!cpu_dev)) - return 0; - - return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); + dev_dbg(dev, "%s: Adding symlink\n", __func__); + return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); } -static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, + struct device *dev) { - struct device *cpu_dev; - - pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu); - - cpu_dev = get_cpu_device(cpu); - if (WARN_ON(!cpu_dev)) - return; - - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); -} - -/* Add/remove symlinks for all related CPUs */ -static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) -{ - unsigned int j; - int ret = 0; - - /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) { - ret = add_cpu_dev_symlink(policy, j); - if (ret) - break; - } - - return ret; -} - -static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) -{ - unsigned int j; - - /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) - remove_cpu_dev_symlink(policy, j); + dev_dbg(dev, "%s: Removing symlink\n", __func__); + sysfs_remove_link(&dev->kobj, "cpufreq"); } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -999,7 +959,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } - return cpufreq_add_dev_symlink(policy); + return 0; } __weak struct cpufreq_governor *cpufreq_default_governor(void) @@ -1129,7 +1089,6 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) down_write(&policy->rwsem); cpufreq_stats_free_table(policy); - cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; up_write(&policy->rwsem); @@ -1211,8 +1170,8 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); - /* Remember CPUs present at the policy creation time. */ - cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* Clear mask of registered CPUs */ + cpumask_clear(policy->real_cpus); } /* @@ -1327,6 +1286,8 @@ out_free_policy: return ret; } +static void cpufreq_offline(unsigned int cpu); + /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. @@ -1336,22 +1297,28 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { struct cpufreq_policy *policy; unsigned cpu = dev->id; + int ret; dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); - if (cpu_online(cpu)) - return cpufreq_online(cpu); + if (cpu_online(cpu)) { + ret = cpufreq_online(cpu); + if (ret) + return ret; + } - /* - * A hotplug notifier will follow and we will handle it as CPU online - * then. For now, just create the sysfs link, unless there is no policy - * or the link is already present. - */ + /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) return 0; - return add_cpu_dev_symlink(policy, cpu); + ret = add_cpu_dev_symlink(policy, dev); + if (ret) { + cpumask_clear_cpu(cpu, policy->real_cpus); + cpufreq_offline(cpu); + } + + return ret; } static void cpufreq_offline(unsigned int cpu) @@ -1432,7 +1399,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) cpufreq_offline(cpu); cpumask_clear_cpu(cpu, policy->real_cpus); - remove_cpu_dev_symlink(policy, cpu); + remove_cpu_dev_symlink(policy, dev); if (cpumask_empty(policy->real_cpus)) cpufreq_policy_free(policy, true); -- cgit v0.10.2 From ad38677df44b67e0f5b6c4d31e9c2734abde8ed9 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Wed, 20 Jul 2016 15:10:04 -0600 Subject: cpufreq: CPPC: Force reporting values in KHz to fix user space interface When CPPC is being used by ACPI on arm64, user space tools such as cpupower report CPU frequency values from sysfs that are incorrect. What the driver was doing was reporting the values given by ACPI tables in whatever scale was used to provide them. However, the ACPI spec defines the CPPC values as unitless abstract numbers. Internal kernel structures such as struct perf_cap, in contrast, expect these values to be in KHz. When these struct values get reported via sysfs, the user space tools also assume they are in KHz, causing them to report incorrect values (for example, reporting a CPU frequency of 1MHz when it should be 1.8GHz). The downside is that this approach has some assumptions: (1) It relies on SMBIOS3 being used, *and* that the Max Frequency value for a processor is set to a non-zero value. (2) It assumes that all processors run at the same speed, or that the CPPC values have all been scaled to reflect relative speed. This patch retrieves the largest CPU Max Frequency from a type 4 DMI record that it can find. This may not be an issue, however, as a sampling of DMI data on x86 and arm64 indicates there is often only one such record regardless. Since CPPC is relatively new, it is unclear if the ACPI ASL will always be written to reflect any sort of relative performance of processors of differing speeds. (3) It assumes that performance and frequency both scale linearly. For arm64 servers, this may be sufficient, but it does rely on firmware values being set correctly. Hence, other approaches will be considered in the future. This has been tested on three arm64 servers, with and without DMI, with and without CPPC support. Signed-off-by: Al Stone Signed-off-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8882b8e..6debc18 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -19,10 +19,19 @@ #include #include #include +#include #include +#include + #include +/* Minimum struct length needed for the DMI processor entry we want */ +#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 + +/* Offest in the DMI processor structure for the max frequency */ +#define DMI_PROCESSOR_MAX_SPEED 0x14 + /* * These structs contain information parsed from per CPU * ACPI _CPC structures. @@ -32,6 +41,39 @@ */ static struct cpudata **all_cpu_data; +/* Capture the max KHz from DMI */ +static u64 cppc_dmi_max_khz; + +/* Callback function used to retrieve the max frequency from DMI */ +static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) +{ + const u8 *dmi_data = (const u8 *)dm; + u16 *mhz = (u16 *)private; + + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? val : *mhz; + } +} + +/* Look up the max frequency in DMI */ +static u64 cppc_get_dmi_max_khz(void) +{ + u16 mhz = 0; + + dmi_walk(cppc_find_dmi_mhz, &mhz); + + /* + * Real stupid fallback value, just in case there is no + * actual value set. + */ + mhz = mhz ? mhz : 1; + + return (1000 * mhz); +} + static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -42,7 +84,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, cpu = all_cpu_data[policy->cpu]; - cpu->perf_ctrls.desired_perf = target_freq; + cpu->perf_ctrls.desired_perf = target_freq * policy->max / cppc_dmi_max_khz; freqs.old = policy->cur; freqs.new = target_freq; @@ -94,8 +136,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; } - policy->min = cpu->perf_caps.lowest_perf; - policy->max = cpu->perf_caps.highest_perf; + cppc_dmi_max_khz = cppc_get_dmi_max_khz(); + + policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf; + policy->max = cppc_dmi_max_khz; policy->cpuinfo.min_freq = policy->min; policy->cpuinfo.max_freq = policy->max; policy->shared_type = cpu->shared_type; @@ -112,7 +156,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu->cur_policy = policy; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; + policy->cur = cppc_dmi_max_khz; + cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); if (ret) -- cgit v0.10.2 From 8c34ab1910a79319731107ec8ecd2e80893ea30c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 Sep 2016 23:59:33 +0200 Subject: cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition Testing indicates that it is possible to improve performace significantly without increasing energy consumption too much by teaching cpufreq governors to bump up the CPU performance level if the in_iowait flag is set for the task in enqueue_task_fair(). For this purpose, define a new cpufreq_update_util() flag SCHED_CPUFREQ_IOWAIT and modify enqueue_task_fair() to pass that flag to cpufreq_update_util() in the in_iowait case. That generally requires cpufreq_update_util() to be called directly from there, because update_load_avg() may not be invoked in that case. Signed-off-by: Rafael J. Wysocki Looks-good-to: Steve Muckle Acked-by: Peter Zijlstra (Intel) diff --git a/include/linux/sched.h b/include/linux/sched.h index b0fa726..98fe95f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3471,6 +3471,7 @@ static inline unsigned long rlimit_max(unsigned int limit) #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) #define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5d558cc..a5cd07b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4500,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + /* + * If in_iowait is set, the code below may not trigger any cpufreq + * utilization updates, so do it here explicitly with the IOWAIT flag + * passed. + */ + if (p->in_iowait) + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); + for_each_sched_entity(se) { if (se->on_rq) break; -- cgit v0.10.2 From 21ca6d2c52f8ca8638129c1dfc489d0b0ae68532 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 10 Sep 2016 00:00:31 +0200 Subject: cpufreq: schedutil: Add iowait boosting Modify the schedutil cpufreq governor to boost the CPU frequency if the SCHED_CPUFREQ_IOWAIT flag is passed to it via cpufreq_update_util(). If that happens, the frequency is set to the maximum during the first update after receiving the SCHED_CPUFREQ_IOWAIT flag and then the boost is reduced by half during each following update. Signed-off-by: Rafael J. Wysocki Looks-good-to: Steve Muckle Acked-by: Peter Zijlstra (Intel) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cb8a77b..69e0689 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,11 +47,13 @@ struct sugov_cpu { struct sugov_policy *sg_policy; unsigned int cached_raw_freq; + unsigned long iowait_boost; + unsigned long iowait_boost_max; + u64 last_update; /* The fields below are only needed when sharing a policy. */ unsigned long util; unsigned long max; - u64 last_update; unsigned int flags; }; @@ -155,6 +157,36 @@ static void sugov_get_util(unsigned long *util, unsigned long *max) *max = cfs_max; } +static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + if (flags & SCHED_CPUFREQ_IOWAIT) { + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else if (sg_cpu->iowait_boost) { + s64 delta_ns = time - sg_cpu->last_update; + + /* Clear iowait_boost if the CPU apprears to have been idle. */ + if (delta_ns > TICK_NSEC) + sg_cpu->iowait_boost = 0; + } +} + +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, + unsigned long *max) +{ + unsigned long boost_util = sg_cpu->iowait_boost; + unsigned long boost_max = sg_cpu->iowait_boost_max; + + if (!boost_util) + return; + + if (*util * boost_max < *max * boost_util) { + *util = boost_util; + *max = boost_max; + } + sg_cpu->iowait_boost >>= 1; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -164,6 +196,9 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; + sugov_set_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + if (!sugov_should_update_freq(sg_policy, time)) return; @@ -171,6 +206,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max); + sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_cpu, util, max); } sugov_update_commit(sg_policy, time, next_f); @@ -189,6 +225,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, if (flags & SCHED_CPUFREQ_RT_DL) return max_f; + sugov_iowait_boost(sg_cpu, &util, &max); + for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu; unsigned long j_util, j_max; @@ -203,12 +241,13 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, * frequency update and the time elapsed between the last update * of the CPU utilization and the last frequency update is long * enough, don't take the CPU into account as it probably is - * idle now. + * idle now (and clear iowait_boost for it). */ delta_ns = last_freq_update_time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { + j_sg_cpu->iowait_boost = 0; continue; - + } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; @@ -218,6 +257,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, util = j_util; max = j_max; } + + sugov_iowait_boost(j_sg_cpu, &util, &max); } return get_next_freq(sg_cpu, util, max); @@ -238,6 +279,8 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->util = util; sg_cpu->max = max; sg_cpu->flags = flags; + + sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { @@ -470,6 +513,8 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; + sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, sugov_update_shared); } else { -- cgit v0.10.2 From 09c448d3c61f31322c097cb4c1484778d50da399 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 14 Sep 2016 02:28:13 +0200 Subject: cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm Modify the P-state selection algorithm for Atom processors to use the new SCHED_CPUFREQ_IOWAIT flag instead of the questionable get_cpu_iowait_time_us() function. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bdbe936..7c457cc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -181,6 +181,8 @@ struct _pid { * @cpu: CPU number for this instance data * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set + * @iowait_boost: iowait-related boost fraction + * @last_update: Time of the last update. * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @pid: Stores PID parameters for this CPU @@ -206,6 +208,7 @@ struct cpudata { struct vid_data vid; struct _pid pid; + u64 last_update; u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; @@ -216,6 +219,7 @@ struct cpudata { struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; #endif + unsigned int iowait_boost; }; static struct cpudata **all_cpu_data; @@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data; * @p_gain_pct: PID proportional gain * @i_gain_pct: PID integral gain * @d_gain_pct: PID derivative gain + * @boost_iowait: Whether or not to use iowait boosting. * * Stores per CPU model static PID configuration data. */ @@ -240,6 +245,7 @@ struct pstate_adjust_policy { int p_gain_pct; int d_gain_pct; int i_gain_pct; + bool boost_iowait; }; /** @@ -1037,6 +1043,7 @@ static struct cpu_defaults silvermont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1058,6 +1065,7 @@ static struct cpu_defaults airmont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1099,6 +1107,7 @@ static struct cpu_defaults bxt_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = core_get_max_pstate, @@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu) static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; - u64 cummulative_iowait, delta_iowait_us; - u64 delta_iowait_mperf; - u64 mperf, now; - int32_t cpu_load; + int32_t busy_frac, boost; - cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + busy_frac = div_fp(sample->mperf, sample->tsc); - /* - * Convert iowait time into number of IO cycles spent at max_freq. - * IO is considered as busy only for the cpu_load algorithm. For - * performance this is not needed since we always try to reach the - * maximum P-State, so we are already boosting the IOs. - */ - delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; - delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * - cpu->pstate.max_pstate, MSEC_PER_SEC); + boost = cpu->iowait_boost; + cpu->iowait_boost >>= 1; - mperf = cpu->sample.mperf + delta_iowait_mperf; - cpu->prev_cummulative_iowait = cummulative_iowait; + if (busy_frac < boost) + busy_frac = boost; - /* - * The load can be estimated as the ratio of the mperf counter - * running at a constant frequency during active periods - * (C0) and the time stamp counter running at the same frequency - * also during C-states. - */ - cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); - cpu->sample.busy_scaled = cpu_load; - - return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); + sample->busy_scaled = busy_frac * 100; + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1332,8 +1323,21 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; + u64 delta_ns; + + if (pid_params.boost_iowait) { + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; + } + cpu->last_update = time; + } + delta_ns = time - cpu->sample.time; if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); -- cgit v0.10.2 From 3ba7bcaa3657f5fe32295ebd17fbdaaf16608e2f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 13 Sep 2016 17:41:33 -0700 Subject: cpufreq: intel_pstate: Add io_boost trace Add io_boost percent to current pstate_sample tracepoint. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7c457cc..86c29af 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1316,7 +1316,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - get_avg_frequency(cpu)); + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 19e5030..54e3aad 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample, u64 mperf, u64 aperf, u64 tsc, - u32 freq + u32 freq, + u32 io_boost ), TP_ARGS(core_busy, @@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample, mperf, aperf, tsc, - freq + freq, + io_boost ), TP_STRUCT__entry( @@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample, __field(u64, aperf) __field(u64, tsc) __field(u32, freq) + __field(u32, io_boost) ), TP_fast_assign( @@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample, __entry->aperf = aperf; __entry->tsc = tsc; __entry->freq = freq; + __entry->io_boost = io_boost; ), - TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ", + TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->from, @@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, - (unsigned long)__entry->freq + (unsigned long)__entry->freq, + (unsigned long)__entry->io_boost ) ); -- cgit v0.10.2 From e01072d22d4e7f9ca966f848def22fe41eaef4de Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 14 Sep 2016 15:41:37 -0500 Subject: cpufreq: ti: Use generic platdev driver Now that the cpufreq-dt-platdev is used to create the cpufreq-dt platform device for all OMAP platforms and the platform code that did it before has been removed, add ti,am33xx and ti,dra7xx to the machine list in cpufreq-dt-platdev which had relied on the removed platform code to do this previously. Fixes: 7694ca6e1d6f (cpufreq: omap: Use generic platdev driver) Signed-off-by: Dave Gerlach Acked-by: Viresh Kumar Cc: 4.7+ # 4.7+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index d2637e1..7126762 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -72,6 +72,8 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "sigma,tango4" }, + { .compatible = "ti,am33xx", }, + { .compatible = "ti,dra7", }, { .compatible = "ti,omap2", }, { .compatible = "ti,omap3", }, { .compatible = "ti,omap4", }, -- cgit v0.10.2 From f89f4147f76f91bfff6f32890fc34148178f144d Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Wed, 14 Sep 2016 16:08:28 -0700 Subject: cpufreq: CPPC: Avoid overflow when calculating desired_perf This patch fixes overflow issue when calculating the desired_perf. Fixes: ad38677df44b (cpufreq: CPPC: Force reporting values in KHz to fix user space interface) Signed-off-by: Hoan Tran Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 6debc18..99db422 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -84,7 +84,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, cpu = all_cpu_data[policy->cpu]; - cpu->perf_ctrls.desired_perf = target_freq * policy->max / cppc_dmi_max_khz; + cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz; freqs.old = policy->cur; freqs.new = target_freq; -- cgit v0.10.2 From 4c232f94693a1d9cde4867b938830c361313eda7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Sep 2016 14:35:43 -0700 Subject: cpufreq: kirkwood: add missing \n to end of dev_err messages Trival fix, dev_err messages are missing a \n, so add it. Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index be42f10..1b9bcd7 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -123,7 +123,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { - dev_err(priv.dev, "Unable to get cpuclk"); + dev_err(priv.dev, "Unable to get cpuclk\n"); return PTR_ERR(priv.cpu_clk); } @@ -132,7 +132,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); if (IS_ERR(priv.ddr_clk)) { - dev_err(priv.dev, "Unable to get ddrclk"); + dev_err(priv.dev, "Unable to get ddrclk\n"); err = PTR_ERR(priv.ddr_clk); goto out_cpu; } @@ -142,7 +142,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.powersave_clk = of_clk_get_by_name(np, "powersave"); if (IS_ERR(priv.powersave_clk)) { - dev_err(priv.dev, "Unable to get powersave"); + dev_err(priv.dev, "Unable to get powersave\n"); err = PTR_ERR(priv.powersave_clk); goto out_ddr; } @@ -155,7 +155,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) if (!err) return 0; - dev_err(priv.dev, "Failed to register cpufreq driver"); + dev_err(priv.dev, "Failed to register cpufreq driver\n"); clk_disable_unprepare(priv.powersave_clk); out_ddr: -- cgit v0.10.2 From 9ad0a1b6a21fbfa0b308e704fd924836f7ae4458 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Sep 2016 14:40:13 -0700 Subject: cpufreq: st: add missing \n to end of dev_err message Trival fix, dev_err message is missing a \n, so add it. Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 0404203..b366e6d 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -163,7 +163,7 @@ static int sti_cpufreq_set_opp_info(void) reg_fields = sti_cpufreq_match(); if (!reg_fields) { - dev_err(dev, "This SoC doesn't support voltage scaling"); + dev_err(dev, "This SoC doesn't support voltage scaling\n"); return -ENODEV; } -- cgit v0.10.2