From 2d175069f2b5477692d4bd7586bc530ffe8107bb Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 13 Aug 2012 00:14:53 +0200 Subject: PM / cpufreq: Initialise the cpu field during conservative governor start This change initialises the cpu id field of cs_cpu_dbs_info structure in conservative governor and keep this consistent with other governors. Similar initialisation is present in ondemand governor. Signed-off-by: Amit Daniel Kachhap Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 235a340..a1563d7 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -504,6 +504,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } + this_dbs_info->cpu = cpu; this_dbs_info->down_skip = 0; this_dbs_info->requested_freq = policy->cur; -- cgit v0.10.2 From 56835e6cc053c29bf1a15a07dbeb78f219a15214 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Sep 2012 00:50:26 +0200 Subject: cpufreq / powernow-k8: Fixup missing _PSS objects message _PSS objects can also be missing if Cool'N'Quiet is disabled in the BIOS. Add that to the FW_BUG message for the user to try before updating her BIOS. Fix formatting while at it. Acked-by: Mark Langsdorf Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index c0e8164..33f17c4 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1265,12 +1265,15 @@ static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) init_on_cpu->rc = 0; } +static const char missing_pss_msg[] = + KERN_ERR + FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + FW_BUG PFX "First, make sure Cool'N'Quiet is enabled in the BIOS.\n" + FW_BUG PFX "If that doesn't help, try upgrading your BIOS.\n"; + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { - static const char ACPI_PSS_BIOS_BUG_MSG[] = - KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; int rc; @@ -1298,7 +1301,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk_once(ACPI_PSS_BIOS_BUG_MSG); + printk_once(missing_pss_msg); goto err_out; } if (pol->cpu != 0) { -- cgit v0.10.2 From 3dc9a633f8a65b39c5897874138027328bfb0a94 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 4 Sep 2012 08:28:02 +0000 Subject: acpi-cpufreq: Add support for modern AMD CPUs The programming model for P-states on modern AMD CPUs is very similar to that of Intel and VIA. It makes sense to consolidate this support into one driver rather than duplicating functionality between two of them. This patch adds support for AMDs with hardware P-state control to acpi-cpufreq. Signed-off-by: Matthew Garrett Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87..1e1f3eb 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,8 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 78ff7ee..8d12e37 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - This driver also supports Intel Enhanced Speedstep. + This driver also supports Intel Enhanced Speedstep and newer + AMD CPUs. To compile this driver as a module, choose M here: the module will be called acpi-cpufreq. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 56c6c6b..067a61f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -54,10 +54,12 @@ MODULE_LICENSE("GPL"); enum { UNDEFINED_CAPABLE = 0, SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_AMD_MSR_CAPABLE, SYSTEM_IO_CAPABLE, }; #define INTEL_MSR_RANGE (0xffff) +#define AMD_MSR_RANGE (0x7) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -82,6 +84,13 @@ static int check_est_cpu(unsigned int cpuid) return cpu_has(cpu, X86_FEATURE_EST); } +static int check_amd_hwpstate_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); + + return cpu_has(cpu, X86_FEATURE_HW_PSTATE); +} + static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; @@ -101,7 +110,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) int i; struct acpi_processor_performance *perf; - msr &= INTEL_MSR_RANGE; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + msr &= AMD_MSR_RANGE; + else + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { @@ -115,6 +128,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) { switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: return extract_msr(val, data); case SYSTEM_IO_CAPABLE: return extract_io(val, data); @@ -150,6 +164,7 @@ static void do_drv_read(void *_cmd) switch (cmd->type) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: rdmsr(cmd->addr.msr.reg, cmd->val, h); break; case SYSTEM_IO_CAPABLE: @@ -174,6 +189,9 @@ static void do_drv_write(void *_cmd) lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); wrmsr(cmd->addr.msr.reg, lo, hi); break; + case SYSTEM_AMD_MSR_CAPABLE: + wrmsr(cmd->addr.msr.reg, cmd->val, 0); + break; case SYSTEM_IO_CAPABLE: acpi_os_write_port((acpi_io_address)cmd->addr.io.port, cmd->val, @@ -217,6 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask) cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_STATUS; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; @@ -326,6 +348,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cmd.addr.msr.reg = MSR_IA32_PERF_CTL; cmd.val = (u32) perf->states[next_perf_state].control; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; cmd.addr.io.port = perf->control_register.address; @@ -580,12 +607,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; + if (check_est_cpu(cpu)) { + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; + if (check_amd_hwpstate_cpu(cpu)) { + data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; + break; + } + result = -ENODEV; + goto err_unreg; default: pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); -- cgit v0.10.2 From acd316248205d553594296f1895ba5196b89ffcc Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 4 Sep 2012 08:28:03 +0000 Subject: acpi-cpufreq: Add quirk to disable _PSD usage on all AMD CPUs To workaround some Windows specific behavior, the ACPI _PSD table on AMD desktop boards advertises all cores as dependent, meaning that they all can only use the same P-state. acpi-cpufreq strictly obeys this description, instantiating one CPU only and symlinking the others. But the hardware can have distinct frequencies for each core and powernow-k8 did it that way. So, in order to use the hardware to its full potential and keep the original powernow-k8 behavior, lets override the _PSD table setting on AMD hardware. We use the siblings table, as it matches the current hardware behavior. Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 067a61f..70e7173 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -51,6 +51,8 @@ MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); +#define PFX "acpi-cpufreq: " + enum { UNDEFINED_CAPABLE = 0, SYSTEM_INTEL_MSR_CAPABLE, @@ -586,6 +588,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->cpus, cpu_core_mask(cpu)); } + + if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + cpumask_clear(policy->cpus); + cpumask_set_cpu(cpu, policy->cpus); + cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu)); + policy->shared_type = CPUFREQ_SHARED_TYPE_HW; + pr_info_once(PFX "overriding BIOS provided _PSD data\n"); + } #endif /* capability check */ -- cgit v0.10.2 From 034be8fd645bb77ca623036dc24c790b6cc803e2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 4 Sep 2012 08:28:04 +0000 Subject: cpufreq: Add warning message to powernow-k8 cpufreq modules are often loaded from init scripts that assume that all recent AMD systems will use powernow-k8. To inform the user of the change of support and ease the transition to acpi-cpufreq, emit a warning message. Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 8d12e37..b36ca1f 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -96,7 +96,8 @@ config X86_POWERNOW_K8 select CPU_FREQ_TABLE depends on ACPI && ACPI_PROCESSOR help - This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. + This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. + Support for K10 and newer processors is now in acpi-cpufreq. To compile this driver as a module, choose M here: the module will be called powernow-k8. diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 33f17c4..e2defb8 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1563,6 +1563,9 @@ static int __cpuinit powernowk8_init(void) if (!x86_match_cpu(powernow_k8_ids)) return -ENODEV; + if (static_cpu_has(X86_FEATURE_HW_PSTATE)) + pr_warn(PFX "support for this CPU is deprecated, use acpi-cpufreq instead.\n"); + for_each_online_cpu(i) { int rc; smp_call_function_single(i, check_supported_cpu, &rc, 1); -- cgit v0.10.2 From a2060958569a8e8e36202c373a55c458ff84856d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 4 Sep 2012 08:28:05 +0000 Subject: powernow-k8: delay info messages until initialization has succeeded powernow-k8 is quite prematurely crying Hooray and outputs diagnostic messages, although the actual initialization can still fail. Since now we may have acpi-cpufreq already loaded, we move the messages at the end of the init routine to avoid confusing output if the loading of powernow-k8 should not succeed. Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index e2defb8..f1035a9 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1576,9 +1576,6 @@ static int __cpuinit powernowk8_init(void) if (supported_cpus != num_online_cpus()) return -ENODEV; - printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", - num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); - if (boot_cpu_has(X86_FEATURE_CPB)) { cpb_capable = true; @@ -1597,16 +1594,23 @@ static int __cpuinit powernowk8_init(void) struct msr *reg = per_cpu_ptr(msrs, cpu); cpb_enabled |= !(!!(reg->l & BIT(25))); } - - printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", - (cpb_enabled ? "on" : "off")); } rv = cpufreq_register_driver(&cpufreq_amd64_driver); - if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) { - unregister_cpu_notifier(&cpb_nb); - msrs_free(msrs); - msrs = NULL; + + if (!rv) + pr_info(PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", + num_online_nodes(), boot_cpu_data.x86_model_id, + supported_cpus); + + if (boot_cpu_has(X86_FEATURE_CPB)) { + if (rv < 0) { + unregister_cpu_notifier(&cpb_nb); + msrs_free(msrs); + msrs = NULL; + } else + pr_info(PFX "Core Performance Boosting: %s.\n", + (cpb_enabled ? "on" : "off")); } return rv; } -- cgit v0.10.2 From f594065faf4f9067c2283a34619fc0714e79a98d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 4 Sep 2012 08:28:06 +0000 Subject: ACPI: Add fixups for AMD P-state figures Some AMD systems may round the frequencies in ACPI tables to 100MHz boundaries. We can obtain the real frequencies from MSRs, so add a quirk to fix these frequencies up on AMD systems. Signed-off-by: Matthew Garrett Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1e1f3eb..fbee971 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,7 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PERF_CTL 0xc0010062 diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a093dc1..836bfe0 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr) return result; } +#ifdef CONFIG_X86 +/* + * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding + * in their ACPI data. Calculate the real values and fix up the _PSS data. + */ +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) +{ + u32 hi, lo, fid, did; + int index = px->control & 0x00000007; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + + if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) + || boot_cpu_data.x86 == 0x11) { + rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi); + fid = lo & 0x3f; + did = (lo >> 6) & 7; + if (boot_cpu_data.x86 == 0x10) + px->core_frequency = (100 * (fid + 0x10)) >> did; + else + px->core_frequency = (100 * (fid + 8)) >> did; + } +} +#else +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {}; +#endif + static int acpi_processor_get_performance_states(struct acpi_processor *pr) { int result = 0; @@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) goto end; } + amd_fixup_frequency(px, i); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n", i, -- cgit v0.10.2 From 615b7300717b9ad5c23d1f391843484fe30f6c12 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 4 Sep 2012 08:28:07 +0000 Subject: acpi-cpufreq: Add support for disabling dynamic overclocking One feature present in powernow-k8 that isn't present in acpi-cpufreq is support for enabling or disabling AMD's core performance boost technology. This patch adds support to acpi-cpufreq, but also includes support for Intel's dynamic acceleration. The original boost disabling sysfs file was per CPU, but acted globally. Also the naming (cpb) was at least not intuitive. So lets introduce a single file simply called "boost", which sits once in /sys/devices/system/cpu/cpufreq. This should be the only way of using this feature, so add documentation about the rationale and the usage. A following patch will re-introduce the cpb knob for compatibility reasons on AMD CPUs. Per-CPU boost switching is possible, but not trivial and is thus postponed to a later patch series. Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5dab364..6943133 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -176,3 +176,14 @@ Description: Disable L3 cache indices All AMD processors with L3 caches provide this functionality. For details, see BKDGs at http://developer.amd.com/documentation/guides/Pages/default.aspx + + +What: /sys/devices/system/cpu/cpufreq/boost +Date: August 2012 +Contact: Linux kernel mailing list +Description: Processor frequency boosting control + + This switch controls the boost setting for the whole system. + Boosting allows the CPU and the firmware to run at a frequency + beyound it's nominal limit. + More details can be found in Documentation/cpu-freq/boost.txt diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt new file mode 100644 index 0000000..9b4edfc --- /dev/null +++ b/Documentation/cpu-freq/boost.txt @@ -0,0 +1,93 @@ +Processor boosting control + + - information for users - + +Quick guide for the impatient: +-------------------- +/sys/devices/system/cpu/cpufreq/boost +controls the boost setting for the whole system. You can read and write +that file with either "0" (boosting disabled) or "1" (boosting allowed). +Reading or writing 1 does not mean that the system is boosting at this +very moment, but only that the CPU _may_ raise the frequency at it's +discretion. +-------------------- + +Introduction +------------- +Some CPUs support a functionality to raise the operating frequency of +some cores in a multi-core package if certain conditions apply, mostly +if the whole chip is not fully utilized and below it's intended thermal +budget. This is done without operating system control by a combination +of hardware and firmware. +On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core", +in technical documentation "Core performance boost". In Linux we use +the term "boost" for convenience. + +Rationale for disable switch +---------------------------- + +Though the idea is to just give better performance without any user +intervention, sometimes the need arises to disable this functionality. +Most systems offer a switch in the (BIOS) firmware to disable the +functionality at all, but a more fine-grained and dynamic control would +be desirable: +1. While running benchmarks, reproducible results are important. Since + the boosting functionality depends on the load of the whole package, + single thread performance can vary. By explicitly disabling the boost + functionality at least for the benchmark's run-time the system will run + at a fixed frequency and results are reproducible again. +2. To examine the impact of the boosting functionality it is helpful + to do tests with and without boosting. +3. Boosting means overclocking the processor, though under controlled + conditions. By raising the frequency and the voltage the processor + will consume more power than without the boosting, which may be + undesirable for instance for mobile users. Disabling boosting may + save power here, though this depends on the workload. + + +User controlled switch +---------------------- + +To allow the user to toggle the boosting functionality, the acpi-cpufreq +driver exports a sysfs knob to disable it. There is a file: +/sys/devices/system/cpu/cpufreq/boost +which can either read "0" (boosting disabled) or "1" (boosting enabled). +Reading the file is always supported, even if the processor does not +support boosting. In this case the file will be read-only and always +reads as "0". Explicitly changing the permissions and writing to that +file anyway will return EINVAL. + +On supported CPUs one can write either a "0" or a "1" into this file. +This will either disable the boost functionality on all cores in the +whole system (0) or will allow the hardware to boost at will (1). + +Writing a "1" does not explicitly boost the system, but just allows the +CPU (and the firmware) to boost at their discretion. Some implementations +take external factors like the chip's temperature into account, so +boosting once does not necessarily mean that it will occur every time +even using the exact same software setup. + + +AMD legacy cpb switch +--------------------- +The AMD powernow-k8 driver used to support a very similar switch to +disable or enable the "Core Performance Boost" feature of some AMD CPUs. +This switch was instantiated in each CPU's cpufreq directory +(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb". +Though the per CPU existence hints at a more fine grained control, the +actual implementation only supported a system-global switch semantics, +which was simply reflected into each CPU's file. Writing a 0 or 1 into it +would pull the other CPUs to the same state. +For compatibility reasons this file and its behavior is still supported +on AMD CPUs, though it is now protected by a config switch +(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created, +even with the config option set. +This functionality is considered legacy and will be removed in some future +kernel version. + +More fine grained boosting control +---------------------------------- + +Technically it is possible to switch the boosting functionality at least +on a per package basis, for some CPUs even per core. Currently the driver +does not support it, but this may be implemented in the future. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 70e7173..dffa7af 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -63,6 +63,8 @@ enum { #define INTEL_MSR_RANGE (0xffff) #define AMD_MSR_RANGE (0x7) +#define MSR_K7_HWCR_CPB_DIS (1ULL << 25) + struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; @@ -78,6 +80,96 @@ static struct acpi_processor_performance __percpu *acpi_perf_data; static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; +static bool boost_enabled, boost_supported; +static struct msr __percpu *msrs; + +static bool boost_state(unsigned int cpu) +{ + u32 lo, hi; + u64 msr; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); + case X86_VENDOR_AMD: + rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); + msr = lo | ((u64)hi << 32); + return !(msr & MSR_K7_HWCR_CPB_DIS); + } + return false; +} + +static void boost_set_msrs(bool enable, const struct cpumask *cpumask) +{ + u32 cpu; + u32 msr_addr; + u64 msr_mask; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + msr_addr = MSR_IA32_MISC_ENABLE; + msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + break; + case X86_VENDOR_AMD: + msr_addr = MSR_K7_HWCR; + msr_mask = MSR_K7_HWCR_CPB_DIS; + break; + default: + return; + } + + rdmsr_on_cpus(cpumask, msr_addr, msrs); + + for_each_cpu(cpu, cpumask) { + struct msr *reg = per_cpu_ptr(msrs, cpu); + if (enable) + reg->q &= ~msr_mask; + else + reg->q |= msr_mask; + } + + wrmsr_on_cpus(cpumask, msr_addr, msrs); +} + +static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val = 0; + + if (!boost_supported) + return -EINVAL; + + ret = kstrtoul(buf, 10, &val); + if (ret || (val > 1)) + return -EINVAL; + + if ((val && boost_enabled) || (!val && !boost_enabled)) + return count; + + get_online_cpus(); + + boost_set_msrs(val, cpu_online_mask); + + put_online_cpus(); + + boost_enabled = val; + pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + + return count; +} + +static ssize_t show_global_boost(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", boost_enabled); +} + +static struct global_attr global_boost = __ATTR(boost, 0644, + show_global_boost, + store_global_boost); static int check_est_cpu(unsigned int cpuid) { @@ -448,6 +540,44 @@ static void free_acpi_perf_data(void) free_percpu(acpi_perf_data); } +static int boost_notify(struct notifier_block *nb, unsigned long action, + void *hcpu) +{ + unsigned cpu = (long)hcpu; + const struct cpumask *cpumask; + + cpumask = get_cpu_mask(cpu); + + /* + * Clear the boost-disable bit on the CPU_DOWN path so that + * this cpu cannot block the remaining ones from boosting. On + * the CPU_UP path we simply keep the boost-disable flag in + * sync with the current global state. + */ + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + boost_set_msrs(boost_enabled, cpumask); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + boost_set_msrs(1, cpumask); + break; + + default: + break; + } + + return NOTIFY_OK; +} + + +static struct notifier_block boost_nb = { + .notifier_call = boost_notify, +}; + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -774,6 +904,49 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .attr = acpi_cpufreq_attr, }; +static void __init acpi_cpufreq_boost_init(void) +{ + if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) { + msrs = msrs_alloc(); + + if (!msrs) + return; + + boost_supported = true; + boost_enabled = boost_state(0); + + get_online_cpus(); + + /* Force all MSRs to the same value */ + boost_set_msrs(boost_enabled, cpu_online_mask); + + register_cpu_notifier(&boost_nb); + + put_online_cpus(); + } else + global_boost.attr.mode = 0444; + + /* We create the boost file in any case, though for systems without + * hardware support it will be read-only and hardwired to return 0. + */ + if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr))) + pr_warn(PFX "could not register global boost sysfs file\n"); + else + pr_debug("registered global boost sysfs file\n"); +} + +static void __exit acpi_cpufreq_boost_exit(void) +{ + sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr)); + + if (msrs) { + unregister_cpu_notifier(&boost_nb); + + msrs_free(msrs); + msrs = NULL; + } +} + static int __init acpi_cpufreq_init(void) { int ret; @@ -790,6 +963,8 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) free_acpi_perf_data(); + else + acpi_cpufreq_boost_init(); return ret; } @@ -798,6 +973,8 @@ static void __exit acpi_cpufreq_exit(void) { pr_debug("acpi_cpufreq_exit\n"); + acpi_cpufreq_boost_exit(); + cpufreq_unregister_driver(&acpi_cpufreq_driver); free_acpi_perf_data(); -- cgit v0.10.2 From 11269ff506888a06b19c8c7a3297114f30673973 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 4 Sep 2012 08:28:08 +0000 Subject: acpi-cpufreq: Add compatibility for legacy AMD cpb sysfs knob The powernow-k8 driver supported a sysfs knob called "cpb", which was instantiated per CPU, but actually acted globally for the whole system. To keep some compatibility with this feature, we re-introduce this behavior here, but: a) only enable it on AMD CPUs and b) protect it with a Kconfig switch I'd like to consider this feature obsolete. Lets keep it around for some kernel versions and then phase it out. Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index b36ca1f..934854a 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -33,6 +33,18 @@ config X86_ACPI_CPUFREQ If in doubt, say N. +config X86_ACPI_CPUFREQ_CPB + default y + bool "Legacy cpb sysfs knob support for AMD CPUs" + depends on X86_ACPI_CPUFREQ && CPU_SUP_AMD + help + The powernow-k8 driver used to provide a sysfs knob called "cpb" + to disable the Core Performance Boosting feature of AMD CPUs. This + file has now been superseeded by the more generic "boost" entry. + + By enabling this option the acpi_cpufreq driver provides the old + entry in addition to the new boost ones, for compatibility reasons. + config ELAN_CPUFREQ tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index dffa7af..0d048f6 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -133,8 +133,7 @@ static void boost_set_msrs(bool enable, const struct cpumask *cpumask) wrmsr_on_cpus(cpumask, msr_addr, msrs); } -static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t _store_boost(const char *buf, size_t count) { int ret; unsigned long val = 0; @@ -161,6 +160,12 @@ static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, return count; } +static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + return _store_boost(buf, count); +} + static ssize_t show_global_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -171,6 +176,21 @@ static struct global_attr global_boost = __ATTR(boost, 0644, show_global_boost, store_global_boost); +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB +static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, + size_t count) +{ + return _store_boost(buf, count); +} + +static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%u\n", boost_enabled); +} + +static struct freq_attr cpb = __ATTR(cpb, 0644, show_cpb, store_cpb); +#endif + static int check_est_cpu(unsigned int cpuid) { struct cpuinfo_x86 *cpu = &cpu_data(cpuid); @@ -889,6 +909,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* this is a placeholder for cpb, do not remove */ NULL, }; @@ -960,6 +981,27 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB + /* this is a sysfs file with a strange name and an even stranger + * semantic - per CPU instantiation, but system global effect. + * Lets enable it only on AMD CPUs for compatibility reasons and + * only if configured. This is considered legacy code, which + * will probably be removed at some point in the future. + */ + if (check_amd_hwpstate_cpu(0)) { + struct freq_attr **iter; + + pr_debug("adding sysfs entry for cpb\n"); + + for (iter = acpi_cpufreq_attr; *iter != NULL; iter++) + ; + + /* make sure there is a terminator behind it */ + if (iter[1] == NULL) + *iter = &cpb; + } +#endif + ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) free_acpi_perf_data(); -- cgit v0.10.2 From e1f0b8e9b04a262834ed111e605e5d215685dfab Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 4 Sep 2012 08:28:09 +0000 Subject: cpufreq: Remove support for hardware P-state chips from powernow-k8 These chips are now supported by acpi-cpufreq, so we can delete all the code handling them. Andre: Tighten the deprecation warning message. Trigger load of acpi-cpufreq and let the load of the module finally fail. This avoids the problem of users ending up without any cpufreq support after the transition. Signed-off-by: Matthew Garrett Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 9531fc2..b99790f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o # K8 systems. ACPI is preferred to all other hardware-specific drivers. # speedstep-* is preferred over p4-clockmod. -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index f1035a9..0b19faf 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -49,22 +49,12 @@ #define PFX "powernow-k8: " #define VERSION "version 2.20.00" #include "powernow-k8.h" -#include "mperf.h" /* serialize freq changes */ static DEFINE_MUTEX(fidvid_mutex); static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); -static int cpu_family = CPU_OPTERON; - -/* array to map SW pstate number to acpi state */ -static u32 ps_to_as[8]; - -/* core performance boost */ -static bool cpb_capable, cpb_enabled; -static struct msr __percpu *msrs; - static struct cpufreq_driver cpufreq_amd64_driver; #ifndef CONFIG_SMP @@ -86,12 +76,6 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, - u32 pstate) -{ - return data[ps_to_as[pstate]].frequency; -} - /* Return the vco fid for an input fid * * Each "low" fid has corresponding "high" fid, and you can get to "low" fids @@ -114,9 +98,6 @@ static int pending_bit_stuck(void) { u32 lo, hi; - if (cpu_family == CPU_HW_PSTATE) - return 0; - rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; } @@ -130,20 +111,6 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 lo, hi; u32 i = 0; - if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; - - /* - * a workaround for family 11h erratum 311 might cause - * an "out-of-range Pstate if the core is in Pstate-0 - */ - if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) - data->currpstate = HW_PSTATE_0; - - return 0; - } do { if (i++ > 10000) { pr_debug("detected change pending stuck\n"); @@ -300,14 +267,6 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, return 0; } -/* Change hardware pstate by single MSR write */ -static int transition_pstate(struct powernow_k8_data *data, u32 pstate) -{ - wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currpstate = pstate; - return 0; -} - /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) @@ -524,8 +483,6 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, static const struct x86_cpu_id powernow_k8_ids[] = { /* IO based frequency switching */ { X86_VENDOR_AMD, 0xf }, - /* MSR based frequency switching supported */ - X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE), {} }; MODULE_DEVICE_TABLE(x86cpu, powernow_k8_ids); @@ -561,15 +518,8 @@ static void check_supported_cpu(void *_rc) "Power state transitions not supported\n"); return; } - } else { /* must be a HW Pstate capable processor */ - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) - cpu_family = CPU_HW_PSTATE; - else - return; + *rc = 0; } - - *rc = 0; } static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, @@ -633,18 +583,11 @@ static void print_basics(struct powernow_k8_data *data) for (j = 0; j < data->numps; j++) { if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { - if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX - " %d : pstate %d (%d MHz)\n", j, - data->powernow_table[j].index, - data->powernow_table[j].frequency/1000); - } else { printk(KERN_INFO PFX "fid 0x%x (%d MHz), vid 0x%x\n", data->powernow_table[j].index & 0xff, data->powernow_table[j].frequency/1000, data->powernow_table[j].index >> 8); - } } } if (data->batps) @@ -652,20 +595,6 @@ static void print_basics(struct powernow_k8_data *data) data->batps); } -static u32 freq_from_fid_did(u32 fid, u32 did) -{ - u32 mhz = 0; - - if (boot_cpu_data.x86 == 0x10) - mhz = (100 * (fid + 0x10)) >> did; - else if (boot_cpu_data.x86 == 0x11) - mhz = (100 * (fid + 8)) >> did; - else - BUG(); - - return mhz * 1000; -} - static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) { @@ -825,7 +754,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, { u64 control; - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + if (!data->acpi_data.state_count) return; control = data->acpi_data.states[index].control; @@ -876,10 +805,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) data->numps = data->acpi_data.state_count; powernow_k8_acpi_pst_values(data, 0); - if (cpu_family == CPU_HW_PSTATE) - ret_val = fill_powernow_table_pstate(data, powernow_table); - else - ret_val = fill_powernow_table_fidvid(data, powernow_table); + ret_val = fill_powernow_table_fidvid(data, powernow_table); if (ret_val) goto err_out_mem; @@ -916,51 +842,6 @@ err_out: return ret_val; } -static int fill_powernow_table_pstate(struct powernow_k8_data *data, - struct cpufreq_frequency_table *powernow_table) -{ - int i; - u32 hi = 0, lo = 0; - rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); - data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; - - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 index; - - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > data->max_hw_pstate) { - printk(KERN_ERR PFX "invalid pstate %d - " - "bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS " - "manufacturer\n"); - invalidate_entry(powernow_table, i); - continue; - } - - ps_to_as[index] = i; - - /* Frequency may be rounded for these */ - if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) - || boot_cpu_data.x86 == 0x11) { - - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - pr_debug("invalid pstate %d, ignoring\n", index); - invalidate_entry(powernow_table, i); - continue; - } - - powernow_table[i].frequency = - freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); - } else - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; - - powernow_table[i].index = index; - } - return 0; -} - static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { @@ -1037,15 +918,7 @@ static int get_transition_latency(struct powernow_k8_data *data) max_latency = cur_latency; } if (max_latency == 0) { - /* - * Fam 11h and later may return 0 as transition latency. This - * is intended and means "very fast". While cpufreq core and - * governors currently can handle that gracefully, better set it - * to 1 to avoid problems in the future. - */ - if (boot_cpu_data.x86 < 0x11) - printk(KERN_ERR FW_WARN PFX "Invalid zero transition " - "latency\n"); + pr_err(FW_WARN PFX "Invalid zero transition latency\n"); max_latency = 1; } /* value in usecs, needs to be in nanoseconds */ @@ -1105,40 +978,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, return res; } -/* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, - unsigned int index) -{ - u32 pstate = 0; - int res, i; - struct cpufreq_freqs freqs; - - pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* get MSR index for hardware pstate transition */ - pstate = index & HW_PSTATE_MASK; - if (pstate > data->max_hw_pstate) - return -EINVAL; - - freqs.old = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_pstate(data, pstate); - freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - /* Driver entry point to switch to the target frequency */ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) @@ -1180,18 +1019,15 @@ static int powernowk8_target(struct cpufreq_policy *pol, if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family != CPU_HW_PSTATE) { - pr_debug("targ: curr fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); + pr_debug("targ: curr fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); - if ((checkvid != data->currvid) || - (checkfid != data->currfid)) { - printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, " - "vid 0x%x 0x%x\n", - checkfid, data->currfid, - checkvid, data->currvid); - } + if ((checkvid != data->currvid) || + (checkfid != data->currfid)) { + pr_info(PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, + checkvid, data->currvid); } if (cpufreq_frequency_table_target(pol, data->powernow_table, @@ -1202,11 +1038,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, powernow_k8_acpi_pst_values(data, newstate); - if (cpu_family == CPU_HW_PSTATE) - ret = transition_frequency_pstate(data, - data->powernow_table[newstate].index); - else - ret = transition_frequency_fidvid(data, newstate); + ret = transition_frequency_fidvid(data, newstate); + if (ret) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; @@ -1215,11 +1048,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, } mutex_unlock(&fidvid_mutex); - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, - data->powernow_table[newstate].index); - else - pol->cur = find_khz_freq_from_fid(data->currfid); + pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; err_out: @@ -1259,8 +1088,7 @@ static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) return; } - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); + fidvid_msr_init(); init_on_cpu->rc = 0; } @@ -1277,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; int rc; - struct cpuinfo_x86 *c = &cpu_data(pol->cpu); if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1293,7 +1120,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; - data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* @@ -1330,17 +1156,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc != 0) goto err_out_exit_acpi; - if (cpu_family == CPU_HW_PSTATE) - cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); - else - cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); + cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); data->available_cores = pol->cpus; - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - else - pol->cur = find_khz_freq_from_fid(data->currfid); + pol->cur = find_khz_freq_from_fid(data->currfid); pr_debug("policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ @@ -1352,18 +1171,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return -EINVAL; } - /* Check for APERF/MPERF support in hardware */ - if (cpu_has(c, X86_FEATURE_APERFMPERF)) - cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; - cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - if (cpu_family == CPU_HW_PSTATE) - pr_debug("cpu_init done, current pstate 0x%x\n", - data->currpstate); - else - pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); + pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); per_cpu(powernow_data, pol->cpu) = data; @@ -1416,88 +1227,15 @@ static unsigned int powernowk8_get(unsigned int cpu) if (err) goto out; - if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - else - khz = find_khz_freq_from_fid(data->currfid); + khz = find_khz_freq_from_fid(data->currfid); out: return khz; } -static void _cpb_toggle_msrs(bool t) -{ - int cpu; - - get_online_cpus(); - - rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - for_each_cpu(cpu, cpu_online_mask) { - struct msr *reg = per_cpu_ptr(msrs, cpu); - if (t) - reg->l &= ~BIT(25); - else - reg->l |= BIT(25); - } - wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - put_online_cpus(); -} - -/* - * Switch on/off core performance boosting. - * - * 0=disable - * 1=enable. - */ -static void cpb_toggle(bool t) -{ - if (!cpb_capable) - return; - - if (t && !cpb_enabled) { - cpb_enabled = true; - _cpb_toggle_msrs(t); - printk(KERN_INFO PFX "Core Boosting enabled.\n"); - } else if (!t && cpb_enabled) { - cpb_enabled = false; - _cpb_toggle_msrs(t); - printk(KERN_INFO PFX "Core Boosting disabled.\n"); - } -} - -static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, - size_t count) -{ - int ret = -EINVAL; - unsigned long val = 0; - - ret = strict_strtoul(buf, 10, &val); - if (!ret && (val == 0 || val == 1) && cpb_capable) - cpb_toggle(val); - else - return -EINVAL; - - return count; -} - -static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) -{ - return sprintf(buf, "%u\n", cpb_enabled); -} - -#define define_one_rw(_name) \ -static struct freq_attr _name = \ -__ATTR(_name, 0644, show_##_name, store_##_name) - -define_one_rw(cpb); - static struct freq_attr *powernow_k8_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, - &cpb, NULL, }; @@ -1513,58 +1251,20 @@ static struct cpufreq_driver cpufreq_amd64_driver = { .attr = powernow_k8_attr, }; -/* - * Clear the boost-disable flag on the CPU_DOWN path so that this cpu - * cannot block the remaining ones from boosting. On the CPU_UP path we - * simply keep the boost-disable flag in sync with the current global - * state. - */ -static int cpb_notify(struct notifier_block *nb, unsigned long action, - void *hcpu) -{ - unsigned cpu = (long)hcpu; - u32 lo, hi; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - - if (!cpb_enabled) { - rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); - lo |= BIT(25); - wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); - } - break; - - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); - lo &= ~BIT(25); - wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block cpb_nb = { - .notifier_call = cpb_notify, -}; - /* driver entry point for init */ static int __cpuinit powernowk8_init(void) { - unsigned int i, supported_cpus = 0, cpu; + unsigned int i, supported_cpus = 0; int rv; - if (!x86_match_cpu(powernow_k8_ids)) + if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { + pr_warn(PFX "this CPU is not supported anymore, using acpi-cpufreq instead.\n"); + request_module("acpi-cpufreq"); return -ENODEV; + } - if (static_cpu_has(X86_FEATURE_HW_PSTATE)) - pr_warn(PFX "support for this CPU is deprecated, use acpi-cpufreq instead.\n"); + if (!x86_match_cpu(powernow_k8_ids)) + return -ENODEV; for_each_online_cpu(i) { int rc; @@ -1576,26 +1276,6 @@ static int __cpuinit powernowk8_init(void) if (supported_cpus != num_online_cpus()) return -ENODEV; - if (boot_cpu_has(X86_FEATURE_CPB)) { - - cpb_capable = true; - - msrs = msrs_alloc(); - if (!msrs) { - printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); - return -ENOMEM; - } - - register_cpu_notifier(&cpb_nb); - - rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - for_each_cpu(cpu, cpu_online_mask) { - struct msr *reg = per_cpu_ptr(msrs, cpu); - cpb_enabled |= !(!!(reg->l & BIT(25))); - } - } - rv = cpufreq_register_driver(&cpufreq_amd64_driver); if (!rv) @@ -1603,15 +1283,6 @@ static int __cpuinit powernowk8_init(void) num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); - if (boot_cpu_has(X86_FEATURE_CPB)) { - if (rv < 0) { - unregister_cpu_notifier(&cpb_nb); - msrs_free(msrs); - msrs = NULL; - } else - pr_info(PFX "Core Performance Boosting: %s.\n", - (cpb_enabled ? "on" : "off")); - } return rv; } @@ -1620,13 +1291,6 @@ static void __exit powernowk8_exit(void) { pr_debug("exit\n"); - if (boot_cpu_has(X86_FEATURE_CPB)) { - msrs_free(msrs); - msrs = NULL; - - unregister_cpu_notifier(&cpb_nb); - } - cpufreq_unregister_driver(&cpufreq_amd64_driver); } diff --git a/drivers/cpufreq/powernow-k8.h b/drivers/cpufreq/powernow-k8.h index 3744d26..79329d4 100644 --- a/drivers/cpufreq/powernow-k8.h +++ b/drivers/cpufreq/powernow-k8.h @@ -5,24 +5,11 @@ * http://www.gnu.org/licenses/gpl.html */ -enum pstate { - HW_PSTATE_INVALID = 0xff, - HW_PSTATE_0 = 0, - HW_PSTATE_1 = 1, - HW_PSTATE_2 = 2, - HW_PSTATE_3 = 3, - HW_PSTATE_4 = 4, - HW_PSTATE_5 = 5, - HW_PSTATE_6 = 6, - HW_PSTATE_7 = 7, -}; - struct powernow_k8_data { unsigned int cpu; u32 numps; /* number of p-states */ u32 batps; /* number of p-states supported on battery */ - u32 max_hw_pstate; /* maximum legal hardware pstate */ /* these values are constant when the PSB is used to determine * vid/fid pairings, but are modified during the ->target() call @@ -37,7 +24,6 @@ struct powernow_k8_data { /* keep track of the current fid / vid or pstate */ u32 currvid; u32 currfid; - enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. @@ -97,23 +83,6 @@ struct powernow_k8_data { #define MSR_S_HI_CURRENT_VID 0x0000003f #define MSR_C_HI_STP_GNT_BENIGN 0x00000001 - -/* Hardware Pstate _PSS and MSR definitions */ -#define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_MASK 0x00000007 -#define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_PSTATE_MAX_MASK 0x000000f0 -#define HW_PSTATE_MAX_SHIFT 4 -#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ -#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ -#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ -#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ - -/* define the two driver architectures */ -#define CPU_OPTERON 0 -#define CPU_HW_PSTATE 1 - - /* * There are restrictions frequencies have to follow: * - only 1 entry in the low fid table ( <=1.4GHz ) @@ -218,5 +187,4 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -- cgit v0.10.2 From ec971ea5f2426a0bf9d5cca9a103743918c12978 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Wed, 5 Sep 2012 01:08:59 +0200 Subject: ARM: add cpufreq transiton notifier to adjust loops_per_jiffy for smp If CONFIG_SMP, cpufreq skips loops_per_jiffy update, because different arch has different per-cpu loops_per_jiffy definition. Signed-off-by: Richard Zhao Acked-by: Russell King Acked-by: Santosh Shilimkar Signed-off-by: Shawn Guo Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ebd8ad2..8e03567 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -584,3 +585,56 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, l_p_j_ref); +static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq); +static unsigned long global_l_p_j_ref; +static unsigned long global_l_p_j_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (!per_cpu(l_p_j_ref, cpu)) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!global_l_p_j_ref) { + global_l_p_j_ref = loops_per_jiffy; + global_l_p_j_ref_freq = freq->old; + } + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { + loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, + global_l_p_j_ref_freq, + freq->new); + per_cpu(cpu_data, cpu).loops_per_jiffy = + cpufreq_scale(per_cpu(l_p_j_ref, cpu), + per_cpu(l_p_j_ref_freq, cpu), + freq->new); + } + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif -- cgit v0.10.2 From b496dfbc94ab86f970ef0167eaabe51f930aa5fb Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 5 Sep 2012 01:09:12 +0200 Subject: PM / OPP: Initialize OPP table from device tree With a lot of devices booting from device tree nowadays, it requires that OPP table can be initialized from device tree. The patch adds a helper function of_init_opp_table together with a binding doc for that purpose. Signed-off-by: Shawn Guo Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/devicetree/bindings/power/opp.txt b/Documentation/devicetree/bindings/power/opp.txt new file mode 100644 index 0000000..74499e5 --- /dev/null +++ b/Documentation/devicetree/bindings/power/opp.txt @@ -0,0 +1,25 @@ +* Generic OPP Interface + +SoCs have a standard set of tuples consisting of frequency and +voltage pairs that the device will support per voltage domain. These +are called Operating Performance Points or OPPs. + +Properties: +- operating-points: An array of 2-tuples items, and each item consists + of frequency and voltage like . + freq: clock frequency in kHz + vol: voltage in microvolt + +Examples: + +cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + next-level-cache = <&L2>; + operating-points = < + /* kHz uV */ + 792000 1100000 + 396000 950000 + 198000 850000 + >; +}; diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index ac993ea..d946864 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Internal data structure organization with the OPP layer library is as @@ -674,3 +675,49 @@ struct srcu_notifier_head *opp_get_notifier(struct device *dev) return &dev_opp->head; } + +#ifdef CONFIG_OF +/** + * of_init_opp_table() - Initialize opp table from device tree + * @dev: device pointer used to lookup device OPPs. + * + * Register the initial OPP table with the OPP library for given device. + */ +int of_init_opp_table(struct device *dev) +{ + const struct property *prop; + const __be32 *val; + int nr; + + prop = of_find_property(dev->of_node, "operating-points", NULL); + if (!prop) + return -ENODEV; + if (!prop->value) + return -ENODATA; + + /* + * Each OPP is a set of tuples consisting of frequency and + * voltage like . + */ + nr = prop->length / sizeof(u32); + if (nr % 2) { + dev_err(dev, "%s: Invalid OPP list\n", __func__); + return -EINVAL; + } + + val = prop->value; + while (nr) { + unsigned long freq = be32_to_cpup(val++) * 1000; + unsigned long volt = be32_to_cpup(val++); + + if (opp_add(dev, freq, volt)) { + dev_warn(dev, "%s: Failed to add OPP %ld\n", + __func__, freq); + continue; + } + nr -= 2; + } + + return 0; +} +#endif diff --git a/include/linux/opp.h b/include/linux/opp.h index 2a4e5fa..214e0ebc 100644 --- a/include/linux/opp.h +++ b/include/linux/opp.h @@ -48,6 +48,14 @@ int opp_disable(struct device *dev, unsigned long freq); struct srcu_notifier_head *opp_get_notifier(struct device *dev); +#ifdef CONFIG_OF +int of_init_opp_table(struct device *dev); +#else +static inline int of_init_opp_table(struct device *dev) +{ + return -EINVAL; +} +#endif /* CONFIG_OF */ #else static inline unsigned long opp_get_voltage(struct opp *opp) { -- cgit v0.10.2 From 95ceafd46359dfd901f9d3b881b33d3036e4b0ce Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 6 Sep 2012 07:09:11 +0000 Subject: cpufreq: Add a generic cpufreq-cpu0 driver It adds a generic cpufreq driver for CPU0 frequency management based on clk, regulator, OPP and device tree support. It can support both uniprocessor (UP) and those symmetric multiprocessor (SMP) systems which share clock and voltage across all CPUs. Signed-off-by: Shawn Guo Acked-by: Santosh Shilimkar Tested-by: AnilKumar Ch Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt new file mode 100644 index 0000000..4416ccc --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@ -0,0 +1,55 @@ +Generic CPU0 cpufreq driver + +It is a generic cpufreq driver for CPU0 frequency management. It +supports both uniprocessor (UP) and symmetric multiprocessor (SMP) +systems which share clock and voltage across all CPUs. + +Both required and optional properties listed below must be defined +under node /cpus/cpu@0. + +Required properties: +- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt + for details + +Optional properties: +- clock-latency: Specify the possible maximum transition latency for clock, + in unit of nanoseconds. +- voltage-tolerance: Specify the CPU voltage tolerance in percentage. + +Examples: + +cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + next-level-cache = <&L2>; + operating-points = < + /* kHz uV */ + 792000 1100000 + 396000 950000 + 198000 850000 + >; + transition-latency = <61036>; /* two CLK32 periods */ + }; + + cpu@1 { + compatible = "arm,cortex-a9"; + reg = <1>; + next-level-cache = <&L2>; + }; + + cpu@2 { + compatible = "arm,cortex-a9"; + reg = <2>; + next-level-cache = <&L2>; + }; + + cpu@3 { + compatible = "arm,cortex-a9"; + reg = <3>; + next-level-cache = <&L2>; + }; +}; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index e24a2a1..ea512f4 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -179,6 +179,17 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +config GENERIC_CPUFREQ_CPU0 + bool "Generic CPU0 cpufreq driver" + depends on HAVE_CLK && REGULATOR && PM_OPP && OF + select CPU_FREQ_TABLE + help + This adds a generic cpufreq driver for CPU0 frequency management. + It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) + systems which share clock and voltage across all CPUs. + + If in doubt, say N. + menu "x86 CPU frequency scaling drivers" depends on X86 source "drivers/cpufreq/Kconfig.x86" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index b99790f..1bc90e1 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -13,6 +13,8 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o +obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c new file mode 100644 index 0000000..e915827 --- /dev/null +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * The OPP code in function cpu0_set_target() is reused from + * drivers/cpufreq/omap-cpufreq.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int transition_latency; +static unsigned int voltage_tolerance; /* in percentage */ + +static struct device *cpu_dev; +static struct clk *cpu_clk; +static struct regulator *cpu_reg; +static struct cpufreq_frequency_table *freq_table; + +static int cpu0_verify_speed(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, freq_table); +} + +static unsigned int cpu0_get_speed(unsigned int cpu) +{ + return clk_get_rate(cpu_clk) / 1000; +} + +static int cpu0_set_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct cpufreq_freqs freqs; + struct opp *opp; + unsigned long freq_Hz, volt = 0, volt_old = 0, tol = 0; + unsigned int index, cpu; + int ret; + + ret = cpufreq_frequency_table_target(policy, freq_table, target_freq, + relation, &index); + if (ret) { + pr_err("failed to match target freqency %d: %d\n", + target_freq, ret); + return ret; + } + + freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); + if (freq_Hz < 0) + freq_Hz = freq_table[index].frequency * 1000; + freqs.new = freq_Hz / 1000; + freqs.old = clk_get_rate(cpu_clk) / 1000; + + if (freqs.old == freqs.new) + return 0; + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + if (cpu_reg) { + opp = opp_find_freq_ceil(cpu_dev, &freq_Hz); + if (IS_ERR(opp)) { + pr_err("failed to find OPP for %ld\n", freq_Hz); + return PTR_ERR(opp); + } + volt = opp_get_voltage(opp); + tol = volt * voltage_tolerance / 100; + volt_old = regulator_get_voltage(cpu_reg); + } + + pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n", + freqs.old / 1000, volt_old ? volt_old / 1000 : -1, + freqs.new / 1000, volt ? volt / 1000 : -1); + + /* scaling up? scale voltage before frequency */ + if (cpu_reg && freqs.new > freqs.old) { + ret = regulator_set_voltage_tol(cpu_reg, volt, tol); + if (ret) { + pr_err("failed to scale voltage up: %d\n", ret); + freqs.new = freqs.old; + return ret; + } + } + + ret = clk_set_rate(cpu_clk, freqs.new * 1000); + if (ret) { + pr_err("failed to set clock rate: %d\n", ret); + if (cpu_reg) + regulator_set_voltage_tol(cpu_reg, volt_old, tol); + return ret; + } + + /* scaling down? scale voltage after frequency */ + if (cpu_reg && freqs.new < freqs.old) { + ret = regulator_set_voltage_tol(cpu_reg, volt, tol); + if (ret) { + pr_err("failed to scale voltage down: %d\n", ret); + clk_set_rate(cpu_clk, freqs.old * 1000); + freqs.new = freqs.old; + return ret; + } + } + + for_each_online_cpu(cpu) { + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + +static int cpu0_cpufreq_init(struct cpufreq_policy *policy) +{ + int ret; + + if (policy->cpu != 0) + return -EINVAL; + + ret = cpufreq_frequency_table_cpuinfo(policy, freq_table); + if (ret) { + pr_err("invalid frequency table: %d\n", ret); + return ret; + } + + policy->cpuinfo.transition_latency = transition_latency; + policy->cur = clk_get_rate(cpu_clk) / 1000; + + /* + * The driver only supports the SMP configuartion where all processors + * share the clock and voltage and clock. Use cpufreq affected_cpus + * interface to have all CPUs scaled together. + */ + policy->shared_type = CPUFREQ_SHARED_TYPE_ANY; + cpumask_setall(policy->cpus); + + cpufreq_frequency_table_get_attr(freq_table, policy->cpu); + + return 0; +} + +static int cpu0_cpufreq_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + + return 0; +} + +static struct freq_attr *cpu0_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver cpu0_cpufreq_driver = { + .flags = CPUFREQ_STICKY, + .verify = cpu0_verify_speed, + .target = cpu0_set_target, + .get = cpu0_get_speed, + .init = cpu0_cpufreq_init, + .exit = cpu0_cpufreq_exit, + .name = "generic_cpu0", + .attr = cpu0_cpufreq_attr, +}; + +static int __devinit cpu0_cpufreq_driver_init(void) +{ + struct device_node *np; + int ret; + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) { + pr_err("failed to find cpu0 node\n"); + return -ENOENT; + } + + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_err("failed to get cpu0 device\n"); + ret = -ENODEV; + goto out_put_node; + } + + cpu_dev->of_node = np; + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + pr_err("failed to get cpu0 clock: %d\n", ret); + goto out_put_node; + } + + cpu_reg = regulator_get(cpu_dev, "cpu0"); + if (IS_ERR(cpu_reg)) { + pr_warn("failed to get cpu0 regulator\n"); + cpu_reg = NULL; + } + + ret = of_init_opp_table(cpu_dev); + if (ret) { + pr_err("failed to init OPP table: %d\n", ret); + goto out_put_node; + } + + ret = opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + pr_err("failed to init cpufreq table: %d\n", ret); + goto out_put_node; + } + + of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance); + + if (of_property_read_u32(np, "clock-latency", &transition_latency)) + transition_latency = CPUFREQ_ETERNAL; + + if (cpu_reg) { + struct opp *opp; + unsigned long min_uV, max_uV; + int i; + + /* + * OPP is maintained in order of increasing frequency, and + * freq_table initialised from OPP is therefore sorted in the + * same order. + */ + for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) + ; + opp = opp_find_freq_exact(cpu_dev, + freq_table[0].frequency * 1000, true); + min_uV = opp_get_voltage(opp); + opp = opp_find_freq_exact(cpu_dev, + freq_table[i-1].frequency * 1000, true); + max_uV = opp_get_voltage(opp); + ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); + if (ret > 0) + transition_latency += ret * 1000; + } + + ret = cpufreq_register_driver(&cpu0_cpufreq_driver); + if (ret) { + pr_err("failed register driver: %d\n", ret); + goto out_free_table; + } + + of_node_put(np); + return 0; + +out_free_table: + opp_free_cpufreq_table(cpu_dev, &freq_table); +out_put_node: + of_node_put(np); + return ret; +} +late_initcall(cpu0_cpufreq_driver_init); + +MODULE_AUTHOR("Shawn Guo "); +MODULE_DESCRIPTION("Generic CPU0 cpufreq driver"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From f26365179d13c18591539d0518aa9de568c70ee0 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Fri, 14 Sep 2012 21:07:39 +0200 Subject: cpufreq / ondemand: update frequency when limits are relaxed Reevaluate CPU load and update frequency immediately whenever limits are changed. Currently ondemand doesn't do that when limits are relaxed, wasting power on systems with relatively low sampling rate. Signed-off-by: Michal Pecio Reviewed-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 836e9b0..9479fb3 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -761,6 +761,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, else if (policy->min > this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); + dbs_check_cpu(this_dbs_info); mutex_unlock(&this_dbs_info->timer_mutex); break; } -- cgit v0.10.2 From 2d8fced75cfa8c513ba1cbe682cb30207d941f2b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Fri, 14 Sep 2012 21:07:48 +0200 Subject: cpufreq: conservative: update frequency when limits are relaxed Reevaluate CPU load and update frequency immediately whenever limits are changed. Currently conservative doesn't do that when limits are relaxed, wasting power on systems with relatively low sampling rate. Signed-off-by: Michal Pecio Reviewed-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a1563d7..b75dc2c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -584,6 +584,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, __cpufreq_driver_target( this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); + dbs_check_cpu(this_dbs_info); mutex_unlock(&this_dbs_info->timer_mutex); break; -- cgit v0.10.2 From 21ce35dc79346eb206f3271f3cc07fb9a5452ae7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 14 Sep 2012 21:10:46 +0200 Subject: sections: fix section conflicts in drivers/cpufreq Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/longhaul.h b/drivers/cpufreq/longhaul.h index cbf48fb..e2dc436 100644 --- a/drivers/cpufreq/longhaul.h +++ b/drivers/cpufreq/longhaul.h @@ -56,7 +56,7 @@ union msr_longhaul { /* * VIA C3 Samuel 1 & Samuel 2 (stepping 0) */ -static const int __cpuinitdata samuel1_mults[16] = { +static const int __cpuinitconst samuel1_mults[16] = { -1, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -75,7 +75,7 @@ static const int __cpuinitdata samuel1_mults[16] = { -1, /* 1111 -> RESERVED */ }; -static const int __cpuinitdata samuel1_eblcr[16] = { +static const int __cpuinitconst samuel1_eblcr[16] = { 50, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -97,7 +97,7 @@ static const int __cpuinitdata samuel1_eblcr[16] = { /* * VIA C3 Samuel2 Stepping 1->15 */ -static const int __cpuinitdata samuel2_eblcr[16] = { +static const int __cpuinitconst samuel2_eblcr[16] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -119,7 +119,7 @@ static const int __cpuinitdata samuel2_eblcr[16] = { /* * VIA C3 Ezra */ -static const int __cpuinitdata ezra_mults[16] = { +static const int __cpuinitconst ezra_mults[16] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -138,7 +138,7 @@ static const int __cpuinitdata ezra_mults[16] = { 120, /* 1111 -> 12.0x */ }; -static const int __cpuinitdata ezra_eblcr[16] = { +static const int __cpuinitconst ezra_eblcr[16] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -160,7 +160,7 @@ static const int __cpuinitdata ezra_eblcr[16] = { /* * VIA C3 (Ezra-T) [C5M]. */ -static const int __cpuinitdata ezrat_mults[32] = { +static const int __cpuinitconst ezrat_mults[32] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -196,7 +196,7 @@ static const int __cpuinitdata ezrat_mults[32] = { -1, /* 1111 -> RESERVED (12.0x) */ }; -static const int __cpuinitdata ezrat_eblcr[32] = { +static const int __cpuinitconst ezrat_eblcr[32] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -235,7 +235,7 @@ static const int __cpuinitdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ -static const int __cpuinitdata nehemiah_mults[32] = { +static const int __cpuinitconst nehemiah_mults[32] = { 100, /* 0000 -> 10.0x */ -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ @@ -270,7 +270,7 @@ static const int __cpuinitdata nehemiah_mults[32] = { -1, /* 1111 -> 12.0x */ }; -static const int __cpuinitdata nehemiah_eblcr[32] = { +static const int __cpuinitconst nehemiah_eblcr[32] = { 50, /* 0000 -> 5.0x */ 160, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ @@ -315,7 +315,7 @@ struct mV_pos { unsigned short pos; }; -static const struct mV_pos __cpuinitdata vrm85_mV[32] = { +static const struct mV_pos __cpuinitconst vrm85_mV[32] = { {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, @@ -326,14 +326,14 @@ static const struct mV_pos __cpuinitdata vrm85_mV[32] = { {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} }; -static const unsigned char __cpuinitdata mV_vrm85[32] = { +static const unsigned char __cpuinitconst mV_vrm85[32] = { 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 }; -static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { +static const struct mV_pos __cpuinitconst mobilevrm_mV[32] = { {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, @@ -344,7 +344,7 @@ static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { {675, 3}, {650, 2}, {625, 1}, {600, 0} }; -static const unsigned char __cpuinitdata mV_mobilevrm[32] = { +static const unsigned char __cpuinitconst mV_mobilevrm[32] = { 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, -- cgit v0.10.2 From cd664cc3a574b30988476143c1dcc9298b1fa531 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Fri, 14 Sep 2012 21:15:05 +0200 Subject: cpufreq: OMAP: remove loops_per_jiffy recalculate for smp With ARM smp common code recalculating loops_per_jiffy in a cpufreq transiton notifier call, the loops_per_jiffy recalculate in omap-cpufreq driver becomes redundant. Remove it. Signed-off-by: Richard Zhao Acked-by: Santosh Shilimkar Signed-off-by: Shawn Guo Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index b47034e..6e22f44 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -40,16 +40,6 @@ /* OPP tolerance in percentage */ #define OPP_TOLERANCE 4 -#ifdef CONFIG_SMP -struct lpj_info { - unsigned long ref; - unsigned int freq; -}; - -static DEFINE_PER_CPU(struct lpj_info, lpj_ref); -static struct lpj_info global_lpj_ref; -#endif - static struct cpufreq_frequency_table *freq_table; static atomic_t freq_table_users = ATOMIC_INIT(0); static struct clk *mpu_clk; @@ -161,31 +151,6 @@ static int omap_target(struct cpufreq_policy *policy, } freqs.new = omap_getspeed(policy->cpu); -#ifdef CONFIG_SMP - /* - * Note that loops_per_jiffy is not updated on SMP systems in - * cpufreq driver. So, update the per-CPU loops_per_jiffy value - * on frequency transition. We need to update all dependent CPUs. - */ - for_each_cpu(i, policy->cpus) { - struct lpj_info *lpj = &per_cpu(lpj_ref, i); - if (!lpj->freq) { - lpj->ref = per_cpu(cpu_data, i).loops_per_jiffy; - lpj->freq = freqs.old; - } - - per_cpu(cpu_data, i).loops_per_jiffy = - cpufreq_scale(lpj->ref, lpj->freq, freqs.new); - } - - /* And don't forget to adjust the global one */ - if (!global_lpj_ref.freq) { - global_lpj_ref.ref = loops_per_jiffy; - global_lpj_ref.freq = freqs.old; - } - loops_per_jiffy = cpufreq_scale(global_lpj_ref.ref, global_lpj_ref.freq, - freqs.new); -#endif done: /* notifiers */ -- cgit v0.10.2