From 6f8c2e7933679f54b6478945dc72e59ef9a3d5e0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Jan 2013 23:40:01 +0100 Subject: intel_idle: Don't register CPU notifier if we are not running. The 'intel_idle_probe' probes the CPU and sets the CPU notifier. But if later on during the module initialization we fail (say in cpuidle_register_driver), we stop loading, but we neglect to unregister the CPU notifier. This means that during CPU hotplug events the system will fail: calling intel_idle_init+0x0/0x326 @ 1 intel_idle: MWAIT substates: 0x1120 intel_idle: v0.4 model 0x2A intel_idle: lapic_timer_reliable_states 0xffffffff intel_idle: intel_idle yielding to none initcall intel_idle_init+0x0/0x326 returned -19 after 14 usecs ... some time later, offlining and onlining a CPU: cpu 3 spinlock event irq 62 BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] __cpuidle_register_device+0x1c/0x120 PGD 99b8b067 PUD 99b95067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: xen_evtchn nouveau mxm_wmi wmi radeon ttm i915 fbcon tileblit font atl1c bitblit softcursor drm_kms_helper video xen_blkfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea xenfs xen_privcmd mperf CPU 0 Pid: 2302, comm: udevd Not tainted 3.8.0-rc3upstream-00249-g09ad159 #1 MSI MS-7680/H61M-P23 (MS-7680) RIP: e030:[] [] __cpuidle_register_device+0x1c/0x120 RSP: e02b:ffff88009dacfcb8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880105380000 RCX: 000000000000001c RDX: 0000000000000000 RSI: 0000000000000055 RDI: ffff880105380000 RBP: ffff88009dacfce8 R08: ffffffff81a4f048 R09: 0000000000000008 R10: 0000000000000008 R11: 0000000000000000 R12: ffff880105380000 R13: 00000000ffffffdd R14: 0000000000000000 R15: ffffffff81a523d0 FS: 00007f37bd83b7a0(0000) GS:ffff880105200000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 00000000a09ea000 CR4: 0000000000042660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 
DR7: 0000000000000400 Process udevd (pid: 2302, threadinfo ffff88009dace000, task ffff88009afb47f0) Stack: ffffffff8107f2d0 ffffffff810c2fb7 ffff88009dacfce8 00000000ffffffea ffff880105380000 00000000ffffffdd ffff88009dacfd08 ffffffff814d9882 0000000000000003 ffff880105380000 ffff88009dacfd28 ffffffff81340afd Call Trace: [] ? collect_cpu_info_local+0x30/0x30 [] ? __might_sleep+0xe7/0x100 [] cpuidle_register_device+0x32/0x70 [] intel_idle_cpu_init+0xad/0x110 [] cpu_hotplug_notify+0x68/0x80 [] notifier_call_chain+0x4d/0x70 [] __raw_notifier_call_chain+0x9/0x10 [] __cpu_notify+0x1b/0x30 [] _cpu_up+0x103/0x14b [] cpu_up+0xd9/0xec [] store_online+0x94/0xd0 [] dev_attr_store+0x1b/0x20 [] sysfs_write_file+0xf4/0x170 [] vfs_write+0xb4/0x130 [] sys_write+0x5a/0xa0 [] system_call_fastpath+0x16/0x1b Code: 03 18 00 c9 c3 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 30 48 89 5d e8 4c 89 65 f0 48 89 fb 4c 89 6d f8 e8 84 08 00 00 <48> 8b 78 08 49 89 c4 e8 f8 7f c1 ff 89 c2 b8 ea ff ff ff 84 d2 RIP [] __cpuidle_register_device+0x1c/0x120 RSP This patch fixes that by moving the CPU notifier registration as the last item to be done by the module. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Srivatsa S. Bhat Cc: 3.6+ Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 4ba384f..2df9414 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -448,8 +448,6 @@ static int intel_idle_probe(void) else on_each_cpu(__setup_broadcast_timer, (void *)true, 1); - register_cpu_notifier(&cpu_hotplug_notifier); - pr_debug(PREFIX "v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -612,6 +610,7 @@ static int __init intel_idle_init(void) return retval; } } + register_cpu_notifier(&cpu_hotplug_notifier); return 0; } -- cgit v0.10.2 From b88a634a903d9670aa5f2f785aa890628ce0dece Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Jan 2013 23:40:07 +0100 Subject: ACPI / cpuidle: Fix NULL pointer issues when cpuidle is disabled If cpuidle is disabled, that means that: per_cpu(acpi_cpuidle_device, pr->id) is set to NULL as the acpi_processor_power_init ends up failing at retval = cpuidle_register_driver(&acpi_idle_driver) (in acpi_processor_power_init) and never sets the per_cpu idle device. 
So when acpi_processor_hotplug on CPU online notification tries to reference said device it crashes: cpu 3 spinlock event irq 62 BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [] acpi_processor_setup_cpuidle_cx+0x3f/0x105 PGD a259b067 PUD ab38b067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: dm_multipath dm_mod xen_evtchn iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi libcrc32c crc32c nouveau mxm_wmi wmi radeon ttm sg sr_mod sd_mod cdrom ata_generic ata_piix libata crc32c_intel scsi_mod atl1c i915 fbcon tileblit font bitblit softcursor drm_kms_helper video xen_blkfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea xenfs xen_privcmd mperf CPU 1 Pid: 3047, comm: bash Not tainted 3.8.0-rc3upstream-00250-g165c029 #1 MSI MS-7680/H61M-P23 (MS-7680) RIP: e030:[] [] acpi_processor_setup_cpuidle_cx+0x3f/0x105 RSP: e02b:ffff88001742dca8 EFLAGS: 00010202 RAX: 0000000000010be9 RBX: ffff8800a0a61800 RCX: ffff880105380000 RDX: 0000000000000003 RSI: 0000000000000200 RDI: ffff8800a0a61800 RBP: ffff88001742dce8 R08: ffffffff81812360 R09: 0000000000000200 R10: aaaaaaaaaaaaaaaa R11: 0000000000000001 R12: ffff8800a0a61800 R13: 00000000ffffff01 R14: 0000000000000000 R15: ffffffff81a907a0 FS: 00007fd6942f7700(0000) GS:ffff880105280000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000004 CR3: 00000000a6773000 CR4: 0000000000042660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process bash (pid: 3047, threadinfo ffff88001742c000, task ffff880017944000) Stack: 0000000000000150 ffff880100f59e00 ffff88001742dcd8 ffff8800a0a61800 0000000000000000 00000000ffffff01 0000000000000000 ffffffff81a907a0 ffff88001742dd18 ffffffff813815b1 ffff88001742dd08 ffffffff810ae336 Call Trace: [] acpi_processor_hotplug+0x7c/0x9f [] ? 
schedule_delayed_work_on+0x16/0x20 [] acpi_cpu_soft_notify+0x90/0xca [] notifier_call_chain+0x4d/0x70 [] __raw_notifier_call_chain+0x9/0x10 [] __cpu_notify+0x1b/0x30 [] _cpu_up+0x103/0x14b [] cpu_up+0xd9/0xec [] store_online+0x94/0xd0 [] dev_attr_store+0x1b/0x20 [] sysfs_write_file+0xf4/0x170 This patch fixes it. Signed-off-by: Konrad Rzeszutek Wilk Cc: Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f1a5da4..fea6f8d 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -958,6 +958,9 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) return -EINVAL; } + if (!dev) + return -EINVAL; + dev->cpu = pr->id; if (max_cstate == 0) -- cgit v0.10.2 From 631e8ac18fd59722e7ceb15fceeef3f368c250f9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 18 Jan 2013 00:19:37 +0100 Subject: powernow-k8: Add a kconfig dependency on acpi-cpufreq Andreas reports in https://bugzilla.kernel.org/show_bug.cgi?id=51741 that with his Gentoo config, acpi-cpufreq wasn't enabled and powernow-k8 couldn't hand off properly to acpi-cpufreq leading to running without P-state support (i.e., cores are constantly in P0). To alleviate that, we need to make powernow-k8 depend on acpi-cpufreq so that acpi-cpufreq is always present. References: https://bugzilla.kernel.org/show_bug.cgi?id=51741 Reported-by: Andreas Signed-off-by: Borislav Petkov Cc: 3.7+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 934854a..7227cd7 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -106,7 +106,7 @@ config X86_POWERNOW_K7_ACPI config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" select CPU_FREQ_TABLE - depends on ACPI && ACPI_PROCESSOR + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ help This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. 
Support for K10 and newer processors is now in acpi-cpufreq. -- cgit v0.10.2 From f427e5f1cf75bba84cccdac1d8a90552d9ae1065 Mon Sep 17 00:00:00 2001 From: Thomas Schlichter Date: Sat, 19 Jan 2013 00:28:22 +0100 Subject: ACPI / processor: Get power info before updating the C-states acpi_processor_get_power_info() has to be called before acpi_processor_setup_cpuidle_states() to have the latest information available. This fixes the missing C-state information after AC-->DC transition. Signed-off-by: Thomas Schlichter Cc: Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index fea6f8d..ed9a1cc 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1152,6 +1152,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) } /* Populate Updated C-state information */ + acpi_processor_get_power_info(pr); acpi_processor_setup_cpuidle_states(pr); /* Enable all cpuidle devices */ -- cgit v0.10.2 From 25216865392a6e1f3032855aee7407de1fe0b70c Mon Sep 17 00:00:00 2001 From: Lans Zhang Date: Wed, 16 Jan 2013 09:03:29 +0000 Subject: ACPI, APEI: Fixup incorrect 64-bit access width firmware bug The bit width check was introduced by 15afae60 (ACPI, APEI: Fix incorrect APEI register bit width check and usage), and a fixup for incorrect 32-bit width memory address was given by f712c71 (ACPI, APEI: Fixup common access width firmware bug). Now there is a similar symptom: [Firmware Bug]: APEI: Invalid bit width + offset in GAR [0x12345000/64/0/3/0] Another bogus BIOS reports an incorrect 64-bit width in trigger table. Thus, apply to a similar workaround for 64-bit width memory address. Signed-off-by: Lans Zhang Acked-by: Gary Hade Acked-by: Myron Stowe Acked-by: Jean Delvare Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 00a7836..46f80e2 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -590,6 +590,9 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr, if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 && *access_bit_width < 32) *access_bit_width = 32; + else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 && + *access_bit_width < 64) + *access_bit_width = 64; if ((bit_width + bit_offset) > *access_bit_width) { pr_warning(FW_BUG APEI_PFX -- cgit v0.10.2 From f44d188acdca1a34e4439ca6a173f85086e0e655 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Jan 2013 19:52:32 +0000 Subject: cpufreq: OMAP: use RCU locks around usage of OPP OPP pointer is RCU protected, hence after finding it, de-reference also should be protected with the same RCU context else the OPP pointer may become invalid. Reported-by: Alexander Holler Tested-by: Alexander Holler Acked-by: Alexander Holler Signed-off-by: Nishanth Menon Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 1f3417a..97102b0 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -110,13 +110,16 @@ static int omap_target(struct cpufreq_policy *policy, freq = ret; if (mpu_reg) { + rcu_read_lock(); opp = opp_find_freq_ceil(mpu_dev, &freq); if (IS_ERR(opp)) { + rcu_read_unlock(); dev_err(mpu_dev, "%s: unable to find MPU OPP for %d\n", __func__, freqs.new); return -EINVAL; } volt = opp_get_voltage(opp); + rcu_read_unlock(); tol = volt * OPP_TOLERANCE / 100; volt_old = regulator_get_voltage(mpu_reg); } -- cgit v0.10.2 From 78e8eb8feab7d85f4cc215afe1457a228bf4eed9 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Jan 2013 19:52:33 +0000 Subject: cpufreq: cpufreq-cpu0: use RCU locks around usage of OPP OPP pointer is RCU protected, hence after finding it, de-reference also should be protected with the same RCU context else the OPP pointer may become invalid. Reported-by: Jack Mitchell Tested-by: Alexander Holler Tested-by: Jack Mitchell Acked-by: Alexander Holler Signed-off-by: Nishanth Menon Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 52bf36d..debc5a7 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -71,12 +71,15 @@ static int cpu0_set_target(struct cpufreq_policy *policy, } if (cpu_reg) { + rcu_read_lock(); opp = opp_find_freq_ceil(cpu_dev, &freq_Hz); if (IS_ERR(opp)) { + rcu_read_unlock(); pr_err("failed to find OPP for %ld\n", freq_Hz); return PTR_ERR(opp); } volt = opp_get_voltage(opp); + rcu_read_unlock(); tol = volt * voltage_tolerance / 100; volt_old = regulator_get_voltage(cpu_reg); } @@ -236,12 +239,14 @@ static int cpu0_cpufreq_driver_init(void) */ for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) ; + rcu_read_lock(); opp = opp_find_freq_exact(cpu_dev, freq_table[0].frequency * 1000, true); min_uV = opp_get_voltage(opp); opp = opp_find_freq_exact(cpu_dev, freq_table[i-1].frequency * 1000, true); max_uV = opp_get_voltage(opp); + rcu_read_unlock(); ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); if (ret > 0) transition_latency += ret * 1000; -- cgit v0.10.2 From bcb27549f4185ca7d0168e201931613706ef2b83 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Jan 2013 19:52:34 +0000 Subject: PM / devfreq: add locking documentation for recommended_opp OPP pointers are protected by RCU locks, the pointer validity is permissible only under the section of rcu_read_lock to rcu_read_unlock Add documentation to the effect. Signed-off-by: Nishanth Menon Signed-off-by: Rafael J. Wysocki diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 53766f3..3b36797 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -994,6 +994,11 @@ module_exit(devfreq_exit); * @freq: The frequency given to target function * @flags: Flags handed from devfreq framework. * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. 
The reason for the same is that the opp pointer which is + * returned will remain valid for use with opp_get_{voltage, freq} only while + * under the locked area. The pointer returned must be used prior to unlocking + * with rcu_read_unlock() to maintain the integrity of the pointer. */ struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq, u32 flags) -- cgit v0.10.2 From 8fa938acb318378463987b04be083dc2467e0480 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 18 Jan 2013 19:52:35 +0000 Subject: PM / devfreq: exynos4_bus: honor RCU lock usage OPP pointers cannot be expected to be valid beyond the boundary of rcu_read_lock and rcu_read_unlock. Unfortunately, the current exynos4 busfreq driver does not honor the usage constraint and stores the OPP pointer in struct busfreq_data. This could potentially become invalid later such as: across devfreq opp change decisions, resulting in unpredictable behavior. To fix this, we introduce a busfreq specific busfreq_opp_info structure which is used to handle OPP information. OPP information is de-referenced to voltage and frequency pairs as needed into busfreq_opp_info structure and used as needed. Signed-off-by: Nishanth Menon Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c index 80c745e..46d94e9 100644 --- a/drivers/devfreq/exynos4_bus.c +++ b/drivers/devfreq/exynos4_bus.c @@ -73,6 +73,16 @@ enum busclk_level_idx { #define EX4210_LV_NUM (LV_2 + 1) #define EX4x12_LV_NUM (LV_4 + 1) +/** + * struct busfreq_opp_info - opp information for bus + * @rate: Frequency in hertz + * @volt: Voltage in microvolts corresponding to this OPP + */ +struct busfreq_opp_info { + unsigned long rate; + unsigned long volt; +}; + struct busfreq_data { enum exynos4_busf_type type; struct device *dev; @@ -80,7 +90,7 @@ struct busfreq_data { bool disabled; struct regulator *vdd_int; struct regulator *vdd_mif; /* Exynos4412/4212 only */ - struct opp *curr_opp; + struct busfreq_opp_info curr_oppinfo; struct exynos4_ppmu dmc[2]; struct notifier_block pm_notifier; @@ -296,13 +306,14 @@ static unsigned int exynos4x12_clkdiv_sclkip[][3] = { }; -static int exynos4210_set_busclk(struct busfreq_data *data, struct opp *opp) +static int exynos4210_set_busclk(struct busfreq_data *data, + struct busfreq_opp_info *oppi) { unsigned int index; unsigned int tmp; for (index = LV_0; index < EX4210_LV_NUM; index++) - if (opp_get_freq(opp) == exynos4210_busclk_table[index].clk) + if (oppi->rate == exynos4210_busclk_table[index].clk) break; if (index == EX4210_LV_NUM) @@ -361,13 +372,14 @@ static int exynos4210_set_busclk(struct busfreq_data *data, struct opp *opp) return 0; } -static int exynos4x12_set_busclk(struct busfreq_data *data, struct opp *opp) +static int exynos4x12_set_busclk(struct busfreq_data *data, + struct busfreq_opp_info *oppi) { unsigned int index; unsigned int tmp; for (index = LV_0; index < EX4x12_LV_NUM; index++) - if (opp_get_freq(opp) == exynos4x12_mifclk_table[index].clk) + if (oppi->rate == exynos4x12_mifclk_table[index].clk) break; if (index == EX4x12_LV_NUM) @@ -576,11 +588,12 @@ static int exynos4x12_get_intspec(unsigned long mifclk) return -EINVAL; } -static int 
exynos4_bus_setvolt(struct busfreq_data *data, struct opp *opp, - struct opp *oldopp) +static int exynos4_bus_setvolt(struct busfreq_data *data, + struct busfreq_opp_info *oppi, + struct busfreq_opp_info *oldoppi) { int err = 0, tmp; - unsigned long volt = opp_get_voltage(opp); + unsigned long volt = oppi->volt; switch (data->type) { case TYPE_BUSF_EXYNOS4210: @@ -595,11 +608,11 @@ static int exynos4_bus_setvolt(struct busfreq_data *data, struct opp *opp, if (err) break; - tmp = exynos4x12_get_intspec(opp_get_freq(opp)); + tmp = exynos4x12_get_intspec(oppi->rate); if (tmp < 0) { err = tmp; regulator_set_voltage(data->vdd_mif, - opp_get_voltage(oldopp), + oldoppi->volt, MAX_SAFEVOLT); break; } @@ -609,7 +622,7 @@ static int exynos4_bus_setvolt(struct busfreq_data *data, struct opp *opp, /* Try to recover */ if (err) regulator_set_voltage(data->vdd_mif, - opp_get_voltage(oldopp), + oldoppi->volt, MAX_SAFEVOLT); break; default: @@ -626,17 +639,26 @@ static int exynos4_bus_target(struct device *dev, unsigned long *_freq, struct platform_device *pdev = container_of(dev, struct platform_device, dev); struct busfreq_data *data = platform_get_drvdata(pdev); - struct opp *opp = devfreq_recommended_opp(dev, _freq, flags); - unsigned long freq = opp_get_freq(opp); - unsigned long old_freq = opp_get_freq(data->curr_opp); + struct opp *opp; + unsigned long freq; + unsigned long old_freq = data->curr_oppinfo.rate; + struct busfreq_opp_info new_oppinfo; - if (IS_ERR(opp)) + rcu_read_lock(); + opp = devfreq_recommended_opp(dev, _freq, flags); + if (IS_ERR(opp)) { + rcu_read_unlock(); return PTR_ERR(opp); + } + new_oppinfo.rate = opp_get_freq(opp); + new_oppinfo.volt = opp_get_voltage(opp); + rcu_read_unlock(); + freq = new_oppinfo.rate; if (old_freq == freq) return 0; - dev_dbg(dev, "targetting %lukHz %luuV\n", freq, opp_get_voltage(opp)); + dev_dbg(dev, "targetting %lukHz %luuV\n", freq, new_oppinfo.volt); mutex_lock(&data->lock); @@ -644,17 +666,18 @@ static int 
exynos4_bus_target(struct device *dev, unsigned long *_freq, goto out; if (old_freq < freq) - err = exynos4_bus_setvolt(data, opp, data->curr_opp); + err = exynos4_bus_setvolt(data, &new_oppinfo, + &data->curr_oppinfo); if (err) goto out; if (old_freq != freq) { switch (data->type) { case TYPE_BUSF_EXYNOS4210: - err = exynos4210_set_busclk(data, opp); + err = exynos4210_set_busclk(data, &new_oppinfo); break; case TYPE_BUSF_EXYNOS4x12: - err = exynos4x12_set_busclk(data, opp); + err = exynos4x12_set_busclk(data, &new_oppinfo); break; default: err = -EINVAL; @@ -664,11 +687,12 @@ static int exynos4_bus_target(struct device *dev, unsigned long *_freq, goto out; if (old_freq > freq) - err = exynos4_bus_setvolt(data, opp, data->curr_opp); + err = exynos4_bus_setvolt(data, &new_oppinfo, + &data->curr_oppinfo); if (err) goto out; - data->curr_opp = opp; + data->curr_oppinfo = new_oppinfo; out: mutex_unlock(&data->lock); return err; @@ -702,7 +726,7 @@ static int exynos4_bus_get_dev_status(struct device *dev, exynos4_read_ppmu(data); busier_dmc = exynos4_get_busier_dmc(data); - stat->current_frequency = opp_get_freq(data->curr_opp); + stat->current_frequency = data->curr_oppinfo.rate; if (busier_dmc) addr = S5P_VA_DMC1; @@ -933,6 +957,7 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this, struct busfreq_data *data = container_of(this, struct busfreq_data, pm_notifier); struct opp *opp; + struct busfreq_opp_info new_oppinfo; unsigned long maxfreq = ULONG_MAX; int err = 0; @@ -943,18 +968,29 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this, data->disabled = true; + rcu_read_lock(); opp = opp_find_freq_floor(data->dev, &maxfreq); + if (IS_ERR(opp)) { + rcu_read_unlock(); + dev_err(data->dev, "%s: unable to find a min freq\n", + __func__); + return PTR_ERR(opp); + } + new_oppinfo.rate = opp_get_freq(opp); + new_oppinfo.volt = opp_get_voltage(opp); + rcu_read_unlock(); - err = exynos4_bus_setvolt(data, opp, data->curr_opp); + 
err = exynos4_bus_setvolt(data, &new_oppinfo, + &data->curr_oppinfo); if (err) goto unlock; switch (data->type) { case TYPE_BUSF_EXYNOS4210: - err = exynos4210_set_busclk(data, opp); + err = exynos4210_set_busclk(data, &new_oppinfo); break; case TYPE_BUSF_EXYNOS4x12: - err = exynos4x12_set_busclk(data, opp); + err = exynos4x12_set_busclk(data, &new_oppinfo); break; default: err = -EINVAL; @@ -962,7 +998,7 @@ static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this, if (err) goto unlock; - data->curr_opp = opp; + data->curr_oppinfo = new_oppinfo; unlock: mutex_unlock(&data->lock); if (err) @@ -1027,13 +1063,17 @@ static int exynos4_busfreq_probe(struct platform_device *pdev) } } + rcu_read_lock(); opp = opp_find_freq_floor(dev, &exynos4_devfreq_profile.initial_freq); if (IS_ERR(opp)) { + rcu_read_unlock(); dev_err(dev, "Invalid initial frequency %lu kHz.\n", exynos4_devfreq_profile.initial_freq); return PTR_ERR(opp); } - data->curr_opp = opp; + data->curr_oppinfo.rate = opp_get_freq(opp); + data->curr_oppinfo.volt = opp_get_voltage(opp); + rcu_read_unlock(); platform_set_drvdata(pdev, data); -- cgit v0.10.2 From 9855d8ce41a7801548a05d844db2f46c3e810166 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 22 Jan 2013 13:37:21 +0100 Subject: ACPI: Check MSR valid bit before using P-state frequencies To fix incorrect P-state frequencies which can happen on some AMD systems f594065faf4f9067c2283a34619fc0714e79a98d "ACPI: Add fixups for AMD P-state figures" introduced a quirk to obtain the correct values by reading from AMD specific MSRs. This did cause a regression when running a kernel using that quirk under Xen which does (currently) not pass through MSR reads to the HW. Instead the guest gets a 0 in return. And this seems to cause a failure to initialize the ondemand governor (hard to say for sure as all P-states appear to run at the same frequency). 
While this should also be fixed in the hypervisor (to allow a guest to read that MSR), this patch is intended to work around the issue in the meantime. In discussion it turned out that indeed real HW/BIOSes may choose to not set the valid bit and thus mark the P-state as invalid. So this could be considered a fix for broken BIOSes that also works around the issue on Xen. Signed-off-by: Stefan Bader Cc: 3.7+ Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 836bfe0..53e7ac9 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -340,6 +340,13 @@ static void amd_fixup_frequency(struct acpi_processor_px *px, int i) if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) || boot_cpu_data.x86 == 0x11) { rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi); + /* + * MSR C001_0064+: + * Bit 63: PstateEn. Read-write. If set, the P-state is valid. + */ + if (!(hi & BIT(31))) + return; + fid = lo & 0x3f; did = (lo >> 6) & 7; if (boot_cpu_data.x86 == 0x10) -- cgit v0.10.2 From efa17194581bdfca0986dabc178908bd7c21ba00 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 22 Jan 2013 22:33:46 +0100 Subject: cpufreq: Add module aliases for acpi-cpufreq The acpi core will call request_module("acpi-cpufreq") on subsystem init, but this will fail if the module isn't available at that stage of boot. Add some module aliases to ensure that udev can load the module on Intel and AMD systems with the appropriate feature bits - I /think/ that this will also work on VIA systems, but haven't verified that. References: http://lkml.kernel.org/r/1448223.sdUJnNSRz4@vostro.rjw.lan Signed-off-by: Matthew Garrett Tested-by: Leonid Isaev Acked-by: Borislav Petkov Cc: 3.7+ Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 0d048f6..7b0d49d 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -1030,4 +1030,11 @@ MODULE_PARM_DESC(acpi_pstate_strict, late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); +static const struct x86_cpu_id acpi_cpufreq_ids[] = { + X86_FEATURE_MATCH(X86_FEATURE_ACPI), + X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); + MODULE_ALIAS("acpi"); -- cgit v0.10.2