From 0d66b91ebff49841f607a3c079984c907c8a4199 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 12 Sep 2013 01:42:59 +0530 Subject: cpufreq: Fix crash in cpufreq-stats during suspend/resume Stephen Warren reported that the cpufreq-stats code hits a NULL pointer dereference during the second attempt to suspend a system. He also pin-pointed the problem to commit 5302c3f "cpufreq: Perform light-weight init/teardown during suspend/resume". That commit actually ensured that the cpufreq-stats table and the cpufreq-stats sysfs entries are *not* torn down (ie., not freed) during suspend/resume, which makes it all the more surprising. However, it turns out that the root-cause is not that we access an already freed memory, but that the reference to the allocated memory gets moved around and we lose track of that during resume, leading to the reported crash in a subsequent suspend attempt. In the suspend path, during CPU offline, the value of policy->cpu is updated by choosing one of the surviving CPUs in that policy, as long as there is atleast one CPU in that policy. And cpufreq_stats_update_policy_cpu() is invoked to update the reference to the stats structure by assigning it to the new CPU. However, in the resume path, during CPU online, we end up assigning a fresh CPU as the policy->cpu, without letting cpufreq-stats know about this. Thus the reference to the stats structure remains (incorrectly) associated with the old CPU. So, in a subsequent suspend attempt, during CPU offline, we end up accessing an incorrect location to get the stats structure, which eventually leads to the NULL pointer dereference. Fix this by letting cpufreq-stats know about the update of the policy->cpu during CPU online in the resume path. (Also, move the update_policy_cpu() function higher up in the file, so that __cpufreq_add_dev() can invoke it). Reported-and-tested-by: Stephen Warren Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5a64f66..62bdb95 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -947,6 +947,18 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ + policy->last_cpu = policy->cpu; + policy->cpu = cpu; + +#ifdef CONFIG_CPU_FREQ_TABLE + cpufreq_frequency_table_update_policy_cpu(policy); +#endif + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_UPDATE_POLICY_CPU, policy); +} + static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, bool frozen) { @@ -1000,7 +1012,18 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, if (!policy) goto nomem_out; - policy->cpu = cpu; + + /* + * In the resume path, since we restore a saved policy, the assignment + * to policy->cpu is like an update of the existing policy, rather than + * the creation of a brand new one. So we need to perform this update + * by invoking update_policy_cpu(). + */ + if (frozen && cpu != policy->cpu) + update_policy_cpu(policy, cpu); + else + policy->cpu = cpu; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1092,18 +1115,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return __cpufreq_add_dev(dev, sif, false); } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) -{ - policy->last_cpu = policy->cpu; - policy->cpu = cpu; - -#ifdef CONFIG_CPU_FREQ_TABLE - cpufreq_frequency_table_update_policy_cpu(policy); -#endif - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_UPDATE_POLICY_CPU, policy); -} - static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, unsigned int old_cpu, bool frozen) { -- cgit v0.10.2 From 61173f256a3bebfbd09b4bd2c164dde378614091 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 12 Sep 2013 01:43:25 +0530 Subject: cpufreq: Restructure if/else block to avoid unintended behavior In __cpufreq_remove_dev_prepare(), the code which decides whether to remove the sysfs link or nominate a new policy cpu, is governed by an if/else block with a rather complex set of conditionals. Worse, they harbor a subtlety which leads to certain unintended behavior. The code looks like this: if (cpu != policy->cpu && !frozen) { sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { new_cpu = cpufreq_nominate_new_policy_cpu(...); ... update_policy_cpu(..., new_cpu); } The original intention was: If the CPU going offline is not policy->cpu, just remove the link. On the other hand, if the CPU going offline is the policy->cpu itself, handover the policy->cpu job to some other surviving CPU in that policy. But because the 'if' condition also includes the 'frozen' check, now there are *two* possibilities by which we can enter the 'else' block: 1. cpu == policy->cpu (intended) 2. cpu != policy->cpu && frozen (unintended) Due to the second (unintended) scenario, we end up spuriously nominating a CPU as the policy->cpu, even when the existing policy->cpu is alive and well. This can cause problems further down the line, especially when we end up nominating the same policy->cpu as the new one (ie., old == new), because it totally confuses update_policy_cpu(). To avoid this mess, restructure the if/else block to only do what was originally intended, and thus prevent any unwelcome surprises. Signed-off-by: Srivatsa S. Bhat Tested-by: Stephen Warren Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 62bdb95..247842b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1193,8 +1193,9 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, cpumask_clear_cpu(cpu, policy->cpus); unlock_policy_rwsem_write(cpu); - if (cpu != policy->cpu && !frozen) { - sysfs_remove_link(&dev->kobj, "cpufreq"); + if (cpu != policy->cpu) { + if (!frozen) + sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen); -- cgit v0.10.2 From cb38ed5cf1c4fdb7454e4b48fb70c396f5acfb21 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 12 Sep 2013 01:43:42 +0530 Subject: cpufreq: Prevent problems in update_policy_cpu() if last_cpu == new_cpu If update_policy_cpu() is invoked with the existing policy->cpu itself as the new-cpu parameter, then a lot of things can go terribly wrong. In its present form, update_policy_cpu() always assumes that the new-cpu is different from policy->cpu and invokes other functions to perform their respective updates. And those functions implement the actual update like this: per_cpu(..., new_cpu) = per_cpu(..., last_cpu); per_cpu(..., last_cpu) = NULL; Thus, when new_cpu == last_cpu, the final NULL assignment makes the per-cpu references vanish into thin air! (memory leak). From there, it leads to more problems: cpufreq_stats_create_table() now doesn't find the per-cpu reference and hence tries to create a new sysfs-group; but sysfs already had created the group earlier, so it complains that it cannot create a duplicate filename. In short, the repercussions of a rather innocuous invocation of update_policy_cpu() can turn out to be pretty nasty. Ideally update_policy_cpu() should handle this situation (new == last) gracefully, and not lead to such severe problems. So fix it by adding an appropriate check. Signed-off-by: Srivatsa S. Bhat Tested-by: Stephen Warren Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 247842b..d32040c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -949,6 +949,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) { + if (cpu == policy->cpu) + return; + policy->last_cpu = policy->cpu; policy->cpu = cpu; -- cgit v0.10.2 From 44871c9c7f7963f8869dd8bc9620221c9e9db153 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 11 Sep 2013 15:05:05 +0800 Subject: cpufreq: Acquire the lock in cpufreq_policy_restore() for reading In cpufreq_policy_restore() before system suspend policy is read from percpu's cpufreq_cpu_data_fallback. It's a read operation rather than a write one, so take the lock for reading in there. Signed-off-by: Lan Tianyu Reviewed-by: Srivatsa S. Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d32040c..43c24aa 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -906,11 +906,11 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) struct cpufreq_policy *policy; unsigned long flags; - write_lock_irqsave(&cpufreq_driver_lock, flags); + read_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data_fallback, cpu); - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); return policy; } -- cgit v0.10.2