From 0bed612be638e41456cd8cb270a2b411a5b43d63 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 2 Apr 2016 01:08:43 +0200
Subject: cpufreq: sched: Helpers to add and remove update_util hooks

Replace the single helper for adding and removing cpufreq utilization
update hooks, cpufreq_set_update_util_data(), with a pair of helpers,
cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook(),
and modify the users of cpufreq_set_update_util_data() accordingly.

With the new helpers, the code using them doesn't need to worry
about the internals of struct update_util_data and in particular
it doesn't need to worry about populating the func field in it
properly upfront.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 10a5cfe..3a0312f 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -258,43 +258,6 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(dbs_update);
 
-static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
-				unsigned int delay_us)
-{
-	struct cpufreq_policy *policy = policy_dbs->policy;
-	int cpu;
-
-	gov_update_sample_delay(policy_dbs, delay_us);
-	policy_dbs->last_sample_time = 0;
-
-	for_each_cpu(cpu, policy->cpus) {
-		struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
-
-		cpufreq_set_update_util_data(cpu, &cdbs->update_util);
-	}
-}
-
-static inline void gov_clear_update_util(struct cpufreq_policy *policy)
-{
-	int i;
-
-	for_each_cpu(i, policy->cpus)
-		cpufreq_set_update_util_data(i, NULL);
-
-	synchronize_sched();
-}
-
-static void gov_cancel_work(struct cpufreq_policy *policy)
-{
-	struct policy_dbs_info *policy_dbs = policy->governor_data;
-
-	gov_clear_update_util(policy_dbs->policy);
-	irq_work_sync(&policy_dbs->irq_work);
-	cancel_work_sync(&policy_dbs->work);
-	atomic_set(&policy_dbs->work_count, 0);
-	policy_dbs->work_in_progress = false;
-}
-
 static void dbs_work_handler(struct work_struct *work)
 {
 	struct policy_dbs_info *policy_dbs;
@@ -382,6 +345,44 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time,
 	irq_work_queue(&policy_dbs->irq_work);
 }
 
+static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
+				unsigned int delay_us)
+{
+	struct cpufreq_policy *policy = policy_dbs->policy;
+	int cpu;
+
+	gov_update_sample_delay(policy_dbs, delay_us);
+	policy_dbs->last_sample_time = 0;
+
+	for_each_cpu(cpu, policy->cpus) {
+		struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
+
+		cpufreq_add_update_util_hook(cpu, &cdbs->update_util,
+					     dbs_update_util_handler);
+	}
+}
+
+static inline void gov_clear_update_util(struct cpufreq_policy *policy)
+{
+	int i;
+
+	for_each_cpu(i, policy->cpus)
+		cpufreq_remove_update_util_hook(i);
+
+	synchronize_sched();
+}
+
+static void gov_cancel_work(struct cpufreq_policy *policy)
+{
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+
+	gov_clear_update_util(policy_dbs->policy);
+	irq_work_sync(&policy_dbs->irq_work);
+	cancel_work_sync(&policy_dbs->work);
+	atomic_set(&policy_dbs->work_count, 0);
+	policy_dbs->work_in_progress = false;
+}
+
 static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
 						     struct dbs_governor *gov)
 {
@@ -404,7 +405,6 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
 		j_cdbs->policy_dbs = policy_dbs;
-		j_cdbs->update_util.func = dbs_update_util_handler;
 	}
 	return policy_dbs;
 }
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9ae1596..a382195 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1107,8 +1107,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
 	intel_pstate_busy_pid_reset(cpu);
 
-	cpu->update_util.func = intel_pstate_update_util;
-
 	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
 
 	return 0;
@@ -1132,12 +1130,13 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
-	cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
+				     intel_pstate_update_util);
 }
 
 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 {
-	cpufreq_set_update_util_data(cpu, NULL);
+	cpufreq_remove_update_util_hook(cpu);
 	synchronize_sched();
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 60bba7e..1b88259 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3240,7 +3240,10 @@ struct update_util_data {
 		     u64 time, unsigned long util, unsigned long max);
 };
 
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+			void (*func)(struct update_util_data *data, u64 time,
+				     unsigned long util, unsigned long max));
+void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
 #endif
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 928c4ba..1141954 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -14,24 +14,50 @@
 DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
 /**
- * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
  * @cpu: The CPU to set the pointer for.
  * @data: New pointer value.
+ * @func: Callback function to set for the CPU.
  *
- * Set and publish the update_util_data pointer for the given CPU.  That pointer
- * points to a struct update_util_data object containing a callback function
- * to call from cpufreq_update_util().  That function will be called from an RCU
- * read-side critical section, so it must not sleep.
+ * Set and publish the update_util_data pointer for the given CPU.
  *
- * Callers must use RCU-sched callbacks to free any memory that might be
- * accessed via the old update_util_data pointer or invoke synchronize_sched()
- * right after this function to avoid use-after-free.
+ * The update_util_data pointer of @cpu is set to @data and the callback
+ * function pointer in the target struct update_util_data is set to @func.
+ * That function will be called by cpufreq_update_util() from RCU-sched
+ * read-side critical sections, so it must not sleep.  @data will always be
+ * passed to it as the first argument which allows the function to get to the
+ * target update_util_data structure and its container.
+ *
+ * The update_util_data pointer of @cpu must be NULL when this function is
+ * called or it will WARN() and return with no effect.
  */
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+			void (*func)(struct update_util_data *data, u64 time,
+				     unsigned long util, unsigned long max))
 {
-	if (WARN_ON(data && !data->func))
+	if (WARN_ON(!data || !func))
 		return;
 
+	if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
+		return;
+
+	data->func = func;
 	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
 }
-EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
+
+/**
+ * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer.
+ * @cpu: The CPU to clear the pointer for.
+ *
+ * Clear the update_util_data pointer for the given CPU.
+ *
+ * Callers must use RCU-sched callbacks to free any memory that might be
+ * accessed via the old update_util_data pointer or invoke synchronize_sched()
+ * right after this function to avoid use-after-free.
+ */
+void cpufreq_remove_update_util_hook(int cpu)
+{
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
+}
+EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
-- 
cgit v0.10.2


From 0dd3c1d678aa219a7332984fcedbdd8970e92d5b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Mar 2016 02:47:51 +0100
Subject: cpufreq: governor: New data type for management part of dbs_data

In addition to fields representing governor tunables, struct dbs_data
contains some fields needed for the management of objects of that
type.  As it turns out, that part of struct dbs_data may be shared
with (future) governors that won't use the common code used by
"ondemand" and "conservative", so move it to a separate struct type
and modify the code using struct dbs_data to follow.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index bf4913f..316df24 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -129,9 +129,10 @@ static struct notifier_block cs_cpufreq_notifier_block = {
 /************************** sysfs interface ************************/
 static struct dbs_governor cs_dbs_gov;
 
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+					  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 	ret = sscanf(buf, "%u", &input);
@@ -143,9 +144,10 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
@@ -158,9 +160,10 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
 	return count;
 }
 
-static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+				    const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
@@ -175,9 +178,10 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -199,9 +203,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
 	return count;
 }
 
-static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+			       size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 3a0312f..6a565e2 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -43,9 +43,10 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
  * This must be called with dbs_data->mutex held, otherwise traversing
  * policy_dbs_list isn't safe.
  */
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
 	unsigned int rate;
 	int ret;
@@ -59,7 +60,7 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
 	 * We are operating under dbs_data->mutex and so the list and its
 	 * entries can't be freed concurrently.
 	 */
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
 		mutex_lock(&policy_dbs->timer_mutex);
 		/*
 		 * On 32-bit architectures this may race with the
@@ -96,7 +97,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 {
 	struct policy_dbs_info *policy_dbs;
 
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) {
 		unsigned int j;
 
 		for_each_cpu(j, policy_dbs->policy->cpus) {
@@ -111,9 +112,9 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 }
 EXPORT_SYMBOL_GPL(gov_update_cpu_data);
 
-static inline struct dbs_data *to_dbs_data(struct kobject *kobj)
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
 {
-	return container_of(kobj, struct dbs_data, kobj);
+	return container_of(kobj, struct gov_attr_set, kobj);
 }
 
 static inline struct governor_attr *to_gov_attr(struct attribute *attr)
@@ -124,25 +125,24 @@ static inline struct governor_attr *to_gov_attr(struct attribute *attr)
 static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
 			     char *buf)
 {
-	struct dbs_data *dbs_data = to_dbs_data(kobj);
 	struct governor_attr *gattr = to_gov_attr(attr);
 
-	return gattr->show(dbs_data, buf);
+	return gattr->show(to_gov_attr_set(kobj), buf);
 }
 
 static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
 			      const char *buf, size_t count)
 {
-	struct dbs_data *dbs_data = to_dbs_data(kobj);
+	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
 	struct governor_attr *gattr = to_gov_attr(attr);
 	int ret = -EBUSY;
 
-	mutex_lock(&dbs_data->mutex);
+	mutex_lock(&attr_set->update_lock);
 
-	if (dbs_data->usage_count)
-		ret = gattr->store(dbs_data, buf, count);
+	if (attr_set->usage_count)
+		ret = gattr->store(attr_set, buf, count);
 
-	mutex_unlock(&dbs_data->mutex);
+	mutex_unlock(&attr_set->update_lock);
 
 	return ret;
 }
@@ -425,6 +425,41 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
 	gov->free(policy_dbs);
 }
 
+static void gov_attr_set_init(struct gov_attr_set *attr_set,
+			      struct list_head *list_node)
+{
+	INIT_LIST_HEAD(&attr_set->policy_list);
+	mutex_init(&attr_set->update_lock);
+	attr_set->usage_count = 1;
+	list_add(list_node, &attr_set->policy_list);
+}
+
+static void gov_attr_set_get(struct gov_attr_set *attr_set,
+			     struct list_head *list_node)
+{
+	mutex_lock(&attr_set->update_lock);
+	attr_set->usage_count++;
+	list_add(list_node, &attr_set->policy_list);
+	mutex_unlock(&attr_set->update_lock);
+}
+
+static unsigned int gov_attr_set_put(struct gov_attr_set *attr_set,
+				     struct list_head *list_node)
+{
+	unsigned int count;
+
+	mutex_lock(&attr_set->update_lock);
+	list_del(list_node);
+	count = --attr_set->usage_count;
+	mutex_unlock(&attr_set->update_lock);
+	if (count)
+		return count;
+
+	kobject_put(&attr_set->kobj);
+	mutex_destroy(&attr_set->update_lock);
+	return 0;
+}
+
 static int cpufreq_governor_init(struct cpufreq_policy *policy)
 {
 	struct dbs_governor *gov = dbs_governor_of(policy);
@@ -453,10 +488,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
 		policy_dbs->dbs_data = dbs_data;
 		policy->governor_data = policy_dbs;
 
-		mutex_lock(&dbs_data->mutex);
-		dbs_data->usage_count++;
-		list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
-		mutex_unlock(&dbs_data->mutex);
+		gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);
 		goto out;
 	}
 
@@ -466,8 +498,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
 		goto free_policy_dbs_info;
 	}
 
-	INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
-	mutex_init(&dbs_data->mutex);
+	gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
 
 	ret = gov->init(dbs_data, !policy->governor->initialized);
 	if (ret)
@@ -487,14 +518,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
 	if (!have_governor_per_policy())
 		gov->gdbs_data = dbs_data;
 
-	policy->governor_data = policy_dbs;
-
 	policy_dbs->dbs_data = dbs_data;
-	dbs_data->usage_count = 1;
-	list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
+	policy->governor_data = policy_dbs;
 
 	gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
-	ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
+	ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
 				   get_governor_parent_kobj(policy),
 				   "%s", gov->gov.name);
 	if (!ret)
@@ -523,29 +551,21 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy)
 	struct dbs_governor *gov = dbs_governor_of(policy);
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
-	int count;
+	unsigned int count;
 
 	/* Protect gov->gdbs_data against concurrent updates. */
 	mutex_lock(&gov_dbs_data_mutex);
 
-	mutex_lock(&dbs_data->mutex);
-	list_del(&policy_dbs->list);
-	count = --dbs_data->usage_count;
-	mutex_unlock(&dbs_data->mutex);
+	count = gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list);
 
-	if (!count) {
-		kobject_put(&dbs_data->kobj);
-
-		policy->governor_data = NULL;
+	policy->governor_data = NULL;
 
+	if (!count) {
 		if (!have_governor_per_policy())
 			gov->gdbs_data = NULL;
 
 		gov->exit(dbs_data, policy->governor->initialized == 1);
-		mutex_destroy(&dbs_data->mutex);
 		kfree(dbs_data);
-	} else {
-		policy->governor_data = NULL;
 	}
 
 	free_policy_dbs_info(policy_dbs, gov);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 61ff82f..f4ad431 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -41,6 +41,13 @@
 /* Ondemand Sampling types */
 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
+struct gov_attr_set {
+	struct kobject kobj;
+	struct list_head policy_list;
+	struct mutex update_lock;
+	int usage_count;
+};
+
 /*
  * Abbreviations:
  * dbs: used as a shortform for demand based switching It helps to keep variable
@@ -52,7 +59,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
 /* Governor demand based switching data (per-policy or global). */
 struct dbs_data {
-	int usage_count;
+	struct gov_attr_set attr_set;
 	void *tuners;
 	unsigned int min_sampling_rate;
 	unsigned int ignore_nice_load;
@@ -60,37 +67,35 @@ struct dbs_data {
 	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
 	unsigned int io_is_busy;
-
-	struct kobject kobj;
-	struct list_head policy_dbs_list;
-	/*
-	 * Protect concurrent updates to governor tunables from sysfs,
-	 * policy_dbs_list and usage_count.
-	 */
-	struct mutex mutex;
 };
 
+static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct dbs_data, attr_set);
+}
+
 /* Governor's specific attributes */
-struct dbs_data;
 struct governor_attr {
 	struct attribute attr;
-	ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
-	ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
+	ssize_t (*show)(struct gov_attr_set *attr_set, char *buf);
+	ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
 			 size_t count);
 };
 
 #define gov_show_one(_gov, file_name)					\
 static ssize_t show_##file_name						\
-(struct dbs_data *dbs_data, char *buf)					\
+(struct gov_attr_set *attr_set, char *buf)				\
 {									\
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
 	struct _gov##_dbs_tuners *tuners = dbs_data->tuners;		\
 	return sprintf(buf, "%u\n", tuners->file_name);			\
 }
 
 #define gov_show_one_common(file_name)					\
 static ssize_t show_##file_name						\
-(struct dbs_data *dbs_data, char *buf)					\
+(struct gov_attr_set *attr_set, char *buf)				\
 {									\
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
 	return sprintf(buf, "%u\n", dbs_data->file_name);		\
 }
 
@@ -184,7 +189,7 @@ void od_register_powersave_bias_handler(unsigned int (*f)
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
 void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count);
 void gov_update_cpu_data(struct dbs_data *dbs_data);
 #endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index acd8027..3001634 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -207,9 +207,10 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy)
 /************************** sysfs interface ************************/
 static struct dbs_governor od_dbs_gov;
 
-static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+				size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -224,9 +225,10 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 	ret = sscanf(buf, "%u", &input);
@@ -240,9 +242,10 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
 	return count;
 }
 
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+					  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
 	unsigned int input;
 	int ret;
@@ -254,7 +257,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
 	dbs_data->sampling_down_factor = input;
 
 	/* Reset down sampling multiplier in case it was active */
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
 		/*
 		 * Doing this without locking might lead to using different
 		 * rate_mult values in od_update() and od_dbs_timer().
@@ -267,9 +270,10 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -291,9 +295,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
 	return count;
 }
 
-static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+				    const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct policy_dbs_info *policy_dbs;
 	unsigned int input;
@@ -308,7 +313,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
 
 	od_tuners->powersave_bias = input;
 
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list)
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
 		ondemand_powersave_bias_init(policy_dbs->policy);
 
 	return count;
-- 
cgit v0.10.2


From 2d0c58ad60376160ee8b535e570a38b9a349b6a4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Mar 2016 02:49:15 +0100
Subject: cpufreq: governor: Move abstract gov_attr_set code to seperate file

Move abstract code related to struct gov_attr_set to a separate (new)
file so it can be shared with (future) goverernors that won't share
more code with "ondemand" and "conservative".

No intentional functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a7f4585..7fd8710 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -18,7 +18,11 @@ config CPU_FREQ
 
 if CPU_FREQ
 
+config CPU_FREQ_GOV_ATTR_SET
+	bool
+
 config CPU_FREQ_GOV_COMMON
+	select CPU_FREQ_GOV_ATTR_SET
 	select IRQ_WORK
 	bool
 
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9e63fb1..6d11867 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
 obj-$(CONFIG_CPU_FREQ_GOV_COMMON)		+= cpufreq_governor.o
+obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET)	+= cpufreq_governor_attr_set.o
 
 obj-$(CONFIG_CPUFREQ_DT)		+= cpufreq-dt.o
 
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 6a565e2..20f0a4e 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -112,53 +112,6 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 }
 EXPORT_SYMBOL_GPL(gov_update_cpu_data);
 
-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
-{
-	return container_of(kobj, struct gov_attr_set, kobj);
-}
-
-static inline struct governor_attr *to_gov_attr(struct attribute *attr)
-{
-	return container_of(attr, struct governor_attr, attr);
-}
-
-static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
-			     char *buf)
-{
-	struct governor_attr *gattr = to_gov_attr(attr);
-
-	return gattr->show(to_gov_attr_set(kobj), buf);
-}
-
-static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
-			      const char *buf, size_t count)
-{
-	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
-	struct governor_attr *gattr = to_gov_attr(attr);
-	int ret = -EBUSY;
-
-	mutex_lock(&attr_set->update_lock);
-
-	if (attr_set->usage_count)
-		ret = gattr->store(attr_set, buf, count);
-
-	mutex_unlock(&attr_set->update_lock);
-
-	return ret;
-}
-
-/*
- * Sysfs Ops for accessing governor attributes.
- *
- * All show/store invocations for governor specific sysfs attributes, will first
- * call the below show/store callbacks and the attribute specific callback will
- * be called from within it.
- */
-static const struct sysfs_ops governor_sysfs_ops = {
-	.show	= governor_show,
-	.store	= governor_store,
-};
-
 unsigned int dbs_update(struct cpufreq_policy *policy)
 {
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
@@ -425,41 +378,6 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
 	gov->free(policy_dbs);
 }
 
-static void gov_attr_set_init(struct gov_attr_set *attr_set,
-			      struct list_head *list_node)
-{
-	INIT_LIST_HEAD(&attr_set->policy_list);
-	mutex_init(&attr_set->update_lock);
-	attr_set->usage_count = 1;
-	list_add(list_node, &attr_set->policy_list);
-}
-
-static void gov_attr_set_get(struct gov_attr_set *attr_set,
-			     struct list_head *list_node)
-{
-	mutex_lock(&attr_set->update_lock);
-	attr_set->usage_count++;
-	list_add(list_node, &attr_set->policy_list);
-	mutex_unlock(&attr_set->update_lock);
-}
-
-static unsigned int gov_attr_set_put(struct gov_attr_set *attr_set,
-				     struct list_head *list_node)
-{
-	unsigned int count;
-
-	mutex_lock(&attr_set->update_lock);
-	list_del(list_node);
-	count = --attr_set->usage_count;
-	mutex_unlock(&attr_set->update_lock);
-	if (count)
-		return count;
-
-	kobject_put(&attr_set->kobj);
-	mutex_destroy(&attr_set->update_lock);
-	return 0;
-}
-
 static int cpufreq_governor_init(struct cpufreq_policy *policy)
 {
 	struct dbs_governor *gov = dbs_governor_of(policy);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index f4ad431..ccdd081 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -48,6 +48,12 @@ struct gov_attr_set {
 	int usage_count;
 };
 
+extern const struct sysfs_ops governor_sysfs_ops;
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
+
 /*
  * Abbreviations:
  * dbs: used as a shortform for demand based switching It helps to keep variable
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
new file mode 100644
index 0000000..52841f8
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -0,0 +1,84 @@
+/*
+ * Abstract code for CPUFreq governor tunable sysfs attributes.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpufreq_governor.h"
+
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+	return container_of(kobj, struct gov_attr_set, kobj);
+}
+
+static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+{
+	return container_of(attr, struct governor_attr, attr);
+}
+
+static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct governor_attr *gattr = to_gov_attr(attr);
+
+	return gattr->show(to_gov_attr_set(kobj), buf);
+}
+
+static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
+	struct governor_attr *gattr = to_gov_attr(attr);
+	int ret;
+
+	mutex_lock(&attr_set->update_lock);
+	ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY;
+	mutex_unlock(&attr_set->update_lock);
+	return ret;
+}
+
+const struct sysfs_ops governor_sysfs_ops = {
+	.show	= governor_show,
+	.store	= governor_store,
+};
+EXPORT_SYMBOL_GPL(governor_sysfs_ops);
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	INIT_LIST_HEAD(&attr_set->policy_list);
+	mutex_init(&attr_set->update_lock);
+	attr_set->usage_count = 1;
+	list_add(list_node, &attr_set->policy_list);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_init);
+
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	mutex_lock(&attr_set->update_lock);
+	attr_set->usage_count++;
+	list_add(list_node, &attr_set->policy_list);
+	mutex_unlock(&attr_set->update_lock);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_get);
+
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	unsigned int count;
+
+	mutex_lock(&attr_set->update_lock);
+	list_del(list_node);
+	count = --attr_set->usage_count;
+	mutex_unlock(&attr_set->update_lock);
+	if (count)
+		return count;
+
+	kobject_put(&attr_set->kobj);
+	mutex_destroy(&attr_set->update_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_put);
-- 
cgit v0.10.2


From 66893b6ac9952ec9d9409e9d85eaacf37bba8d15 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Mar 2016 02:50:45 +0100
Subject: cpufreq: Move governor attribute set headers to cpufreq.h

Move definitions and function headers related to struct gov_attr_set
to include/linux/cpufreq.h so they can be used by (future) goverernors
located outside of drivers/cpufreq/.

No functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index ccdd081..181a430 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -41,19 +41,6 @@
 /* Ondemand Sampling types */
 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
-struct gov_attr_set {
-	struct kobject kobj;
-	struct list_head policy_list;
-	struct mutex update_lock;
-	int usage_count;
-};
-
-extern const struct sysfs_ops governor_sysfs_ops;
-
-void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
-void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
-unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
-
 /*
  * Abbreviations:
  * dbs: used as a shortform for demand based switching It helps to keep variable
@@ -80,14 +67,6 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
 	return container_of(attr_set, struct dbs_data, attr_set);
 }
 
-/* Governor's specific attributes */
-struct governor_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gov_attr_set *attr_set, char *buf);
-	ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
-			 size_t count);
-};
-
 #define gov_show_one(_gov, file_name)					\
 static ssize_t show_##file_name						\
 (struct gov_attr_set *attr_set, char *buf)				\
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 718e872..74f5cff 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -462,6 +462,29 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor);
 struct cpufreq_governor *cpufreq_default_governor(void);
 struct cpufreq_governor *cpufreq_fallback_governor(void);
 
+/* Governor attribute set */
+struct gov_attr_set {
+	struct kobject kobj;
+	struct list_head policy_list;
+	struct mutex update_lock;
+	int usage_count;
+};
+
+/* sysfs ops for cpufreq governors */
+extern const struct sysfs_ops governor_sysfs_ops;
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
+
+/* Governor sysfs attribute */
+struct governor_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gov_attr_set *attr_set, char *buf);
+	ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
+			 size_t count);
+};
+
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
  *********************************************************************/
-- 
cgit v0.10.2


From 379480d8258056bfdbaa65e4d3f024bb5b34b52b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Mar 2016 02:51:56 +0100
Subject: cpufreq: Move governor symbols to cpufreq.h

Move definitions of symbols related to transition latency and
sampling rate to include/linux/cpufreq.h so they can be used by
(future) goverernors located outside of drivers/cpufreq/.

No functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 181a430..3e0eb7c 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -24,20 +24,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 
-/*
- * The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * governor will work on any processor with transition latency <= 10ms, using
- * appropriate sampling rate.
- *
- * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
- * this governor will not work. All times here are in us (micro seconds).
- */
-#define MIN_SAMPLING_RATE_RATIO			(2)
-#define LATENCY_MULTIPLIER			(1000)
-#define MIN_LATENCY_MULTIPLIER			(20)
-#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
-
 /* Ondemand Sampling types */
 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 74f5cff..2b4f248 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -426,6 +426,20 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
 #define CPUFREQ_POLICY_POWERSAVE	(1)
 #define CPUFREQ_POLICY_PERFORMANCE	(2)
 
+/*
+ * The polling frequency depends on the capability of the processor. Default
+ * polling frequency is 1000 times the transition latency of the processor. The
+ * ondemand governor will work on any processor with transition latency <= 10ms,
+ * using appropriate sampling rate.
+ *
+ * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
+ * the ondemand governor will not work. All times here are in us (microseconds).
+ */
+#define MIN_SAMPLING_RATE_RATIO		(2)
+#define LATENCY_MULTIPLIER		(1000)
+#define MIN_LATENCY_MULTIPLIER		(20)
+#define TRANSITION_LATENCY_LIMIT	(10 * 1000 * 1000)
+
 /* Governor Events */
 #define CPUFREQ_GOV_START	1
 #define CPUFREQ_GOV_STOP	2
-- 
cgit v0.10.2


From b7898fda5bc7e786e76ce24fbd2ec993b08ec518 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 30 Mar 2016 03:47:49 +0200
Subject: cpufreq: Support for fast frequency switching

Modify the ACPI cpufreq driver to provide a method for switching
CPU frequencies from interrupt context and update the cpufreq core
to support that method if available.

Introduce a new cpufreq driver callback, ->fast_switch, to be
invoked for frequency switching from interrupt context by (future)
governors supporting that feature via (new) helper function
cpufreq_driver_fast_switch().

Add two new policy flags, fast_switch_possible, to be set by the
cpufreq driver if fast frequency switching can be used for the
given policy and fast_switch_enabled, to be set by the governor
if it is going to use fast frequency switching for the given
policy.  Also add a helper for setting the latter.

Since fast frequency switching is inherently incompatible with
cpufreq transition notifiers, make it possible to set the
fast_switch_enabled only if there are no transition notifiers
already registered and make the registration of new transition
notifiers fail if fast_switch_enabled is set for at least one
policy.

Implement the ->fast_switch callback in the ACPI cpufreq driver
and make it set fast_switch_possible during policy initialization
as appropriate.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index fb57121..7f38fb5 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -458,6 +458,43 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	return result;
 }
 
+unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+				      unsigned int target_freq)
+{
+	struct acpi_cpufreq_data *data = policy->driver_data;
+	struct acpi_processor_performance *perf;
+	struct cpufreq_frequency_table *entry;
+	unsigned int next_perf_state, next_freq, freq;
+
+	/*
+	 * Find the closest frequency above target_freq.
+	 *
+	 * The table is sorted in the reverse order with respect to the
+	 * frequency and all of the entries are valid (see the initialization).
+	 */
+	entry = data->freq_table;
+	do {
+		entry++;
+		freq = entry->frequency;
+	} while (freq >= target_freq && freq != CPUFREQ_TABLE_END);
+	entry--;
+	next_freq = entry->frequency;
+	next_perf_state = entry->driver_data;
+
+	perf = to_perf_data(data);
+	if (perf->state == next_perf_state) {
+		if (unlikely(data->resume))
+			data->resume = 0;
+		else
+			return next_freq;
+	}
+
+	data->cpu_freq_write(&perf->control_register,
+			     perf->states[next_perf_state].control);
+	perf->state = next_perf_state;
+	return next_freq;
+}
+
 static unsigned long
 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 {
@@ -821,6 +858,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 */
 	data->resume = 1;
 
+	policy->fast_switch_possible = !acpi_pstate_strict &&
+		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);
+
 	return result;
 
 err_freqfree:
@@ -843,6 +883,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
+		policy->fast_switch_possible = false;
 		policy->driver_data = NULL;
 		acpi_processor_unregister_performance(data->acpi_perf_cpu);
 		free_cpumask_var(data->freqdomain_cpus);
@@ -876,6 +917,7 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
 static struct cpufreq_driver acpi_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= acpi_cpufreq_target,
+	.fast_switch	= acpi_cpufreq_fast_switch,
 	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b87596b..a5b7d77 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -77,6 +77,7 @@ static inline bool has_target(void)
 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
 static int cpufreq_start_governor(struct cpufreq_policy *policy);
+static int cpufreq_exit_governor(struct cpufreq_policy *policy);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -429,6 +430,68 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end);
 
+/*
+ * Fast frequency switching status count.  Positive means "enabled", negative
+ * means "disabled" and 0 means "not decided yet".
+ */
+static int cpufreq_fast_switch_count;
+static DEFINE_MUTEX(cpufreq_fast_switch_lock);
+
+static void cpufreq_list_transition_notifiers(void)
+{
+	struct notifier_block *nb;
+
+	pr_info("Registered transition notifiers:\n");
+
+	mutex_lock(&cpufreq_transition_notifier_list.mutex);
+
+	for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next)
+		pr_info("%pF\n", nb->notifier_call);
+
+	mutex_unlock(&cpufreq_transition_notifier_list.mutex);
+}
+
+/**
+ * cpufreq_enable_fast_switch - Enable fast frequency switching for policy.
+ * @policy: cpufreq policy to enable fast frequency switching for.
+ *
+ * Try to enable fast frequency switching for @policy.
+ *
+ * The attempt will fail if there is at least one transition notifier registered
+ * at this point, as fast frequency switching is quite fundamentally at odds
+ * with transition notifiers.  Thus if successful, it will make registration of
+ * transition notifiers fail going forward.
+ */
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy)
+{
+	lockdep_assert_held(&policy->rwsem);
+
+	if (!policy->fast_switch_possible)
+		return;
+
+	mutex_lock(&cpufreq_fast_switch_lock);
+	if (cpufreq_fast_switch_count >= 0) {
+		cpufreq_fast_switch_count++;
+		policy->fast_switch_enabled = true;
+	} else {
+		pr_warn("CPU%u: Fast frequency switching not enabled\n",
+			policy->cpu);
+		cpufreq_list_transition_notifiers();
+	}
+	mutex_unlock(&cpufreq_fast_switch_lock);
+}
+EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch);
+
+static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
+{
+	mutex_lock(&cpufreq_fast_switch_lock);
+	if (policy->fast_switch_enabled) {
+		policy->fast_switch_enabled = false;
+		if (!WARN_ON(cpufreq_fast_switch_count <= 0))
+			cpufreq_fast_switch_count--;
+	}
+	mutex_unlock(&cpufreq_fast_switch_lock);
+}
 
 /*********************************************************************
  *                          SYSFS INTERFACE                          *
@@ -1319,7 +1382,7 @@ static void cpufreq_offline(unsigned int cpu)
 
 	/* If cpu is last user of policy, free policy */
 	if (has_target()) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		ret = cpufreq_exit_governor(policy);
 		if (ret)
 			pr_err("%s: Failed to exit governor\n", __func__);
 	}
@@ -1447,8 +1510,12 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy)
 
 	ret_freq = cpufreq_driver->get(policy->cpu);
 
-	/* Updating inactive policies is invalid, so avoid doing that. */
-	if (unlikely(policy_is_inactive(policy)))
+	/*
+	 * Updating inactive policies is invalid, so avoid doing that.  Also
+	 * if fast frequency switching is used with the given policy, the check
+	 * against policy->cur is pointless, so skip it in that case too.
+	 */
+	if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled)
 		return ret_freq;
 
 	if (ret_freq && policy->cur &&
@@ -1672,8 +1739,18 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
 
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
+		mutex_lock(&cpufreq_fast_switch_lock);
+
+		if (cpufreq_fast_switch_count > 0) {
+			mutex_unlock(&cpufreq_fast_switch_lock);
+			return -EBUSY;
+		}
 		ret = srcu_notifier_chain_register(
 				&cpufreq_transition_notifier_list, nb);
+		if (!ret)
+			cpufreq_fast_switch_count--;
+
+		mutex_unlock(&cpufreq_fast_switch_lock);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
 		ret = blocking_notifier_chain_register(
@@ -1706,8 +1783,14 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
 
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
+		mutex_lock(&cpufreq_fast_switch_lock);
+
 		ret = srcu_notifier_chain_unregister(
 				&cpufreq_transition_notifier_list, nb);
+		if (!ret && !WARN_ON(cpufreq_fast_switch_count >= 0))
+			cpufreq_fast_switch_count++;
+
+		mutex_unlock(&cpufreq_fast_switch_lock);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
 		ret = blocking_notifier_chain_unregister(
@@ -1726,6 +1809,37 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/**
+ * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch.
+ * @policy: cpufreq policy to switch the frequency for.
+ * @target_freq: New frequency to set (may be approximate).
+ *
+ * Carry out a fast frequency switch without sleeping.
+ *
+ * The driver's ->fast_switch() callback invoked by this function must be
+ * suitable for being called from within RCU-sched read-side critical sections
+ * and it is expected to select the minimum available frequency greater than or
+ * equal to @target_freq (CPUFREQ_RELATION_L).
+ *
+ * This function must not be called if policy->fast_switch_enabled is unset.
+ *
+ * Governors calling this function must guarantee that it will never be invoked
+ * twice in parallel for the same policy and that it will never be called in
+ * parallel with either ->target() or ->target_index() for the same policy.
+ *
+ * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch()
+ * callback to indicate an error condition, the hardware configuration must be
+ * preserved.
+ */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+					unsigned int target_freq)
+{
+	clamp_val(target_freq, policy->min, policy->max);
+
+	return cpufreq_driver->fast_switch(policy, target_freq);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
+
 /* Must set freqs->new to intermediate frequency */
 static int __target_intermediate(struct cpufreq_policy *policy,
 				 struct cpufreq_freqs *freqs, int index)
@@ -1946,6 +2060,12 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy)
 	return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }
 
+static int cpufreq_exit_governor(struct cpufreq_policy *policy)
+{
+	cpufreq_disable_fast_switch(policy);
+	return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+}
+
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
 	int err;
@@ -2101,7 +2221,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 			return ret;
 		}
 
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		ret = cpufreq_exit_governor(policy);
 		if (ret) {
 			pr_err("%s: Failed to Exit Governor: %s (%d)\n",
 			       __func__, old_gov->name, ret);
@@ -2118,7 +2238,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 			pr_debug("cpufreq: governor change\n");
 			return 0;
 		}
-		cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		cpufreq_exit_governor(policy);
 	}
 
 	/* new governor failed, so re-start old one */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2b4f248..55e69eb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -102,6 +102,17 @@ struct cpufreq_policy {
 	 */
 	struct rw_semaphore	rwsem;
 
+	/*
+	 * Fast switch flags:
+	 * - fast_switch_possible should be set by the driver if it can
+	 *   guarantee that frequency can be changed on any CPU sharing the
+	 *   policy and that the change will affect all of the policy CPUs then.
+	 * - fast_switch_enabled is to be set by governors that support fast
+	 *   freqnency switching with the help of cpufreq_enable_fast_switch().
+	 */
+	bool			fast_switch_possible;
+	bool			fast_switch_enabled;
+
 	/* Synchronization for frequency transitions */
 	bool			transition_ongoing; /* Tracks transition status */
 	spinlock_t		transition_lock;
@@ -156,6 +167,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
 int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
 #else
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
@@ -236,6 +248,8 @@ struct cpufreq_driver {
 				  unsigned int relation);	/* Deprecated */
 	int		(*target_index)(struct cpufreq_policy *policy,
 					unsigned int index);
+	unsigned int	(*fast_switch)(struct cpufreq_policy *policy,
+				       unsigned int target_freq);
 	/*
 	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
 	 * unset.
@@ -464,6 +478,8 @@ struct cpufreq_governor {
 };
 
 /* Pass a target to the cpufreq driver */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+					unsigned int target_freq);
 int cpufreq_driver_target(struct cpufreq_policy *policy,
 				 unsigned int target_freq,
 				 unsigned int relation);
-- 
cgit v0.10.2


From 9bdcb44e391da5c41b98573bf0305a0e0b1c9569 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 2 Apr 2016 01:09:12 +0200
Subject: cpufreq: schedutil: New governor based on scheduler utilization data

Add a new cpufreq scaling governor, called "schedutil", that uses
scheduler-provided CPU utilization information as input for making
its decisions.

Doing that is possible after commit 34e2c555f3e1 (cpufreq: Add
mechanism for registering utilization update callbacks) that
introduced cpufreq_update_util() called by the scheduler on
utilization changes (from CFS) and RT/DL task status updates.
In particular, CPU frequency scaling decisions may be based on
the the utilization data passed to cpufreq_update_util() by CFS.

The new governor is relatively simple.

The frequency selection formula used by it depends on whether or not
the utilization is frequency-invariant.  In the frequency-invariant
case the new CPU frequency is given by

	next_freq = 1.25 * max_freq * util / max

where util and max are the last two arguments of cpufreq_update_util().
In turn, if util is not frequency-invariant, the maximum frequency in
the above formula is replaced with the current frequency of the CPU:

	next_freq = 1.25 * curr_freq * util / max

The coefficient 1.25 corresponds to the frequency tipping point at
(util / max) = 0.8.

All of the computations are carried out in the utilization update
handlers provided by the new governor.  One of those handlers is
used for cpufreq policies shared between multiple CPUs and the other
one is for policies with one CPU only (and therefore it doesn't need
to use any extra synchronization means).

The governor supports fast frequency switching if that is supported
by the cpufreq driver in use and possible for the given policy.
In the fast switching case, all operations of the governor take
place in its utilization update handlers.  If fast switching cannot
be used, the frequency switch operations are carried out with the
help of a work item which only calls __cpufreq_driver_target()
(under a mutex) to trigger a frequency update (to a value already
computed beforehand in one of the utilization update handlers).

Currently, the governor treats all of the RT and DL tasks as
"unknown utilization" and sets the frequency to the allowed
maximum when updated from the RT or DL sched classes.  That
heavy-handed approach should be replaced with something more
subtle and specifically targeted at RT and DL tasks.

The governor shares some tunables management code with the
"ondemand" and "conservative" governors and uses some common
definitions from cpufreq_governor.h, but apart from that it
is stand-alone.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 7fd8710..5d74826 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -107,6 +107,16 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 	  Be aware that not all cpufreq drivers support the conservative
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+	bool "schedutil"
+	select CPU_FREQ_GOV_SCHEDUTIL
+	select CPU_FREQ_GOV_PERFORMANCE
+	help
+	  Use the 'schedutil' CPUFreq governor by default. If unsure,
+	  have a look at the help section of that governor. The fallback
+	  governor will be 'performance'.
+
 endchoice
 
 config CPU_FREQ_GOV_PERFORMANCE
@@ -188,6 +198,26 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_SCHEDUTIL
+	tristate "'schedutil' cpufreq policy governor"
+	depends on CPU_FREQ
+	select CPU_FREQ_GOV_ATTR_SET
+	select IRQ_WORK
+	help
+	  This governor makes decisions based on the utilization data provided
+	  by the scheduler.  It sets the CPU frequency to be proportional to
+	  the utilization/capacity ratio coming from the scheduler.  If the
+	  utilization is frequency-invariant, the new frequency is also
+	  proportional to the maximum available frequency.  If that is not the
+	  case, it is proportional to the current frequency of the CPU.  The
+	  frequency tipping point is at utilization/capacity equal to 80% in
+	  both cases.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called cpufreq_schedutil.
+
+	  If in doubt, say N.
+
 comment "CPU frequency scaling drivers"
 
 config CPUFREQ_DT
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 414d9c1..5e59b83 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
new file mode 100644
index 0000000..d27ae06
--- /dev/null
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -0,0 +1,528 @@
+/*
+ * CPUFreq governor based on scheduler-provided CPU utilization data.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <trace/events/power.h>
+
+#include "sched.h"
+
+struct sugov_tunables {
+	struct gov_attr_set attr_set;
+	unsigned int rate_limit_us;
+};
+
+struct sugov_policy {
+	struct cpufreq_policy *policy;
+
+	struct sugov_tunables *tunables;
+	struct list_head tunables_hook;
+
+	raw_spinlock_t update_lock;  /* For shared policies */
+	u64 last_freq_update_time;
+	s64 freq_update_delay_ns;
+	unsigned int next_freq;
+
+	/* The next fields are only needed if fast switch cannot be used. */
+	struct irq_work irq_work;
+	struct work_struct work;
+	struct mutex work_lock;
+	bool work_in_progress;
+
+	bool need_freq_update;
+};
+
+struct sugov_cpu {
+	struct update_util_data update_util;
+	struct sugov_policy *sg_policy;
+
+	/* The fields below are only needed when sharing a policy. */
+	unsigned long util;
+	unsigned long max;
+	u64 last_update;
+};
+
+static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
+
+/************************ Governor internals ***********************/
+
+static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
+{
+	s64 delta_ns;
+
+	if (sg_policy->work_in_progress)
+		return false;
+
+	if (unlikely(sg_policy->need_freq_update)) {
+		sg_policy->need_freq_update = false;
+		/*
+		 * This happens when limits change, so forget the previous
+		 * next_freq value and force an update.
+		 */
+		sg_policy->next_freq = UINT_MAX;
+		return true;
+	}
+
+	delta_ns = time - sg_policy->last_freq_update_time;
+	return delta_ns >= sg_policy->freq_update_delay_ns;
+}
+
+static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
+				unsigned int next_freq)
+{
+	struct cpufreq_policy *policy = sg_policy->policy;
+
+	sg_policy->last_freq_update_time = time;
+
+	if (policy->fast_switch_enabled) {
+		if (sg_policy->next_freq == next_freq) {
+			trace_cpu_frequency(policy->cur, smp_processor_id());
+			return;
+		}
+		sg_policy->next_freq = next_freq;
+		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
+		if (next_freq == CPUFREQ_ENTRY_INVALID)
+			return;
+
+		policy->cur = next_freq;
+		trace_cpu_frequency(next_freq, smp_processor_id());
+	} else if (sg_policy->next_freq != next_freq) {
+		sg_policy->next_freq = next_freq;
+		sg_policy->work_in_progress = true;
+		irq_work_queue(&sg_policy->irq_work);
+	}
+}
+
+/**
+ * get_next_freq - Compute a new frequency for a given cpufreq policy.
+ * @policy: cpufreq policy object to compute the new frequency for.
+ * @util: Current CPU utilization.
+ * @max: CPU capacity.
+ *
+ * If the utilization is frequency-invariant, choose the new frequency to be
+ * proportional to it, that is
+ *
+ * next_freq = C * max_freq * util / max
+ *
+ * Otherwise, approximate the would-be frequency-invariant utilization by
+ * util_raw * (curr_freq / max_freq) which leads to
+ *
+ * next_freq = C * curr_freq * util_raw / max
+ *
+ * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
+ */
+static unsigned int get_next_freq(struct cpufreq_policy *policy,
+				  unsigned long util, unsigned long max)
+{
+	unsigned int freq = arch_scale_freq_invariant() ?
+				policy->cpuinfo.max_freq : policy->cur;
+
+	return (freq + (freq >> 2)) * util / max;
+}
+
+static void sugov_update_single(struct update_util_data *hook, u64 time,
+				unsigned long util, unsigned long max)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned int next_f;
+
+	if (!sugov_should_update_freq(sg_policy, time))
+		return;
+
+	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
+			get_next_freq(policy, util, max);
+	sugov_update_commit(sg_policy, time, next_f);
+}
+
+static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
+					   unsigned long util, unsigned long max)
+{
+	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned int max_f = policy->cpuinfo.max_freq;
+	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned int j;
+
+	if (util == ULONG_MAX)
+		return max_f;
+
+	for_each_cpu(j, policy->cpus) {
+		struct sugov_cpu *j_sg_cpu;
+		unsigned long j_util, j_max;
+		s64 delta_ns;
+
+		if (j == smp_processor_id())
+			continue;
+
+		j_sg_cpu = &per_cpu(sugov_cpu, j);
+		/*
+		 * If the CPU utilization was last updated before the previous
+		 * frequency update and the time elapsed between the last update
+		 * of the CPU utilization and the last frequency update is long
+		 * enough, don't take the CPU into account as it probably is
+		 * idle now.
+		 */
+		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		if (delta_ns > TICK_NSEC)
+			continue;
+
+		j_util = j_sg_cpu->util;
+		if (j_util == ULONG_MAX)
+			return max_f;
+
+		j_max = j_sg_cpu->max;
+		if (j_util * max > j_max * util) {
+			util = j_util;
+			max = j_max;
+		}
+	}
+
+	return get_next_freq(policy, util, max);
+}
+
+static void sugov_update_shared(struct update_util_data *hook, u64 time,
+				unsigned long util, unsigned long max)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	unsigned int next_f;
+
+	raw_spin_lock(&sg_policy->update_lock);
+
+	sg_cpu->util = util;
+	sg_cpu->max = max;
+	sg_cpu->last_update = time;
+
+	if (sugov_should_update_freq(sg_policy, time)) {
+		next_f = sugov_next_freq_shared(sg_policy, util, max);
+		sugov_update_commit(sg_policy, time, next_f);
+	}
+
+	raw_spin_unlock(&sg_policy->update_lock);
+}
+
+static void sugov_work(struct work_struct *work)
+{
+	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
+
+	mutex_lock(&sg_policy->work_lock);
+	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
+				CPUFREQ_RELATION_L);
+	mutex_unlock(&sg_policy->work_lock);
+
+	sg_policy->work_in_progress = false;
+}
+
+static void sugov_irq_work(struct irq_work *irq_work)
+{
+	struct sugov_policy *sg_policy;
+
+	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
+	schedule_work_on(smp_processor_id(), &sg_policy->work);
+}
+
+/************************** sysfs interface ************************/
+
+static struct sugov_tunables *global_tunables;
+static DEFINE_MUTEX(global_tunables_lock);
+
+static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct sugov_tunables, attr_set);
+}
+
+static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+}
+
+static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
+				   size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int rate_limit_us;
+
+	if (kstrtouint(buf, 10, &rate_limit_us))
+		return -EINVAL;
+
+	tunables->rate_limit_us = rate_limit_us;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
+
+	return count;
+}
+
+static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+
+static struct attribute *sugov_attributes[] = {
+	&rate_limit_us.attr,
+	NULL
+};
+
+static struct kobj_type sugov_tunables_ktype = {
+	.default_attrs = sugov_attributes,
+	.sysfs_ops = &governor_sysfs_ops,
+};
+
+/********************** cpufreq governor interface *********************/
+
+static struct cpufreq_governor schedutil_gov;
+
+static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy;
+
+	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
+	if (!sg_policy)
+		return NULL;
+
+	sg_policy->policy = policy;
+	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
+	INIT_WORK(&sg_policy->work, sugov_work);
+	mutex_init(&sg_policy->work_lock);
+	raw_spin_lock_init(&sg_policy->update_lock);
+	return sg_policy;
+}
+
+static void sugov_policy_free(struct sugov_policy *sg_policy)
+{
+	mutex_destroy(&sg_policy->work_lock);
+	kfree(sg_policy);
+}
+
+static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
+{
+	struct sugov_tunables *tunables;
+
+	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+	if (tunables) {
+		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
+		if (!have_governor_per_policy())
+			global_tunables = tunables;
+	}
+	return tunables;
+}
+
+static void sugov_tunables_free(struct sugov_tunables *tunables)
+{
+	if (!have_governor_per_policy())
+		global_tunables = NULL;
+
+	kfree(tunables);
+}
+
+static int sugov_init(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy;
+	struct sugov_tunables *tunables;
+	unsigned int lat;
+	int ret = 0;
+
+	/* State should be equivalent to EXIT */
+	if (policy->governor_data)
+		return -EBUSY;
+
+	sg_policy = sugov_policy_alloc(policy);
+	if (!sg_policy)
+		return -ENOMEM;
+
+	mutex_lock(&global_tunables_lock);
+
+	if (global_tunables) {
+		if (WARN_ON(have_governor_per_policy())) {
+			ret = -EINVAL;
+			goto free_sg_policy;
+		}
+		policy->governor_data = sg_policy;
+		sg_policy->tunables = global_tunables;
+
+		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
+		goto out;
+	}
+
+	tunables = sugov_tunables_alloc(sg_policy);
+	if (!tunables) {
+		ret = -ENOMEM;
+		goto free_sg_policy;
+	}
+
+	tunables->rate_limit_us = LATENCY_MULTIPLIER;
+	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+	if (lat)
+		tunables->rate_limit_us *= lat;
+
+	policy->governor_data = sg_policy;
+	sg_policy->tunables = tunables;
+
+	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
+				   get_governor_parent_kobj(policy), "%s",
+				   schedutil_gov.name);
+	if (ret)
+		goto fail;
+
+ out:
+	mutex_unlock(&global_tunables_lock);
+
+	cpufreq_enable_fast_switch(policy);
+	return 0;
+
+ fail:
+	policy->governor_data = NULL;
+	sugov_tunables_free(tunables);
+
+ free_sg_policy:
+	mutex_unlock(&global_tunables_lock);
+
+	sugov_policy_free(sg_policy);
+	pr_err("cpufreq: schedutil governor initialization failed (error %d)\n", ret);
+	return ret;
+}
+
+static int sugov_exit(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	struct sugov_tunables *tunables = sg_policy->tunables;
+	unsigned int count;
+
+	mutex_lock(&global_tunables_lock);
+
+	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
+	policy->governor_data = NULL;
+	if (!count)
+		sugov_tunables_free(tunables);
+
+	mutex_unlock(&global_tunables_lock);
+
+	sugov_policy_free(sg_policy);
+	return 0;
+}
+
+static int sugov_start(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	unsigned int cpu;
+
+	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
+	sg_policy->last_freq_update_time = 0;
+	sg_policy->next_freq = UINT_MAX;
+	sg_policy->work_in_progress = false;
+	sg_policy->need_freq_update = false;
+
+	for_each_cpu(cpu, policy->cpus) {
+		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
+		sg_cpu->sg_policy = sg_policy;
+		if (policy_is_shared(policy)) {
+			sg_cpu->util = ULONG_MAX;
+			sg_cpu->max = 0;
+			sg_cpu->last_update = 0;
+			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+						     sugov_update_shared);
+		} else {
+			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+						     sugov_update_single);
+		}
+	}
+	return 0;
+}
+
+static int sugov_stop(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	unsigned int cpu;
+
+	for_each_cpu(cpu, policy->cpus)
+		cpufreq_remove_update_util_hook(cpu);
+
+	synchronize_sched();
+
+	irq_work_sync(&sg_policy->irq_work);
+	cancel_work_sync(&sg_policy->work);
+	return 0;
+}
+
+static int sugov_limits(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+
+	if (!policy->fast_switch_enabled) {
+		mutex_lock(&sg_policy->work_lock);
+
+		if (policy->max < policy->cur)
+			__cpufreq_driver_target(policy, policy->max,
+						CPUFREQ_RELATION_H);
+		else if (policy->min > policy->cur)
+			__cpufreq_driver_target(policy, policy->min,
+						CPUFREQ_RELATION_L);
+
+		mutex_unlock(&sg_policy->work_lock);
+	}
+
+	sg_policy->need_freq_update = true;
+	return 0;
+}
+
+int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
+{
+	if (event == CPUFREQ_GOV_POLICY_INIT) {
+		return sugov_init(policy);
+	} else if (policy->governor_data) {
+		switch (event) {
+		case CPUFREQ_GOV_POLICY_EXIT:
+			return sugov_exit(policy);
+		case CPUFREQ_GOV_START:
+			return sugov_start(policy);
+		case CPUFREQ_GOV_STOP:
+			return sugov_stop(policy);
+		case CPUFREQ_GOV_LIMITS:
+			return sugov_limits(policy);
+		}
+	}
+	return -EINVAL;
+}
+
+static struct cpufreq_governor schedutil_gov = {
+	.name = "schedutil",
+	.governor = sugov_governor,
+	.owner = THIS_MODULE,
+};
+
+static int __init sugov_module_init(void)
+{
+	return cpufreq_register_governor(&schedutil_gov);
+}
+
+static void __exit sugov_module_exit(void)
+{
+	cpufreq_unregister_governor(&schedutil_gov);
+}
+
+MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
+MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+	return &schedutil_gov;
+}
+
+fs_initcall(sugov_module_init);
+#else
+module_init(sugov_module_init);
+#endif
+module_exit(sugov_module_exit);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d2..921d6e5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1842,6 +1842,14 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
 static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef arch_scale_freq_capacity
+#ifndef arch_scale_freq_invariant
+#define arch_scale_freq_invariant()	(true)
+#endif
+#else /* arch_scale_freq_capacity */
+#define arch_scale_freq_invariant()	(false)
+#endif
+
 static inline void account_reset_rq(struct rq *rq)
 {
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 81b8745..0c7dee2 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -15,5 +15,6 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
 EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency);
 EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);
 
-- 
cgit v0.10.2


From 6c9d9c81924b4b63c7a487e90fddb3b2d0f7d458 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 7 Apr 2016 23:38:46 +0200
Subject: cpufreq: Call cpufreq_disable_fast_switch() in sugov_exit()

Due to differences in the cpufreq core's handling of runtime CPU
offline and nonboot CPUs disabling during system suspend-to-RAM,
fast frequency switching gets disabled after a suspend-to-RAM and
resume cycle on all of the nonboot CPUs.

To prevent that from happening, move the invocation of
cpufreq_disable_fast_switch() from cpufreq_exit_governor() to
sugov_exit(), as the schedutil governor is the only user of fast
frequency switching today anyway.

That simply prevents cpufreq_disable_fast_switch() from being called
without invoking the ->governor callback for the CPUFREQ_GOV_POLICY_EXIT
event (which happens during system suspend now).

Fixes: b7898fda5bc7 (cpufreq: Support for fast frequency switching)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a5b7d77..77d77a4 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -77,7 +77,11 @@ static inline bool has_target(void)
 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
 static int cpufreq_start_governor(struct cpufreq_policy *policy);
-static int cpufreq_exit_governor(struct cpufreq_policy *policy);
+
+static inline int cpufreq_exit_governor(struct cpufreq_policy *policy)
+{
+	return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+}
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -482,7 +486,11 @@ void cpufreq_enable_fast_switch(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch);
 
-static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
+/**
+ * cpufreq_disable_fast_switch - Disable fast frequency switching for policy.
+ * @policy: cpufreq policy to disable fast frequency switching for.
+ */
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
 {
 	mutex_lock(&cpufreq_fast_switch_lock);
 	if (policy->fast_switch_enabled) {
@@ -492,6 +500,7 @@ static void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
 	}
 	mutex_unlock(&cpufreq_fast_switch_lock);
 }
+EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch);
 
 /*********************************************************************
  *                          SYSFS INTERFACE                          *
@@ -2060,12 +2069,6 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy)
 	return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }
 
-static int cpufreq_exit_governor(struct cpufreq_policy *policy)
-{
-	cpufreq_disable_fast_switch(policy);
-	return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
-}
-
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
 	int err;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 55e69eb..4e81e08 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -168,6 +168,7 @@ int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy);
 #else
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d27ae06..154ae3a 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -398,6 +398,8 @@ static int sugov_exit(struct cpufreq_policy *policy)
 	struct sugov_tunables *tunables = sg_policy->tunables;
 	unsigned int count;
 
+	cpufreq_disable_fast_switch(policy);
+
 	mutex_lock(&global_tunables_lock);
 
 	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
-- 
cgit v0.10.2


From dbbe972c112838bede8772c2a427c816e9f22839 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 27 Mar 2016 18:08:17 -0400
Subject: cpufreq: ppc_cbe_cpufreq_pmi: make the driver explicitly non-modular

The Kconfig for this driver is currently:

config CPU_FREQ_CBE_PMI
    bool "CBE frequency scaling using PMI interface"

...meaning that it currently is not being built as a module by
anyone.  Lets remove the modular and unused code here, so that
when reading the driver there is no doubt it is builtin-only.

Since module_init translates to device_initcall in the non-modular
case, the init ordering remains unchanged with this commit.

We also delete the MODULE_LICENSE tag etc. since all that information
is already contained at the top of the file in the comments.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 7969f76..7c4cd5c 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -23,7 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/timer.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/of_platform.h>
 
 #include <asm/processor.h>
@@ -142,15 +142,4 @@ static int __init cbe_cpufreq_pmi_init(void)
 
 	return 0;
 }
-
-static void __exit cbe_cpufreq_pmi_exit(void)
-{
-	cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
-	pmi_unregister_handler(&cbe_pmi_handler);
-}
-
-module_init(cbe_cpufreq_pmi_init);
-module_exit(cbe_cpufreq_pmi_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+device_initcall(cbe_cpufreq_pmi_init);
-- 
cgit v0.10.2


From f3f24dea2c7fd949377dc512fe1bfd0b7419afa3 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 29 Mar 2016 19:35:12 +0530
Subject: cpufreq: tegra124: No need of setting platform-data

All CPUs on Tegra platform share clock/voltage lines and there is
absolutely no need of setting platform data for 'cpufreq-dt' platform
device, as that's the default case.

Stop setting platform data for cpufreq-dt device.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Thierry Reding <treding@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c
index 20bcceb..4353025 100644
--- a/drivers/cpufreq/tegra124-cpufreq.c
+++ b/drivers/cpufreq/tegra124-cpufreq.c
@@ -14,7 +14,6 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/clk.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -69,10 +68,6 @@ static void tegra124_cpu_switch_to_pllx(struct tegra124_cpufreq_priv *priv)
 	clk_set_parent(priv->cpu_clk, priv->pllx_clk);
 }
 
-static struct cpufreq_dt_platform_data cpufreq_dt_pd = {
-	.independent_clocks = false,
-};
-
 static int tegra124_cpufreq_probe(struct platform_device *pdev)
 {
 	struct tegra124_cpufreq_priv *priv;
@@ -129,8 +124,6 @@ static int tegra124_cpufreq_probe(struct platform_device *pdev)
 
 	cpufreq_dt_devinfo.name = "cpufreq-dt";
 	cpufreq_dt_devinfo.parent = &pdev->dev;
-	cpufreq_dt_devinfo.data = &cpufreq_dt_pd;
-	cpufreq_dt_devinfo.size_data = sizeof(cpufreq_dt_pd);
 
 	priv->cpufreq_dt_pdev =
 		platform_device_register_full(&cpufreq_dt_devinfo);
-- 
cgit v0.10.2


From 7e67e239a4f32fa3e2d51dd9a7930018651635b6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 30 Mar 2016 13:45:25 +0530
Subject: cpufreq: dt: Include types.h from cpufreq-dt.h

cpufreq-dt.h uses 'bool' data type but doesn't include types.h. It works
fine for now as the files that include cpufreq-dt.h, also include
types.h directly or indirectly.

But, when a file includes cpufreq-dt.h without including types.h, we get
a build error. Avoid such errors by including types.h in cpufreq-dt
itself.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
index 0414009..a87335a 100644
--- a/include/linux/cpufreq-dt.h
+++ b/include/linux/cpufreq-dt.h
@@ -10,6 +10,8 @@
 #ifndef __CPUFREQ_DT_H__
 #define __CPUFREQ_DT_H__
 
+#include <linux/types.h>
+
 struct cpufreq_dt_platform_data {
 	/*
 	 * True when each CPU has its own clock to control its
-- 
cgit v0.10.2


From f56aad1d98f1c0c6df513abcd275a4d914adc1ef Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 30 Mar 2016 13:45:26 +0530
Subject: cpufreq: dt: Add generic platform-device creation support

Multiple platforms are using the generic cpufreq-dt driver now, and all
of them are required to create a platform device with name "cpufreq-dt",
in order to get the cpufreq-dt probed.

Many of them do it from platform code, others have special drivers just
to do that.

It would be more sensible to do this at a generic place, where all such
platform can mark their entries.

This patch adds a separate file to get this device created. Currently
the compat list of platforms that we support is empty, and will be
filled in as and when we move platforms to use it.

It always compiles as part of the kernel and so doesn't need a
module-exit operation.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 5d74826..b38d502 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -225,6 +225,7 @@ config CPUFREQ_DT
 	depends on HAVE_CLK && OF
 	# if CPU_THERMAL is on and THERMAL=m, CPUFREQ_DT cannot be =y:
 	depends on !CPU_THERMAL || THERMAL
+	select CPUFREQ_DT_PLATDEV
 	select PM_OPP
 	help
 	  This adds a generic DT based cpufreq driver for frequency management.
@@ -233,6 +234,15 @@ config CPUFREQ_DT
 
 	  If in doubt, say N.
 
+config CPUFREQ_DT_PLATDEV
+	bool
+	help
+	  This adds a generic DT based cpufreq platdev driver for frequency
+	  management.  This creates a 'cpufreq-dt' platform device, on the
+	  supported platforms.
+
+	  If in doubt, say N.
+
 if X86
 source "drivers/cpufreq/Kconfig.x86"
 endif
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 6d11867..d7b646c 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_COMMON)		+= cpufreq_governor.o
 obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET)	+= cpufreq_governor_attr_set.o
 
 obj-$(CONFIG_CPUFREQ_DT)		+= cpufreq-dt.o
+obj-$(CONFIG_CPUFREQ_DT_PLATDEV)	+= cpufreq-dt-platdev.o
 
 ##################################################################################
 # x86 drivers.
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
new file mode 100644
index 0000000..2a35324
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static const struct of_device_id machines[] = {
+};
+
+static int __init cpufreq_dt_platdev_init(void)
+{
+	struct device_node *np = of_find_node_by_path("/");
+
+	if (!np)
+		return -ENODEV;
+
+	if (!of_match_node(machines, np))
+		return -ENODEV;
+
+	of_node_put(of_root);
+
+	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
+							       NULL, 0));
+}
+device_initcall(cpufreq_dt_platdev_init);
-- 
cgit v0.10.2


From ea3b05e62f097a604f4dbe7d3ab785a148b61e93 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 30 Mar 2016 13:45:27 +0530
Subject: ARM: exynos: exynos-cpufreq platform device isn't supported anymore

The driver is removed long back and we don't support this device
anymore. Stop adding it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index bbf51a4..4dfce10 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -232,12 +232,8 @@ static void __init exynos_cpufreq_init(void)
 	const struct of_device_id *match;
 
 	match = of_match_node(exynos_cpufreq_matches, root);
-	if (!match) {
-		platform_device_register_simple("exynos-cpufreq", -1, NULL, 0);
-		return;
-	}
-
-	platform_device_register_simple(match->data, -1, NULL, 0);
+	if (match)
+		platform_device_register_simple(match->data, -1, NULL, 0);
 }
 
 static void __init exynos_dt_machine_init(void)
-- 
cgit v0.10.2


From 2249c00a0bf854adf49e8e3c2973feddfbaae71f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 30 Mar 2016 13:45:28 +0530
Subject: cpufreq: exynos: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index 4dfce10..4d3b056 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -213,29 +213,6 @@ static void __init exynos_init_irq(void)
 	exynos_map_pmu();
 }
 
-static const struct of_device_id exynos_cpufreq_matches[] = {
-	{ .compatible = "samsung,exynos3250", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4210", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4212", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4412", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos5250", .data = "cpufreq-dt" },
-#ifndef CONFIG_BL_SWITCHER
-	{ .compatible = "samsung,exynos5420", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos5800", .data = "cpufreq-dt" },
-#endif
-	{ /* sentinel */ }
-};
-
-static void __init exynos_cpufreq_init(void)
-{
-	struct device_node *root = of_find_node_by_path("/");
-	const struct of_device_id *match;
-
-	match = of_match_node(exynos_cpufreq_matches, root);
-	if (match)
-		platform_device_register_simple(match->data, -1, NULL, 0);
-}
-
 static void __init exynos_dt_machine_init(void)
 {
 	/*
@@ -258,8 +235,6 @@ static void __init exynos_dt_machine_init(void)
 	    of_machine_is_compatible("samsung,exynos5250"))
 		platform_device_register(&exynos_cpuidle);
 
-	exynos_cpufreq_init();
-
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 2a35324..f2ae7ad 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -12,6 +12,15 @@
 #include <linux/platform_device.h>
 
 static const struct of_device_id machines[] = {
+	{ .compatible = "samsung,exynos3250", },
+	{ .compatible = "samsung,exynos4210", },
+	{ .compatible = "samsung,exynos4212", },
+	{ .compatible = "samsung,exynos4412", },
+	{ .compatible = "samsung,exynos5250", },
+#ifndef CONFIG_BL_SWITCHER
+	{ .compatible = "samsung,exynos5420", },
+	{ .compatible = "samsung,exynos5800", },
+#endif
 };
 
 static int __init cpufreq_dt_platdev_init(void)
-- 
cgit v0.10.2


From 22590efb98ae0c84f798a9938c0b6d97bc89adf5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 9 Apr 2016 01:25:58 +0200
Subject: intel_pstate: Avoid pointless FRAC_BITS shifts under div_fp()

There are multiple places in intel_pstate where int_tofp() is applied
to both arguments of div_fp(), but this is pointless, because int_tofp()
simply shifts its argument to the left by FRAC_BITS which mathematically
is equivalent to multuplication by 2^FRAC_BITS, so if this is done
to both arguments of a division, the extra factors will cancel each
other during that operation anyway.

Drop the pointless int_tofp() applied to div_fp() arguments throughout
the driver.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6c7cff1..8a368d2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -341,17 +341,17 @@ static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 
 static inline void pid_p_gain_set(struct _pid *pid, int percent)
 {
-	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->p_gain = div_fp(percent, 100);
 }
 
 static inline void pid_i_gain_set(struct _pid *pid, int percent)
 {
-	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->i_gain = div_fp(percent, 100);
 }
 
 static inline void pid_d_gain_set(struct _pid *pid, int percent)
 {
-	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->d_gain = div_fp(percent, 100);
 }
 
 static signed int pid_calc(struct _pid *pid, int32_t busy)
@@ -529,7 +529,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 
 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
 	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
-	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
+	turbo_fp = div_fp(no_turbo, total);
 	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
 	return sprintf(buf, "%u\n", turbo_pct);
 }
@@ -600,8 +600,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->max_perf_pct);
 	limits->max_perf_pct = max(limits->min_perf_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-				  int_tofp(100));
+	limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
 	if (hwp_active)
 		intel_pstate_hwp_set_online_cpus();
@@ -625,8 +624,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 				   limits->min_perf_pct);
 	limits->min_perf_pct = min(limits->max_perf_pct,
 				   limits->min_perf_pct);
-	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-				  int_tofp(100));
+	limits->min_perf = div_fp(limits->min_perf_pct, 100);
 
 	if (hwp_active)
 		intel_pstate_hwp_set_online_cpus();
@@ -1011,8 +1009,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
 	struct sample *sample = &cpu->sample;
 	int64_t core_pct;
 
-	core_pct = int_tofp(sample->aperf) * int_tofp(100);
-	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
+	core_pct = sample->aperf * int_tofp(100);
+	core_pct = div64_u64(core_pct, sample->mperf);
 
 	sample->core_pct_busy = (int32_t)core_pct;
 }
@@ -1115,8 +1113,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	 * specified pstate.
 	 */
 	core_busy = cpu->sample.core_pct_busy;
-	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
-	current_pstate = int_tofp(cpu->pstate.current_pstate);
+	max_pstate = cpu->pstate.max_pstate_physical;
+	current_pstate = cpu->pstate.current_pstate;
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
 	/*
@@ -1127,8 +1125,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	 */
 	duration_ns = cpu->sample.time - cpu->last_sample_time;
 	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
-		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
-				      int_tofp(duration_ns));
+		sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
 		core_busy = mul_fp(core_busy, sample_ratio);
 	}
 
@@ -1328,10 +1325,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
-	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-				  int_tofp(100));
-	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-				  int_tofp(100));
+	limits->min_perf = div_fp(limits->min_perf_pct, 100);
+	limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
  out:
 	intel_pstate_set_update_util_hook(policy->cpu);
-- 
cgit v0.10.2


From d2499d05f0f286a754c26e2bda62210d6a65a774 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@163.com>
Date: Tue, 5 Apr 2016 10:38:06 +0800
Subject: cpufreq: mt8173: use list_for_each_entry*()

Use list_for_each_entry*() instead of list_for_each*() to simplify
the code.

Signed-off-by: Geliang Tang <geliangtang@163.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 2058e6d..6f602c7 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -59,11 +59,8 @@ static LIST_HEAD(dvfs_info_list);
 static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
 {
 	struct mtk_cpu_dvfs_info *info;
-	struct list_head *list;
-
-	list_for_each(list, &dvfs_info_list) {
-		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
 
+	list_for_each_entry(info, &dvfs_info_list, list_head) {
 		if (cpumask_test_cpu(cpu, &info->cpus))
 			return info;
 	}
@@ -524,8 +521,7 @@ static struct cpufreq_driver mt8173_cpufreq_driver = {
 
 static int mt8173_cpufreq_probe(struct platform_device *pdev)
 {
-	struct mtk_cpu_dvfs_info *info;
-	struct list_head *list, *tmp;
+	struct mtk_cpu_dvfs_info *info, *tmp;
 	int cpu, ret;
 
 	for_each_possible_cpu(cpu) {
@@ -559,11 +555,9 @@ static int mt8173_cpufreq_probe(struct platform_device *pdev)
 	return 0;
 
 release_dvfs_info_list:
-	list_for_each_safe(list, tmp, &dvfs_info_list) {
-		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
-
+	list_for_each_entry_safe(info, tmp, &dvfs_info_list, list_head) {
 		mtk_cpu_dvfs_info_release(info);
-		list_del(list);
+		list_del(&info->list_head);
 	}
 
 	return ret;
-- 
cgit v0.10.2


From 4836df173aaed4b93e4d4b5c51e40f12e53ea26f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 5 Apr 2016 13:28:23 -0700
Subject: intel_pstate: Use pr_fmt

Prefix the output using the more common kernel style.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rebase ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8a368d2..1866705e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -10,6 +10,8 @@
  * of the License.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
 #include <linux/module.h>
@@ -571,7 +573,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	update_turbo_state();
 	if (limits->turbo_disabled) {
-		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
+		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 		return -EPERM;
 	}
 
@@ -1239,7 +1241,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
 	intel_pstate_busy_pid_reset(cpu);
 
-	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
+	pr_debug("controlling: cpu %d\n", cpunum);
 
 	return 0;
 }
@@ -1296,12 +1298,12 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		limits = &performance_limits;
 		if (policy->max >= policy->cpuinfo.max_freq) {
-			pr_debug("intel_pstate: set performance\n");
+			pr_debug("set performance\n");
 			intel_pstate_set_performance_limits(limits);
 			goto out;
 		}
 	} else {
-		pr_debug("intel_pstate: set powersave\n");
+		pr_debug("set powersave\n");
 		limits = &powersave_limits;
 	}
 
@@ -1353,7 +1355,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 	int cpu_num = policy->cpu;
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
+	pr_debug("CPU %d exiting\n", cpu_num);
 
 	intel_pstate_clear_update_util_hook(cpu_num);
 
@@ -1598,7 +1600,7 @@ hwp_cpu_matched:
 	if (intel_pstate_platform_pwr_mgmt_exists())
 		return -ENODEV;
 
-	pr_info("Intel P-state driver initializing.\n");
+	pr_info("Intel P-state driver initializing\n");
 
 	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
 	if (!all_cpu_data)
@@ -1615,7 +1617,7 @@ hwp_cpu_matched:
 	intel_pstate_sysfs_expose_params();
 
 	if (hwp_active)
-		pr_info("intel_pstate: HWP enabled\n");
+		pr_info("HWP enabled\n");
 
 	return rc;
 out:
@@ -1641,7 +1643,7 @@ static int __init intel_pstate_setup(char *str)
 	if (!strcmp(str, "disable"))
 		no_load = 1;
 	if (!strcmp(str, "no_hwp")) {
-		pr_info("intel_pstate: HWP disabled\n");
+		pr_info("HWP disabled\n");
 		no_hwp = 1;
 	}
 	if (!strcmp(str, "force"))
-- 
cgit v0.10.2


From b49c22a6ca3656c68506fea57caf3d8f08878570 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 5 Apr 2016 13:28:24 -0700
Subject: cpufreq: Convert printk(KERN_<LEVEL> to pr_<level>

Use the more common logging style.

Miscellanea:

o Coalesce formats
o Realign arguments
o Add a missing space between a coalesced format

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 7f38fb5..ed9e93d 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -648,10 +648,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
 		if ((c->x86 == 15) &&
 		    (c->x86_model == 6) &&
 		    (c->x86_mask == 8)) {
-			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
-			    "Xeon(R) 7100 Errata AL30, processors may "
-			    "lock up on frequency changes: disabling "
-			    "acpi-cpufreq.\n");
+			pr_info("acpi-cpufreq: Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
 			return -ENODEV;
 		    }
 		}
@@ -799,8 +796,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
 	    policy->cpuinfo.transition_latency > 20 * 1000) {
 		policy->cpuinfo.transition_latency = 20 * 1000;
-		printk_once(KERN_INFO
-			    "P-state transition latency capped at 20 uS\n");
+		pr_info_once("P-state transition latency capped at 20 uS\n");
 	}
 
 	/* table init */
@@ -822,7 +818,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_freqfree;
 
 	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
-		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+		pr_warn(FW_WARN "P-state 0 is not max freq\n");
 
 	switch (perf->control_register.space_id) {
 	case ACPI_ADR_SPACE_SYSTEM_IO:
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index db69eeb..7da96d5 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -174,13 +174,13 @@ static int nforce2_set_fsb(unsigned int fsb)
 	int pll = 0;
 
 	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
+		pr_err(PFX "FSB %d is out of range!\n", fsb);
 		return -EINVAL;
 	}
 
 	tfsb = nforce2_fsb_read(0);
 	if (!tfsb) {
-		printk(KERN_ERR PFX "Error while reading the FSB\n");
+		pr_err(PFX "Error while reading the FSB\n");
 		return -EINVAL;
 	}
 
@@ -276,8 +276,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 	/* local_irq_save(flags); */
 
 	if (nforce2_set_fsb(target_fsb) < 0)
-		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
-			target_fsb);
+		pr_err(PFX "Changing FSB to %d failed\n", target_fsb);
 	else
 		pr_debug("Changed FSB successfully to %d\n",
 			target_fsb);
@@ -325,8 +324,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 	/* FIX: Get FID from CPU */
 	if (!fid) {
 		if (!cpu_khz) {
-			printk(KERN_WARNING PFX
-			"cpu_khz not set, can't calculate multiplier!\n");
+			pr_warn(PFX "cpu_khz not set, can't calculate multiplier!\n");
 			return -ENODEV;
 		}
 
@@ -341,8 +339,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 		}
 	}
 
-	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
-	       fid / 10, fid % 10);
+	pr_info(PFX "FSB currently at %i MHz, FID %d.%d\n",
+		fsb, fid / 10, fid % 10);
 
 	/* Set maximum FSB to FSB at boot time */
 	max_fsb = nforce2_fsb_read(1);
@@ -401,11 +399,9 @@ static int nforce2_detect_chipset(void)
 	if (nforce2_dev == NULL)
 		return -ENODEV;
 
-	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
-	       nforce2_dev->revision);
-	printk(KERN_INFO PFX
-	       "FSB changing is maybe unstable and can lead to "
-	       "crashes and data loss.\n");
+	pr_info(PFX "Detected nForce2 chipset revision %X\n",
+		nforce2_dev->revision);
+	pr_info(PFX "FSB changing is maybe unstable and can lead to crashes and data loss\n");
 
 	return 0;
 }
@@ -423,7 +419,7 @@ static int __init nforce2_init(void)
 
 	/* detect chipset */
 	if (nforce2_detect_chipset()) {
-		printk(KERN_INFO PFX "No nForce2 chipset.\n");
+		pr_info(PFX "No nForce2 chipset\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 4085244c..c420f0c 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -141,11 +141,9 @@ static int eps_set_state(struct eps_cpu_data *centaur,
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-		current_voltage * 16 + 700);
+	pr_info("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n",
-		current_multiplier);
+	pr_info("eps: Current multiplier = %d\n", current_multiplier);
 	}
 #endif
 	return 0;
@@ -166,7 +164,7 @@ static int eps_target(struct cpufreq_policy *policy, unsigned int index)
 	dest_state = centaur->freq_table[index].driver_data & 0xffff;
 	ret = eps_set_state(centaur, policy, dest_state);
 	if (ret)
-		printk(KERN_ERR "eps: Timeout!\n");
+		pr_err("eps: Timeout!\n");
 	return ret;
 }
 
@@ -194,36 +192,36 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 
 	/* Check brand */
-	printk(KERN_INFO "eps: Detected VIA ");
+	pr_info("eps: Detected VIA ");
 
 	switch (c->x86_model) {
 	case 10:
 		rdmsr(0x1153, lo, hi);
 		brand = (((lo >> 2) ^ lo) >> 18) & 3;
-		printk(KERN_CONT "Model A ");
+		pr_cont("Model A ");
 		break;
 	case 13:
 		rdmsr(0x1154, lo, hi);
 		brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
-		printk(KERN_CONT "Model D ");
+		pr_cont("Model D ");
 		break;
 	}
 
 	switch (brand) {
 	case EPS_BRAND_C7M:
-		printk(KERN_CONT "C7-M\n");
+		pr_cont("C7-M\n");
 		break;
 	case EPS_BRAND_C7:
-		printk(KERN_CONT "C7\n");
+		pr_cont("C7\n");
 		break;
 	case EPS_BRAND_EDEN:
-		printk(KERN_CONT "Eden\n");
+		pr_cont("Eden\n");
 		break;
 	case EPS_BRAND_C7D:
-		printk(KERN_CONT "C7-D\n");
+		pr_cont("C7-D\n");
 		break;
 	case EPS_BRAND_C3:
-		printk(KERN_CONT "C3\n");
+		pr_cont("C3\n");
 		return -ENODEV;
 		break;
 	}
@@ -235,7 +233,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		/* Can be locked at 0 */
 		rdmsrl(MSR_IA32_MISC_ENABLE, val);
 		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
+			pr_info("eps: Can't enable Enhanced PowerSaver\n");
 			return -ENODEV;
 		}
 	}
@@ -243,22 +241,19 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-			current_voltage * 16 + 700);
+	pr_info("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
+	pr_info("eps: Current multiplier = %d\n", current_multiplier);
 
 	/* Print limits */
 	max_voltage = hi & 0xff;
-	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
-			max_voltage * 16 + 700);
+	pr_info("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
 	max_multiplier = (hi >> 8) & 0xff;
-	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
+	pr_info("eps: Highest multiplier = %d\n", max_multiplier);
 	min_voltage = (hi >> 16) & 0xff;
-	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
-			min_voltage * 16 + 700);
+	pr_info("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
 	min_multiplier = (hi >> 24) & 0xff;
-	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
+	pr_info("eps: Lowest multiplier = %d\n", min_multiplier);
 
 	/* Sanity checks */
 	if (current_multiplier == 0 || max_multiplier == 0
@@ -276,17 +271,13 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 
 	/* Check for systems using underclocked CPU */
 	if (!freq_failsafe_off && max_multiplier != current_multiplier) {
-		printk(KERN_INFO "eps: Your processor is running at different "
-			"frequency then its maximum. Aborting.\n");
-		printk(KERN_INFO "eps: You can use freq_failsafe_off option "
-			"to disable this check.\n");
+		pr_info("eps: Your processor is running at different frequency then its maximum. Aborting.\n");
+		pr_info("eps: You can use freq_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 	if (!voltage_failsafe_off && max_voltage != current_voltage) {
-		printk(KERN_INFO "eps: Your processor is running at different "
-			"voltage then its maximum. Aborting.\n");
-		printk(KERN_INFO "eps: You can use voltage_failsafe_off "
-			"option to disable this check.\n");
+		pr_info("eps: Your processor is running at different voltage then its maximum. Aborting.\n");
+		pr_info("eps: You can use voltage_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 
@@ -297,13 +288,13 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Check for ACPI processor speed limit */
 	if (!ignore_acpi_limit && !eps_acpi_init()) {
 		if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
-			printk(KERN_INFO "eps: ACPI limit %u.%uGHz\n",
+			pr_info("eps: ACPI limit %u.%uGHz\n",
 				limit/1000000,
 				(limit%1000000)/10000);
 			eps_acpi_exit(policy);
 			/* Check if max_multiplier is in BIOS limits */
 			if (limit && max_multiplier * fsb > limit) {
-				printk(KERN_INFO "eps: Aborting.\n");
+				pr_info("eps: Aborting\n");
 				return -EINVAL;
 			}
 		}
@@ -319,8 +310,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		v = (set_max_voltage - 700) / 16;
 		/* Check if voltage is within limits */
 		if (v >= min_voltage && v <= max_voltage) {
-			printk(KERN_INFO "eps: Setting %dmV as maximum.\n",
-				v * 16 + 700);
+			pr_info("eps: Setting %dmV as maximum\n", v * 16 + 700);
 			max_voltage = v;
 		}
 	}
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 1c06e78..8f4dded 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -185,7 +185,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
 static int __init elanfreq_setup(char *str)
 {
 	max_freq = simple_strtoul(str, &str, 0);
-	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+	pr_warn("You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
 	return 1;
 }
 __setup("elanfreq=", elanfreq_setup);
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 0202429..fd36d6c 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -118,8 +118,7 @@ processor_get_freq (
 
 	if (ret) {
 		set_cpus_allowed_ptr(current, &saved_mask);
-		printk(KERN_WARNING "get performance failed with error %d\n",
-		       ret);
+		pr_warn("get performance failed with error %d\n", ret);
 		ret = 0;
 		goto migrate_end;
 	}
@@ -177,7 +176,7 @@ processor_set_freq (
 
 	ret = processor_set_pstate(value);
 	if (ret) {
-		printk(KERN_WARNING "Transition failed with error %d\n", ret);
+		pr_warn("Transition failed with error %d\n", ret);
 		retval = -ENODEV;
 		goto migrate_end;
 	}
@@ -291,8 +290,8 @@ acpi_cpufreq_cpu_init (
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
-	       "activated.\n", cpu);
+	pr_info("acpi-cpufreq: CPU%u - ACPI performance management activated\n",
+		cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
 		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 0f6b229..2baeb8c 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -347,14 +347,13 @@ retry_loop:
 	freqs.new = calc_speed(longhaul_get_cpu_mult());
 	/* Check if requested frequency is set. */
 	if (unlikely(freqs.new != speed)) {
-		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+		pr_info(PFX "Failed to set requested frequency!\n");
 		/* Revision ID = 1 but processor is expecting revision key
 		 * equal to 0. Jumpers at the bottom of processor will change
 		 * multiplier and FSB, but will not change bits in Longhaul
 		 * MSR nor enable voltage scaling. */
 		if (!revid_errata) {
-			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
-						"option.\n");
+			pr_info(PFX "Enabling \"Ignore Revision ID\" option\n");
 			revid_errata = 1;
 			msleep(200);
 			goto retry_loop;
@@ -364,11 +363,10 @@ retry_loop:
 		 * but it doesn't change frequency. I tried poking various
 		 * bits in northbridge registers, but without success. */
 		if (longhaul_flags & USE_ACPI_C3) {
-			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+			pr_info(PFX "Disabling ACPI C3 support\n");
 			longhaul_flags &= ~USE_ACPI_C3;
 			if (revid_errata) {
-				printk(KERN_INFO PFX "Disabling \"Ignore "
-						"Revision ID\" option.\n");
+				pr_info(PFX "Disabling \"Ignore Revision ID\" option\n");
 				revid_errata = 0;
 			}
 			msleep(200);
@@ -379,7 +377,7 @@ retry_loop:
 		 * RevID = 1. RevID errata will make things right. Just
 		 * to be 100% sure. */
 		if (longhaul_version == TYPE_LONGHAUL_V2) {
-			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+			pr_info(PFX "Switching to Longhaul ver. 1\n");
 			longhaul_version = TYPE_LONGHAUL_V1;
 			msleep(200);
 			goto retry_loop;
@@ -387,8 +385,7 @@ retry_loop:
 	}
 
 	if (!bm_timeout) {
-		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
-				"idle PCI bus.\n");
+		pr_info(PFX "Warning: Timeout while waiting for idle PCI bus\n");
 		return -EBUSY;
 	}
 
@@ -433,12 +430,12 @@ static int longhaul_get_ranges(void)
 	/* Get current frequency */
 	mult = longhaul_get_cpu_mult();
 	if (mult == -1) {
-		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+		pr_info(PFX "Invalid (reserved) multiplier!\n");
 		return -EINVAL;
 	}
 	fsb = guess_fsb(mult);
 	if (fsb == 0) {
-		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+		pr_info(PFX "Invalid (reserved) FSB!\n");
 		return -EINVAL;
 	}
 	/* Get max multiplier - as we always did.
@@ -468,11 +465,11 @@ static int longhaul_get_ranges(void)
 		 print_speed(highest_speed/1000));
 
 	if (lowest_speed == highest_speed) {
-		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		pr_info(PFX "highestspeed == lowest, aborting\n");
 		return -EINVAL;
 	}
 	if (lowest_speed > highest_speed) {
-		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+		pr_info(PFX "nonsense! lowest (%d > %d) !\n",
 			lowest_speed, highest_speed);
 		return -EINVAL;
 	}
@@ -538,16 +535,16 @@ static void longhaul_setup_voltagescaling(void)
 
 	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
 	if (!(longhaul.bits.RevisionID & 1)) {
-		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+		pr_info(PFX "Voltage scaling not supported by CPU\n");
 		return;
 	}
 
 	if (!longhaul.bits.VRMRev) {
-		printk(KERN_INFO PFX "VRM 8.5\n");
+		pr_info(PFX "VRM 8.5\n");
 		vrm_mV_table = &vrm85_mV[0];
 		mV_vrm_table = &mV_vrm85[0];
 	} else {
-		printk(KERN_INFO PFX "Mobile VRM\n");
+		pr_info(PFX "Mobile VRM\n");
 		if (cpu_model < CPU_NEHEMIAH)
 			return;
 		vrm_mV_table = &mobilevrm_mV[0];
@@ -558,27 +555,21 @@ static void longhaul_setup_voltagescaling(void)
 	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
 
 	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
-					"Voltage scaling disabled.\n",
-					minvid.mV/1000, minvid.mV%1000,
-					maxvid.mV/1000, maxvid.mV%1000);
+		pr_info(PFX "Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n",
+			minvid.mV/1000, minvid.mV%1000,
+			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	if (minvid.mV == maxvid.mV) {
-		printk(KERN_INFO PFX "Claims to support voltage scaling but "
-				"min & max are both %d.%03d. "
-				"Voltage scaling disabled\n",
-				maxvid.mV/1000, maxvid.mV%1000);
+		pr_info(PFX "Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n",
+			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	/* How many voltage steps*/
 	numvscales = maxvid.pos - minvid.pos + 1;
-	printk(KERN_INFO PFX
-		"Max VID=%d.%03d  "
-		"Min VID=%d.%03d, "
-		"%d possible voltage scales\n",
+	pr_info(PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
 		maxvid.mV/1000, maxvid.mV%1000,
 		minvid.mV/1000, minvid.mV%1000,
 		numvscales);
@@ -617,12 +608,12 @@ static void longhaul_setup_voltagescaling(void)
 			pos = minvid.pos;
 		freq_pos->driver_data |= mV_vrm_table[pos] << 8;
 		vid = vrm_mV_table[mV_vrm_table[pos]];
-		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+		pr_info(PFX "f: %d kHz, index: %d, vid: %d mV\n",
 			speed, (int)(freq_pos - longhaul_table), vid.mV);
 	}
 
 	can_scale_voltage = 1;
-	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+	pr_info(PFX "Voltage scaling enabled\n");
 }
 
 
@@ -720,8 +711,7 @@ static int enable_arbiter_disable(void)
 			pci_write_config_byte(dev, reg, pci_cmd);
 			pci_read_config_byte(dev, reg, &pci_cmd);
 			if (!(pci_cmd & 1<<7)) {
-				printk(KERN_ERR PFX
-					"Can't enable access to port 0x22.\n");
+				pr_err(PFX "Can't enable access to port 0x22\n");
 				status = 0;
 			}
 		}
@@ -758,8 +748,7 @@ static int longhaul_setup_southbridge(void)
 		if (pci_cmd & 1 << 7) {
 			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
 			acpi_regs_addr &= 0xff00;
-			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
-					acpi_regs_addr);
+			pr_info(PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
 		}
 
 		pci_dev_put(dev);
@@ -853,14 +842,14 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 			longhaul_version = TYPE_LONGHAUL_V1;
 	}
 
-	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	pr_info(PFX "VIA %s CPU detected.  ", cpuname);
 	switch (longhaul_version) {
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
+		pr_cont("Longhaul v%d supported\n", longhaul_version);
 		break;
 	case TYPE_POWERSAVER:
-		printk(KERN_CONT "Powersaver supported.\n");
+		pr_cont("Powersaver supported\n");
 		break;
 	};
 
@@ -889,15 +878,14 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 	if (!(longhaul_flags & USE_ACPI_C3
 	     || longhaul_flags & USE_NORTHBRIDGE)
 	    && ((pr == NULL) || !(pr->flags.bm_control))) {
-		printk(KERN_ERR PFX
-			"No ACPI support. Unsupported northbridge.\n");
+		pr_err(PFX "No ACPI support: Unsupported northbridge\n");
 		return -ENODEV;
 	}
 
 	if (longhaul_flags & USE_NORTHBRIDGE)
-		printk(KERN_INFO PFX "Using northbridge support.\n");
+		pr_info(PFX "Using northbridge support\n");
 	if (longhaul_flags & USE_ACPI_C3)
-		printk(KERN_INFO PFX "Using ACPI support.\n");
+		pr_info(PFX "Using ACPI support\n");
 
 	ret = longhaul_get_ranges();
 	if (ret != 0)
@@ -934,20 +922,18 @@ static int __init longhaul_init(void)
 		return -ENODEV;
 
 	if (!enable) {
-		printk(KERN_ERR PFX "Option \"enable\" not set. Aborting.\n");
+		pr_err(PFX "Option \"enable\" not set - Aborting\n");
 		return -ENODEV;
 	}
 #ifdef CONFIG_SMP
 	if (num_online_cpus() > 1) {
-		printk(KERN_ERR PFX "More than 1 CPU detected, "
-				"longhaul disabled.\n");
+		pr_err(PFX "More than 1 CPU detected, longhaul disabled\n");
 		return -ENODEV;
 	}
 #endif
 #ifdef CONFIG_X86_IO_APIC
 	if (cpu_has_apic) {
-		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
-				"broken in this configuration.\n");
+		pr_err(PFX "APIC detected. Longhaul is currently broken in this configuration.\n");
 		return -ENODEV;
 	}
 #endif
@@ -955,7 +941,7 @@ static int __init longhaul_init(void)
 	case 6 ... 9:
 		return cpufreq_register_driver(&longhaul_driver);
 	case 10:
-		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+		pr_err(PFX "Use acpi-cpufreq driver for VIA C7\n");
 	default:
 		;
 	}
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index cd593c1..93c7928 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -76,7 +76,7 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	cpuclk = clk_get(NULL, "cpu_clk");
 	if (IS_ERR(cpuclk)) {
-		printk(KERN_ERR "cpufreq: couldn't get CPU clk\n");
+		pr_err("cpufreq: couldn't get CPU clk\n");
 		return PTR_ERR(cpuclk);
 	}
 
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index cc3408f..7e55632 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -174,7 +174,7 @@ static int __init maple_cpufreq_init(void)
 	/* Get first CPU node */
 	cpunode = of_cpu_device_node_get(0);
 	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
+		pr_err("cpufreq: Can't find any CPU 0 node\n");
 		goto bail_noprops;
 	}
 
@@ -182,8 +182,7 @@ static int __init maple_cpufreq_init(void)
 	/* we actually don't care on which CPU to access PVR */
 	pvr_hi = PVR_VER(mfspr(SPRN_PVR));
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version (%x)\n",
-				pvr_hi);
+		pr_err("cpufreq: Unsupported CPU version (%x)\n", pvr_hi);
 		goto bail_noprops;
 	}
 
@@ -222,8 +221,8 @@ static int __init maple_cpufreq_init(void)
 	maple_pmode_cur = -1;
 	maple_scom_switch_freq(maple_scom_query_freq());
 
-	printk(KERN_INFO "Registering Maple CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering Maple CPU frequency driver\n");
+	pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
 		maple_cpu_freqs[1].frequency/1000,
 		maple_cpu_freqs[0].frequency/1000,
 		maple_cpu_freqs[maple_pmode_cur].frequency/1000);
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index e3866e0..655fc94 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -163,13 +163,13 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
 {
 	mpu_dev = get_cpu_device(0);
 	if (!mpu_dev) {
-		pr_warning("%s: unable to get the mpu device\n", __func__);
+		pr_warn("%s: unable to get the mpu device\n", __func__);
 		return -EINVAL;
 	}
 
 	mpu_reg = regulator_get(mpu_dev, "vcc");
 	if (IS_ERR(mpu_reg)) {
-		pr_warning("%s: unable to get MPU regulator\n", __func__);
+		pr_warn("%s: unable to get MPU regulator\n", __func__);
 		mpu_reg = NULL;
 	} else {
 		/* 
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 5dd95da..4d1a443 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -124,11 +124,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			printk_once(KERN_WARNING PFX "Warning: EST-capable "
-			       "CPU detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition to frequency "
-			       "scaling. You should use that instead of "
-			       "p4-clockmod, if possible.\n");
+			pr_warn_once(PFX "Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. You should use that instead of p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
@@ -152,11 +148,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 
 	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
-		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
-		       "The speedstep-ich or acpi cpufreq modules offer "
-		       "voltage scaling in addition of frequency scaling. "
-		       "You should use either one instead of p4-clockmod, "
-		       "if possible.\n");
+		pr_warn(PFX "Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n");
 		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
 	}
 
@@ -265,8 +257,7 @@ static int __init cpufreq_p4_init(void)
 
 	ret = cpufreq_register_driver(&p4clockmod_driver);
 	if (!ret)
-		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
-				"Modulation available\n");
+		pr_info(PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
 
 	return ret;
 }
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 1f49d97..072d7b3 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -481,13 +481,13 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 		freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
 		lenp /= sizeof(u32);
 		if (freqs == NULL || lenp != 2) {
-			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
+			pr_err("cpufreq: bus-frequencies incorrect or missing\n");
 			return 1;
 		}
 		ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
 						NULL);
 		if (ratio == NULL) {
-			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
+			pr_err("cpufreq: processor-to-bus-ratio*2 missing\n");
 			return 1;
 		}
 
@@ -550,7 +550,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
 	if (volt_gpio_np)
 		voltage_gpio = read_gpio(volt_gpio_np);
 	if (!voltage_gpio){
-		printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
+		pr_err("cpufreq: missing cpu-vcore-select gpio\n");
 		return 1;
 	}
 
@@ -675,9 +675,9 @@ out:
 	pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
 	ppc_proc_freq = cur_freq * 1000ul;
 
-	printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
-	       low_freq/1000, hi_freq/1000, cur_freq/1000);
+	pr_info("Registering PowerMac CPU frequency driver\n");
+	pr_info("Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
+		low_freq/1000, hi_freq/1000, cur_freq/1000);
 
 	return cpufreq_register_driver(&pmac_cpufreq_driver);
 }
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 4ff8687..5ffe085 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -138,7 +138,7 @@ static void g5_vdnap_switch_volt(int speed_mode)
 		usleep_range(1000, 1000);
 	}
 	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+		pr_warn("cpufreq: Timeout in clock slewing !\n");
 }
 
 
@@ -266,7 +266,7 @@ static int g5_pfunc_switch_freq(int speed_mode)
 		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
 
 	if (rc)
-		printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
+		pr_warn("cpufreq: pfunc switch error %d\n", rc);
 
 	/* It's an irq GPIO so we should be able to just block here,
 	 * I'll do that later after I've properly tested the IRQ code for
@@ -282,7 +282,7 @@ static int g5_pfunc_switch_freq(int speed_mode)
 		usleep_range(500, 500);
 	}
 	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+		pr_warn("cpufreq: Timeout in clock slewing !\n");
 
 	/* If frequency is going down, last ramp the voltage */
 	if (speed_mode > g5_pmode_cur)
@@ -368,7 +368,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 	}
 	pvr_hi = (*valp) >> 16;
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
+		pr_err("cpufreq: Unsupported CPU version\n");
 		goto bail_noprops;
 	}
 
@@ -403,8 +403,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 
 		root = of_find_node_by_path("/");
 		if (root == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find root of "
-			       "device tree\n");
+			pr_err("cpufreq: Can't find root of device tree\n");
 			goto bail_noprops;
 		}
 		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
@@ -412,8 +411,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 			pmf_find_function(root, "slewing-done");
 		if (pfunc_set_vdnap0 == NULL ||
 		    pfunc_vdnap0_complete == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find required "
-			       "platform function\n");
+			pr_err("cpufreq: Can't find required platform function\n");
 			goto bail_noprops;
 		}
 
@@ -453,10 +451,10 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 	g5_pmode_cur = -1;
 	g5_switch_freq(g5_query_freq());
 
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
-	       freq_method, volt_method);
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering G5 CPU frequency driver\n");
+	pr_info("Frequency method: %s, Voltage method: %s\n",
+		freq_method, volt_method);
+	pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
 		g5_cpu_freqs[1].frequency/1000,
 		g5_cpu_freqs[0].frequency/1000,
 		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
@@ -493,7 +491,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	if (cpuid != NULL)
 		eeprom = of_get_property(cpuid, "cpuid", NULL);
 	if (eeprom == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
+		pr_err("cpufreq: Can't find cpuid EEPROM !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -511,7 +509,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 		break;
 	}
 	if (hwclock == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
+		pr_err("cpufreq: Can't find i2c clock chip !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -539,7 +537,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	/* Check we have minimum requirements */
 	if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
 	    pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
+		pr_err("cpufreq: Can't find platform functions !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -567,7 +565,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	/* Get max frequency from device-tree */
 	valp = of_get_property(cpunode, "clock-frequency", NULL);
 	if (!valp) {
-		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
+		pr_err("cpufreq: Can't find CPU frequency !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -583,8 +581,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 
 	/* Check for machines with no useful settings */
 	if (il == ih) {
-		printk(KERN_WARNING "cpufreq: No low frequency mode available"
-		       " on this model !\n");
+		pr_warn("cpufreq: No low frequency mode available on this model !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -595,7 +592,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 
 	/* Sanity check */
 	if (min_freq >= max_freq || min_freq < 1000) {
-		printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
+		pr_err("cpufreq: Can't calculate low frequency !\n");
 		rc = -ENXIO;
 		goto bail;
 	}
@@ -619,10 +616,10 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	g5_pmode_cur = -1;
 	g5_switch_freq(g5_query_freq());
 
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: i2c/pfunc, "
-	       "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering G5 CPU frequency driver\n");
+	pr_info("Frequency method: i2c/pfunc, Voltage method: %s\n",
+		has_volt ? "i2c/pfunc" : "none");
+	pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
 		g5_cpu_freqs[1].frequency/1000,
 		g5_cpu_freqs[0].frequency/1000,
 		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index e6f24b2..981f40b 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -141,7 +141,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
 {
 
 	if (clock_ratio[best_i].driver_data > max_multiplier) {
-		printk(KERN_ERR PFX "invalid target frequency\n");
+		pr_err(PFX "invalid target frequency\n");
 		return -EINVAL;
 	}
 
@@ -175,13 +175,14 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
 				max_multiplier = param_max_multiplier;
 				goto have_max_multiplier;
 			}
-		printk(KERN_ERR "powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
+		pr_err("powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
 		return -EINVAL;
 	}
 
 	if (!max_multiplier) {
-		printk(KERN_WARNING "powernow-k6: unknown frequency %u, cannot determine current multiplier\n", khz);
-		printk(KERN_WARNING "powernow-k6: use module parameters max_multiplier and bus_frequency\n");
+		pr_warn("powernow-k6: unknown frequency %u, cannot determine current multiplier\n",
+			khz);
+		pr_warn("powernow-k6: use module parameters max_multiplier and bus_frequency\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -193,7 +194,7 @@ have_max_multiplier:
 			busfreq = param_busfreq / 10;
 			goto have_busfreq;
 		}
-		printk(KERN_ERR "powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
+		pr_err("powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
 		return -EINVAL;
 	}
 
@@ -275,7 +276,7 @@ static int __init powernow_k6_init(void)
 		return -ENODEV;
 
 	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
+		pr_info(PFX "PowerNow IOPORT region already used\n");
 		return -EIO;
 	}
 
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index c1ae199..4a1b420 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -127,14 +127,13 @@ static int check_powernow(void)
 	maxei = cpuid_eax(0x80000000);
 	if (maxei < 0x80000007) {	/* Any powernow info ? */
 #ifdef MODULE
-		printk(KERN_INFO PFX "No powernow capabilities detected\n");
+		pr_info(PFX "No powernow capabilities detected\n");
 #endif
 		return 0;
 	}
 
 	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		printk(KERN_INFO PFX "K7 660[A0] core detected, "
-				"enabling errata workarounds\n");
+		pr_info(PFX "K7 660[A0] core detected, enabling errata workarounds\n");
 		have_a0 = 1;
 	}
 
@@ -144,22 +143,22 @@ static int check_powernow(void)
 	if (!(edx & (1 << 1 | 1 << 2)))
 		return 0;
 
-	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+	pr_info(PFX "PowerNOW! Technology present. Can scale: ");
 
 	if (edx & 1 << 1) {
-		printk("frequency");
+		pr_cont("frequency");
 		can_scale_bus = 1;
 	}
 
 	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
-		printk(" and ");
+		pr_cont(" and ");
 
 	if (edx & 1 << 2) {
-		printk("voltage");
+		pr_cont("voltage");
 		can_scale_vid = 1;
 	}
 
-	printk(".\n");
+	pr_cont("\n");
 	return 1;
 }
 
@@ -427,16 +426,14 @@ err1:
 err05:
 	kfree(acpi_processor_perf);
 err0:
-	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
-			"this platform\n");
+	pr_warn(PFX "ACPI perflib can not be used on this platform\n");
 	acpi_processor_perf = NULL;
 	return retval;
 }
 #else
 static int powernow_acpi_init(void)
 {
-	printk(KERN_INFO PFX "no support for ACPI processor found."
-	       "  Please recompile your kernel with ACPI processor\n");
+	pr_info(PFX "no support for ACPI processor found - please recompile your kernel with ACPI processor\n");
 	return -EINVAL;
 }
 #endif
@@ -468,8 +465,7 @@ static int powernow_decode_bios(int maxfid, int startvid)
 			psb = (struct psb_s *) p;
 			pr_debug("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
-				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
-						" supported right now\n");
+				pr_info(PFX "Sorry, only v1.2 tables supported right now\n");
 				return -ENODEV;
 			}
 
@@ -481,10 +477,8 @@ static int powernow_decode_bios(int maxfid, int startvid)
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
-				printk(KERN_INFO PFX "BIOS set settling time "
-						"to %d microseconds. "
-						"Should be at least 100. "
-						"Correcting.\n", latency);
+				pr_info(PFX "BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n",
+					latency);
 				latency = 100;
 			}
 			pr_debug("Settling Time: %d microseconds.\n",
@@ -516,10 +510,9 @@ static int powernow_decode_bios(int maxfid, int startvid)
 						p += 2;
 				}
 			}
-			printk(KERN_INFO PFX "No PST tables match this cpuid "
-					"(0x%x)\n", etuple);
-			printk(KERN_INFO PFX "This is indicative of a broken "
-					"BIOS.\n");
+			pr_info(PFX "No PST tables match this cpuid (0x%x)\n",
+				etuple);
+			pr_info(PFX "This is indicative of a broken BIOS\n");
 
 			return -EINVAL;
 		}
@@ -552,7 +545,7 @@ static int fixup_sgtc(void)
 	sgtc = 100 * m * latency;
 	sgtc = sgtc / 3;
 	if (sgtc > 0xfffff) {
-		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+		pr_warn(PFX "SGTC too large %d\n", sgtc);
 		sgtc = 0xfffff;
 	}
 	return sgtc;
@@ -574,14 +567,10 @@ static unsigned int powernow_get(unsigned int cpu)
 
 static int acer_cpufreq_pst(const struct dmi_system_id *d)
 {
-	printk(KERN_WARNING PFX
-		"%s laptop with broken PST tables in BIOS detected.\n",
+	pr_warn(PFX "%s laptop with broken PST tables in BIOS detected\n",
 		d->ident);
-	printk(KERN_WARNING PFX
-		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
-		"BIOS than 3A71 (01/20/2003)\n");
-	printk(KERN_WARNING PFX
-		"cpufreq scaling has been disabled as a result of this.\n");
+	pr_warn(PFX "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+	pr_warn(PFX "cpufreq scaling has been disabled as a result of this\n");
 	return 0;
 }
 
@@ -616,40 +605,38 @@ static int powernow_cpu_init(struct cpufreq_policy *policy)
 
 	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
 	if (!fsb) {
-		printk(KERN_WARNING PFX "can not determine bus frequency\n");
+		pr_warn(PFX "can not determine bus frequency\n");
 		return -EINVAL;
 	}
 	pr_debug("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
-				"Trying ACPI instead\n");
+		pr_info(PFX "PSB/PST known to be broken - trying ACPI instead\n");
 		result = powernow_acpi_init();
 	} else {
 		result = powernow_decode_bios(fidvidstatus.bits.MFID,
 				fidvidstatus.bits.SVID);
 		if (result) {
-			printk(KERN_INFO PFX "Trying ACPI perflib\n");
+			pr_info(PFX "Trying ACPI perflib\n");
 			maximum_speed = 0;
 			minimum_speed = -1;
 			latency = 0;
 			result = powernow_acpi_init();
 			if (result) {
-				printk(KERN_INFO PFX
-					"ACPI and legacy methods failed\n");
+				pr_info(PFX "ACPI and legacy methods failed\n");
 			}
 		} else {
 			/* SGTC use the bus clock as timer */
 			latency = fixup_sgtc();
-			printk(KERN_INFO PFX "SGTC: %d\n", latency);
+			pr_info(PFX "SGTC: %d\n", latency);
 		}
 	}
 
 	if (result)
 		return result;
 
-	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
-				minimum_speed/1000, maximum_speed/1000);
+	pr_info(PFX "Minimum speed %d MHz - Maximum speed %d MHz\n",
+		minimum_speed/1000, maximum_speed/1000);
 
 	policy->cpuinfo.transition_latency =
 		cpufreq_scale(2000000UL, fsb, latency);
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 46fee15..8a27667 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -233,9 +233,8 @@ static void pxa27x_guess_max_freq(void)
 {
 	if (!pxa27x_maxfreq) {
 		pxa27x_maxfreq = 416000;
-		printk(KERN_INFO "PXA CPU 27x max frequency not defined "
-		       "(pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
-		       pxa27x_maxfreq);
+		pr_info("PXA CPU 27x max frequency not defined (pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
+			pxa27x_maxfreq);
 	} else {
 		pxa27x_maxfreq *= 1000;
 	}
@@ -417,7 +416,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy)
 		cpufreq_table_validate_and_show(policy, pxa27x_freq_table);
 	}
 
-	printk(KERN_INFO "PXA CPU frequency change support initialized\n");
+	pr_info("PXA CPU frequency change support initialized\n");
 
 	return 0;
 }
diff --git a/drivers/cpufreq/s3c2412-cpufreq.c b/drivers/cpufreq/s3c2412-cpufreq.c
index eb26213..f9447ba 100644
--- a/drivers/cpufreq/s3c2412-cpufreq.c
+++ b/drivers/cpufreq/s3c2412-cpufreq.c
@@ -197,21 +197,20 @@ static int s3c2412_cpufreq_add(struct device *dev,
 
 	hclk = clk_get(NULL, "hclk");
 	if (IS_ERR(hclk)) {
-		printk(KERN_ERR "%s: cannot find hclk clock\n", __func__);
+		pr_err("%s: cannot find hclk clock\n", __func__);
 		return -ENOENT;
 	}
 
 	fclk = clk_get(NULL, "fclk");
 	if (IS_ERR(fclk)) {
-		printk(KERN_ERR "%s: cannot find fclk clock\n", __func__);
+		pr_err("%s: cannot find fclk clock\n", __func__);
 		goto err_fclk;
 	}
 
 	fclk_rate = clk_get_rate(fclk);
 	if (fclk_rate > 200000000) {
-		printk(KERN_INFO
-		       "%s: fclk %ld MHz, assuming 266MHz capable part\n",
-		       __func__, fclk_rate / 1000000);
+		pr_info("%s: fclk %ld MHz, assuming 266MHz capable part\n",
+			__func__, fclk_rate / 1000000);
 		s3c2412_cpufreq_info.max.fclk = 266000000;
 		s3c2412_cpufreq_info.max.hclk = 133000000;
 		s3c2412_cpufreq_info.max.pclk =  66000000;
@@ -219,13 +218,13 @@ static int s3c2412_cpufreq_add(struct device *dev,
 
 	armclk = clk_get(NULL, "armclk");
 	if (IS_ERR(armclk)) {
-		printk(KERN_ERR "%s: cannot find arm clock\n", __func__);
+		pr_err("%s: cannot find arm clock\n", __func__);
 		goto err_armclk;
 	}
 
 	xtal = clk_get(NULL, "xtal");
 	if (IS_ERR(xtal)) {
-		printk(KERN_ERR "%s: cannot find xtal clock\n", __func__);
+		pr_err("%s: cannot find xtal clock\n", __func__);
 		goto err_xtal;
 	}
 
diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c
index 0129f5c..f6cefe3 100644
--- a/drivers/cpufreq/s3c2440-cpufreq.c
+++ b/drivers/cpufreq/s3c2440-cpufreq.c
@@ -66,7 +66,7 @@ static int s3c2440_cpufreq_calcdivs(struct s3c_cpufreq_config *cfg)
 		     __func__, fclk, armclk, hclk_max);
 
 	if (armclk > fclk) {
-		printk(KERN_WARNING "%s: armclk > fclk\n", __func__);
+		pr_warn("%s: armclk > fclk\n", __func__);
 		armclk = fclk;
 	}
 
@@ -273,7 +273,7 @@ static int s3c2440_cpufreq_add(struct device *dev,
 	armclk = s3c_cpufreq_clk_get(NULL, "armclk");
 
 	if (IS_ERR(xtal) || IS_ERR(hclk) || IS_ERR(fclk) || IS_ERR(armclk)) {
-		printk(KERN_ERR "%s: failed to get clocks\n", __func__);
+		pr_err("%s: failed to get clocks\n", __func__);
 		return -ENOENT;
 	}
 
diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
index 9b7b428..8182608 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
@@ -178,7 +178,7 @@ static int __init s3c_freq_debugfs_init(void)
 {
 	dbgfs_root = debugfs_create_dir("s3c-cpufreq", NULL);
 	if (IS_ERR(dbgfs_root)) {
-		printk(KERN_ERR "%s: error creating debugfs root\n", __func__);
+		pr_err("%s: error creating debugfs root\n", __func__);
 		return PTR_ERR(dbgfs_root);
 	}
 
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 68ef8fd..68f8837 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -175,7 +175,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy,
 	cpu_new.freq.fclk = cpu_new.pll.frequency;
 
 	if (s3c_cpufreq_calcdivs(&cpu_new) < 0) {
-		printk(KERN_ERR "no divisors for %d\n", target_freq);
+		pr_err("no divisors for %d\n", target_freq);
 		goto err_notpossible;
 	}
 
@@ -187,7 +187,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy,
 
 	if (cpu_new.freq.hclk != cpu_cur.freq.hclk) {
 		if (s3c_cpufreq_calcio(&cpu_new) < 0) {
-			printk(KERN_ERR "%s: no IO timings\n", __func__);
+			pr_err("%s: no IO timings\n", __func__);
 			goto err_notpossible;
 		}
 	}
@@ -262,7 +262,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy,
 	return 0;
 
  err_notpossible:
-	printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+	pr_err("no compatible settings for %d\n", target_freq);
 	return -EINVAL;
 }
 
@@ -331,7 +331,7 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 						     &index);
 
 		if (ret < 0) {
-			printk(KERN_ERR "%s: no PLL available\n", __func__);
+			pr_err("%s: no PLL available\n", __func__);
 			goto err_notpossible;
 		}
 
@@ -346,7 +346,7 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 	return s3c_cpufreq_settarget(policy, target_freq, pll);
 
  err_notpossible:
-	printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+	pr_err("no compatible settings for %d\n", target_freq);
 	return -EINVAL;
 }
 
@@ -356,7 +356,7 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name)
 
 	clk = clk_get(dev, name);
 	if (IS_ERR(clk))
-		printk(KERN_ERR "cpufreq: failed to get clock '%s'\n", name);
+		pr_err("cpufreq: failed to get clock '%s'\n", name);
 
 	return clk;
 }
@@ -378,15 +378,16 @@ static int __init s3c_cpufreq_initclks(void)
 
 	if (IS_ERR(clk_fclk) || IS_ERR(clk_hclk) || IS_ERR(clk_pclk) ||
 	    IS_ERR(_clk_mpll) || IS_ERR(clk_arm) || IS_ERR(_clk_xtal)) {
-		printk(KERN_ERR "%s: could not get clock(s)\n", __func__);
+		pr_err("%s: could not get clock(s)\n", __func__);
 		return -ENOENT;
 	}
 
-	printk(KERN_INFO "%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n", __func__,
-	       clk_get_rate(clk_fclk) / 1000,
-	       clk_get_rate(clk_hclk) / 1000,
-	       clk_get_rate(clk_pclk) / 1000,
-	       clk_get_rate(clk_arm) / 1000);
+	pr_info("%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n",
+		__func__,
+		clk_get_rate(clk_fclk) / 1000,
+		clk_get_rate(clk_hclk) / 1000,
+		clk_get_rate(clk_pclk) / 1000,
+		clk_get_rate(clk_arm) / 1000);
 
 	return 0;
 }
@@ -424,7 +425,7 @@ static int s3c_cpufreq_resume(struct cpufreq_policy *policy)
 
 	ret = s3c_cpufreq_settarget(NULL, suspend_freq, &suspend_pll);
 	if (ret) {
-		printk(KERN_ERR "%s: failed to reset pll/freq\n", __func__);
+		pr_err("%s: failed to reset pll/freq\n", __func__);
 		return ret;
 	}
 
@@ -449,13 +450,12 @@ static struct cpufreq_driver s3c24xx_driver = {
 int s3c_cpufreq_register(struct s3c_cpufreq_info *info)
 {
 	if (!info || !info->name) {
-		printk(KERN_ERR "%s: failed to pass valid information\n",
-		       __func__);
+		pr_err("%s: failed to pass valid information\n", __func__);
 		return -EINVAL;
 	}
 
-	printk(KERN_INFO "S3C24XX CPU Frequency driver, %s cpu support\n",
-	       info->name);
+	pr_info("S3C24XX CPU Frequency driver, %s cpu support\n",
+		info->name);
 
 	/* check our driver info has valid data */
 
@@ -478,7 +478,7 @@ int __init s3c_cpufreq_setboard(struct s3c_cpufreq_board *board)
 	struct s3c_cpufreq_board *ours;
 
 	if (!board) {
-		printk(KERN_INFO "%s: no board data\n", __func__);
+		pr_info("%s: no board data\n", __func__);
 		return -EINVAL;
 	}
 
@@ -487,7 +487,7 @@ int __init s3c_cpufreq_setboard(struct s3c_cpufreq_board *board)
 
 	ours = kzalloc(sizeof(*ours), GFP_KERNEL);
 	if (ours == NULL) {
-		printk(KERN_ERR "%s: no memory\n", __func__);
+		pr_err("%s: no memory\n", __func__);
 		return -ENOMEM;
 	}
 
@@ -502,15 +502,15 @@ static int __init s3c_cpufreq_auto_io(void)
 	int ret;
 
 	if (!cpu_cur.info->get_iotiming) {
-		printk(KERN_ERR "%s: get_iotiming undefined\n", __func__);
+		pr_err("%s: get_iotiming undefined\n", __func__);
 		return -ENOENT;
 	}
 
-	printk(KERN_INFO "%s: working out IO settings\n", __func__);
+	pr_info("%s: working out IO settings\n", __func__);
 
 	ret = (cpu_cur.info->get_iotiming)(&cpu_cur, &s3c24xx_iotiming);
 	if (ret)
-		printk(KERN_ERR "%s: failed to get timings\n", __func__);
+		pr_err("%s: failed to get timings\n", __func__);
 
 	return ret;
 }
@@ -561,7 +561,7 @@ static void s3c_cpufreq_update_loctkime(void)
 	val = calc_locktime(rate, cpu_cur.info->locktime_u) << bits;
 	val |= calc_locktime(rate, cpu_cur.info->locktime_m);
 
-	printk(KERN_INFO "%s: new locktime is 0x%08x\n", __func__, val);
+	pr_info("%s: new locktime is 0x%08x\n", __func__, val);
 	__raw_writel(val, S3C2410_LOCKTIME);
 }
 
@@ -580,7 +580,7 @@ static int s3c_cpufreq_build_freq(void)
 
 	ftab = kzalloc(sizeof(*ftab) * size, GFP_KERNEL);
 	if (!ftab) {
-		printk(KERN_ERR "%s: no memory for tables\n", __func__);
+		pr_err("%s: no memory for tables\n", __func__);
 		return -ENOMEM;
 	}
 
@@ -608,15 +608,14 @@ static int __init s3c_cpufreq_initcall(void)
 		if (cpu_cur.board->auto_io) {
 			ret = s3c_cpufreq_auto_io();
 			if (ret) {
-				printk(KERN_ERR "%s: failed to get io timing\n",
+				pr_err("%s: failed to get io timing\n",
 				       __func__);
 				goto out;
 			}
 		}
 
 		if (cpu_cur.board->need_io && !cpu_cur.info->set_iotiming) {
-			printk(KERN_ERR "%s: no IO support registered\n",
-			       __func__);
+			pr_err("%s: no IO support registered\n", __func__);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -666,9 +665,9 @@ int s3c_plltab_register(struct cpufreq_frequency_table *plls,
 		vals += plls_no;
 		vals->frequency = CPUFREQ_TABLE_END;
 
-		printk(KERN_INFO "cpufreq: %d PLL entries\n", plls_no);
+		pr_info("cpufreq: %d PLL entries\n", plls_no);
 	} else
-		printk(KERN_ERR "cpufreq: no memory for PLL tables\n");
+		pr_err("cpufreq: no memory for PLL tables\n");
 
 	return vals ? 0 : -ENOMEM;
 }
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index a145b31..344e584 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -205,7 +205,7 @@ static void s5pv210_set_refresh(enum s5pv210_dmc_port ch, unsigned long freq)
 	} else if (ch == DMC1) {
 		reg = (dmc_base[1] + 0x30);
 	} else {
-		printk(KERN_ERR "Cannot find DMC port\n");
+		pr_err("Cannot find DMC port\n");
 		return;
 	}
 
@@ -534,7 +534,7 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy)
 	mem_type = check_mem_type(dmc_base[0]);
 
 	if ((mem_type != LPDDR) && (mem_type != LPDDR2)) {
-		printk(KERN_ERR "CPUFreq doesn't support this memory type\n");
+		pr_err("CPUFreq doesn't support this memory type\n");
 		ret = -EINVAL;
 		goto out_dmc1;
 	}
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index ac84e48..57bbddf 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -44,8 +44,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 
 	switch (clockspeed_reg & 0x03) {
 	default:
-		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
-				"value %02x\n", clockspeed_reg);
+		pr_err(PFX "error: cpuctl register has unexpected value %02x\n",
+		       clockspeed_reg);
 	case 0x01:
 		return 100000;
 	case 0x02:
@@ -112,7 +112,7 @@ static int __init sc520_freq_init(void)
 
 	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
 	if (!cpuctl) {
-		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+		pr_err("sc520_freq: error: failed to remap memory\n");
 		return -ENOMEM;
 	}
 
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 7d4a315..47df2d6 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -386,8 +386,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO PFX
-				"couldn't enable Enhanced SpeedStep\n");
+			pr_info(PFX "couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 37555c6..9d00c22 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -68,13 +68,13 @@ static int speedstep_find_register(void)
 	/* get PMBASE */
 	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
 	if (!(pmbase & 0x01)) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		pr_err("speedstep-ich: could not find speedstep register\n");
 		return -ENODEV;
 	}
 
 	pmbase &= 0xFFFFFFFE;
 	if (!pmbase) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		pr_err("speedstep-ich: could not find speedstep register\n");
 		return -ENODEV;
 	}
 
@@ -136,7 +136,7 @@ static void speedstep_set_state(unsigned int state)
 		pr_debug("change to %u MHz succeeded\n",
 			speedstep_get_frequency(speedstep_processor) / 1000);
 	else
-		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
+		pr_err("cpufreq: change failed - I/O error\n");
 
 	return;
 }
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 15d3214..32bdf1d 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -153,7 +153,7 @@ static unsigned int pentium_core_get_frequency(void)
 		fsb = 333333;
 		break;
 	default:
-		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+		pr_err("PCORE - MSR_FSB_FREQ undefined value\n");
 	}
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
@@ -453,11 +453,8 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		 */
 		if (*transition_latency > 10000000 ||
 		    *transition_latency < 50000) {
-			printk(KERN_WARNING PFX "frequency transition "
-					"measured seems out of range (%u "
-					"nSec), falling back to a safe one of"
-					"%u nSec.\n",
-					*transition_latency, 500000);
+			pr_warn(PFX "frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n",
+				*transition_latency, 500000);
 			*transition_latency = 500000;
 		}
 	}
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 819229e..af32a5f 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -204,9 +204,8 @@ static void speedstep_set_state(unsigned int state)
 			(speedstep_freqs[new_state].frequency / 1000),
 			retry, result);
 	else
-		printk(KERN_ERR "cpufreq: change to state %u "
-			"failed with new_state %u and result %u\n",
-			state, new_state, result);
+		pr_err("cpufreq: change to state %u failed with new_state %u and result %u\n",
+		       state, new_state, result);
 
 	return;
 }
-- 
cgit v0.10.2


From 1c5864e26c99cf32b51e878f3daf73a388d7561a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 5 Apr 2016 13:28:25 -0700
Subject: cpufreq: Use consistent prefixing via pr_fmt

Use the more common kernel style adding a define for pr_fmt.

Miscellanea:

o Remove now unused PFX defines

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index ed9e93d..67a612e 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -25,6 +25,8 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -50,8 +52,6 @@ MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
 
-#define PFX "acpi-cpufreq: "
-
 enum {
 	UNDEFINED_CAPABLE = 0,
 	SYSTEM_INTEL_MSR_CAPABLE,
@@ -648,7 +648,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
 		if ((c->x86 == 15) &&
 		    (c->x86_model == 6) &&
 		    (c->x86_mask == 8)) {
-			pr_info("acpi-cpufreq: Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
+			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
 			return -ENODEV;
 		    }
 		}
@@ -724,7 +724,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		cpumask_copy(data->freqdomain_cpus,
 			     topology_sibling_cpumask(cpu));
 		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
-		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
+		pr_info_once("overriding BIOS provided _PSD data\n");
 	}
 #endif
 
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index 7da96d5..5503d49 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -7,6 +7,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -56,8 +58,6 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
 MODULE_PARM_DESC(min_fsb,
 		"Minimum FSB to use, if not defined: current FSB - 50");
 
-#define PFX "cpufreq-nforce2: "
-
 /**
  * nforce2_calc_fsb - calculate FSB
  * @pll: PLL value
@@ -174,13 +174,13 @@ static int nforce2_set_fsb(unsigned int fsb)
 	int pll = 0;
 
 	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		pr_err(PFX "FSB %d is out of range!\n", fsb);
+		pr_err("FSB %d is out of range!\n", fsb);
 		return -EINVAL;
 	}
 
 	tfsb = nforce2_fsb_read(0);
 	if (!tfsb) {
-		pr_err(PFX "Error while reading the FSB\n");
+		pr_err("Error while reading the FSB\n");
 		return -EINVAL;
 	}
 
@@ -276,7 +276,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 	/* local_irq_save(flags); */
 
 	if (nforce2_set_fsb(target_fsb) < 0)
-		pr_err(PFX "Changing FSB to %d failed\n", target_fsb);
+		pr_err("Changing FSB to %d failed\n", target_fsb);
 	else
 		pr_debug("Changed FSB successfully to %d\n",
 			target_fsb);
@@ -324,7 +324,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 	/* FIX: Get FID from CPU */
 	if (!fid) {
 		if (!cpu_khz) {
-			pr_warn(PFX "cpu_khz not set, can't calculate multiplier!\n");
+			pr_warn("cpu_khz not set, can't calculate multiplier!\n");
 			return -ENODEV;
 		}
 
@@ -339,7 +339,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
 		}
 	}
 
-	pr_info(PFX "FSB currently at %i MHz, FID %d.%d\n",
+	pr_info("FSB currently at %i MHz, FID %d.%d\n",
 		fsb, fid / 10, fid % 10);
 
 	/* Set maximum FSB to FSB at boot time */
@@ -399,9 +399,9 @@ static int nforce2_detect_chipset(void)
 	if (nforce2_dev == NULL)
 		return -ENODEV;
 
-	pr_info(PFX "Detected nForce2 chipset revision %X\n",
+	pr_info("Detected nForce2 chipset revision %X\n",
 		nforce2_dev->revision);
-	pr_info(PFX "FSB changing is maybe unstable and can lead to crashes and data loss\n");
+	pr_info("FSB changing is maybe unstable and can lead to crashes and data loss\n");
 
 	return 0;
 }
@@ -419,7 +419,7 @@ static int __init nforce2_init(void)
 
 	/* detect chipset */
 	if (nforce2_detect_chipset()) {
-		pr_info(PFX "No nForce2 chipset\n");
+		pr_info("No nForce2 chipset\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index c420f0c..5ec87e3 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -6,6 +6,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -141,9 +143,9 @@ static int eps_set_state(struct eps_cpu_data *centaur,
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	pr_info("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+	pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	pr_info("eps: Current multiplier = %d\n", current_multiplier);
+	pr_info("Current multiplier = %d\n", current_multiplier);
 	}
 #endif
 	return 0;
@@ -164,7 +166,7 @@ static int eps_target(struct cpufreq_policy *policy, unsigned int index)
 	dest_state = centaur->freq_table[index].driver_data & 0xffff;
 	ret = eps_set_state(centaur, policy, dest_state);
 	if (ret)
-		pr_err("eps: Timeout!\n");
+		pr_err("Timeout!\n");
 	return ret;
 }
 
@@ -192,7 +194,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 
 	/* Check brand */
-	pr_info("eps: Detected VIA ");
+	pr_info("Detected VIA ");
 
 	switch (c->x86_model) {
 	case 10:
@@ -233,7 +235,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		/* Can be locked at 0 */
 		rdmsrl(MSR_IA32_MISC_ENABLE, val);
 		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			pr_info("eps: Can't enable Enhanced PowerSaver\n");
+			pr_info("Can't enable Enhanced PowerSaver\n");
 			return -ENODEV;
 		}
 	}
@@ -241,19 +243,19 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	pr_info("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+	pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	pr_info("eps: Current multiplier = %d\n", current_multiplier);
+	pr_info("Current multiplier = %d\n", current_multiplier);
 
 	/* Print limits */
 	max_voltage = hi & 0xff;
-	pr_info("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+	pr_info("Highest voltage = %dmV\n", max_voltage * 16 + 700);
 	max_multiplier = (hi >> 8) & 0xff;
-	pr_info("eps: Highest multiplier = %d\n", max_multiplier);
+	pr_info("Highest multiplier = %d\n", max_multiplier);
 	min_voltage = (hi >> 16) & 0xff;
-	pr_info("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+	pr_info("Lowest voltage = %dmV\n", min_voltage * 16 + 700);
 	min_multiplier = (hi >> 24) & 0xff;
-	pr_info("eps: Lowest multiplier = %d\n", min_multiplier);
+	pr_info("Lowest multiplier = %d\n", min_multiplier);
 
 	/* Sanity checks */
 	if (current_multiplier == 0 || max_multiplier == 0
@@ -271,13 +273,13 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 
 	/* Check for systems using underclocked CPU */
 	if (!freq_failsafe_off && max_multiplier != current_multiplier) {
-		pr_info("eps: Your processor is running at different frequency then its maximum. Aborting.\n");
-		pr_info("eps: You can use freq_failsafe_off option to disable this check.\n");
+		pr_info("Your processor is running at different frequency then its maximum. Aborting.\n");
+		pr_info("You can use freq_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 	if (!voltage_failsafe_off && max_voltage != current_voltage) {
-		pr_info("eps: Your processor is running at different voltage then its maximum. Aborting.\n");
-		pr_info("eps: You can use voltage_failsafe_off option to disable this check.\n");
+		pr_info("Your processor is running at different voltage then its maximum. Aborting.\n");
+		pr_info("You can use voltage_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 
@@ -288,13 +290,13 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Check for ACPI processor speed limit */
 	if (!ignore_acpi_limit && !eps_acpi_init()) {
 		if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
-			pr_info("eps: ACPI limit %u.%uGHz\n",
+			pr_info("ACPI limit %u.%uGHz\n",
 				limit/1000000,
 				(limit%1000000)/10000);
 			eps_acpi_exit(policy);
 			/* Check if max_multiplier is in BIOS limits */
 			if (limit && max_multiplier * fsb > limit) {
-				pr_info("eps: Aborting\n");
+				pr_info("Aborting\n");
 				return -EINVAL;
 			}
 		}
@@ -310,7 +312,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 		v = (set_max_voltage - 700) / 16;
 		/* Check if voltage is within limits */
 		if (v >= min_voltage && v <= max_voltage) {
-			pr_info("eps: Setting %dmV as maximum\n", v * 16 + 700);
+			pr_info("Setting %dmV as maximum\n", v * 16 + 700);
 			max_voltage = v;
 		}
 	}
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 8f4dded..bfce11c 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -16,6 +16,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index fd36d6c..759612d 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -8,6 +8,8 @@
  *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -290,8 +292,7 @@ acpi_cpufreq_cpu_init (
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	pr_info("acpi-cpufreq: CPU%u - ACPI performance management activated\n",
-		cpu);
+	pr_info("CPU%u - ACPI performance management activated\n", cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
 		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 2baeb8c..beae5cf 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -21,6 +21,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -40,8 +42,6 @@
 
 #include "longhaul.h"
 
-#define PFX "longhaul: "
-
 #define TYPE_LONGHAUL_V1	1
 #define TYPE_LONGHAUL_V2	2
 #define TYPE_POWERSAVER		3
@@ -347,13 +347,13 @@ retry_loop:
 	freqs.new = calc_speed(longhaul_get_cpu_mult());
 	/* Check if requested frequency is set. */
 	if (unlikely(freqs.new != speed)) {
-		pr_info(PFX "Failed to set requested frequency!\n");
+		pr_info("Failed to set requested frequency!\n");
 		/* Revision ID = 1 but processor is expecting revision key
 		 * equal to 0. Jumpers at the bottom of processor will change
 		 * multiplier and FSB, but will not change bits in Longhaul
 		 * MSR nor enable voltage scaling. */
 		if (!revid_errata) {
-			pr_info(PFX "Enabling \"Ignore Revision ID\" option\n");
+			pr_info("Enabling \"Ignore Revision ID\" option\n");
 			revid_errata = 1;
 			msleep(200);
 			goto retry_loop;
@@ -363,10 +363,10 @@ retry_loop:
 		 * but it doesn't change frequency. I tried poking various
 		 * bits in northbridge registers, but without success. */
 		if (longhaul_flags & USE_ACPI_C3) {
-			pr_info(PFX "Disabling ACPI C3 support\n");
+			pr_info("Disabling ACPI C3 support\n");
 			longhaul_flags &= ~USE_ACPI_C3;
 			if (revid_errata) {
-				pr_info(PFX "Disabling \"Ignore Revision ID\" option\n");
+				pr_info("Disabling \"Ignore Revision ID\" option\n");
 				revid_errata = 0;
 			}
 			msleep(200);
@@ -377,7 +377,7 @@ retry_loop:
 		 * RevID = 1. RevID errata will make things right. Just
 		 * to be 100% sure. */
 		if (longhaul_version == TYPE_LONGHAUL_V2) {
-			pr_info(PFX "Switching to Longhaul ver. 1\n");
+			pr_info("Switching to Longhaul ver. 1\n");
 			longhaul_version = TYPE_LONGHAUL_V1;
 			msleep(200);
 			goto retry_loop;
@@ -385,7 +385,7 @@ retry_loop:
 	}
 
 	if (!bm_timeout) {
-		pr_info(PFX "Warning: Timeout while waiting for idle PCI bus\n");
+		pr_info("Warning: Timeout while waiting for idle PCI bus\n");
 		return -EBUSY;
 	}
 
@@ -430,12 +430,12 @@ static int longhaul_get_ranges(void)
 	/* Get current frequency */
 	mult = longhaul_get_cpu_mult();
 	if (mult == -1) {
-		pr_info(PFX "Invalid (reserved) multiplier!\n");
+		pr_info("Invalid (reserved) multiplier!\n");
 		return -EINVAL;
 	}
 	fsb = guess_fsb(mult);
 	if (fsb == 0) {
-		pr_info(PFX "Invalid (reserved) FSB!\n");
+		pr_info("Invalid (reserved) FSB!\n");
 		return -EINVAL;
 	}
 	/* Get max multiplier - as we always did.
@@ -465,11 +465,11 @@ static int longhaul_get_ranges(void)
 		 print_speed(highest_speed/1000));
 
 	if (lowest_speed == highest_speed) {
-		pr_info(PFX "highestspeed == lowest, aborting\n");
+		pr_info("highestspeed == lowest, aborting\n");
 		return -EINVAL;
 	}
 	if (lowest_speed > highest_speed) {
-		pr_info(PFX "nonsense! lowest (%d > %d) !\n",
+		pr_info("nonsense! lowest (%d > %d) !\n",
 			lowest_speed, highest_speed);
 		return -EINVAL;
 	}
@@ -535,16 +535,16 @@ static void longhaul_setup_voltagescaling(void)
 
 	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
 	if (!(longhaul.bits.RevisionID & 1)) {
-		pr_info(PFX "Voltage scaling not supported by CPU\n");
+		pr_info("Voltage scaling not supported by CPU\n");
 		return;
 	}
 
 	if (!longhaul.bits.VRMRev) {
-		pr_info(PFX "VRM 8.5\n");
+		pr_info("VRM 8.5\n");
 		vrm_mV_table = &vrm85_mV[0];
 		mV_vrm_table = &mV_vrm85[0];
 	} else {
-		pr_info(PFX "Mobile VRM\n");
+		pr_info("Mobile VRM\n");
 		if (cpu_model < CPU_NEHEMIAH)
 			return;
 		vrm_mV_table = &mobilevrm_mV[0];
@@ -555,21 +555,21 @@ static void longhaul_setup_voltagescaling(void)
 	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
 
 	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		pr_info(PFX "Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n",
+		pr_info("Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n",
 			minvid.mV/1000, minvid.mV%1000,
 			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	if (minvid.mV == maxvid.mV) {
-		pr_info(PFX "Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n",
+		pr_info("Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n",
 			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	/* How many voltage steps*/
 	numvscales = maxvid.pos - minvid.pos + 1;
-	pr_info(PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+	pr_info("Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
 		maxvid.mV/1000, maxvid.mV%1000,
 		minvid.mV/1000, minvid.mV%1000,
 		numvscales);
@@ -608,12 +608,12 @@ static void longhaul_setup_voltagescaling(void)
 			pos = minvid.pos;
 		freq_pos->driver_data |= mV_vrm_table[pos] << 8;
 		vid = vrm_mV_table[mV_vrm_table[pos]];
-		pr_info(PFX "f: %d kHz, index: %d, vid: %d mV\n",
+		pr_info("f: %d kHz, index: %d, vid: %d mV\n",
 			speed, (int)(freq_pos - longhaul_table), vid.mV);
 	}
 
 	can_scale_voltage = 1;
-	pr_info(PFX "Voltage scaling enabled\n");
+	pr_info("Voltage scaling enabled\n");
 }
 
 
@@ -711,7 +711,7 @@ static int enable_arbiter_disable(void)
 			pci_write_config_byte(dev, reg, pci_cmd);
 			pci_read_config_byte(dev, reg, &pci_cmd);
 			if (!(pci_cmd & 1<<7)) {
-				pr_err(PFX "Can't enable access to port 0x22\n");
+				pr_err("Can't enable access to port 0x22\n");
 				status = 0;
 			}
 		}
@@ -748,7 +748,7 @@ static int longhaul_setup_southbridge(void)
 		if (pci_cmd & 1 << 7) {
 			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
 			acpi_regs_addr &= 0xff00;
-			pr_info(PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
+			pr_info("ACPI I/O at 0x%x\n", acpi_regs_addr);
 		}
 
 		pci_dev_put(dev);
@@ -842,7 +842,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 			longhaul_version = TYPE_LONGHAUL_V1;
 	}
 
-	pr_info(PFX "VIA %s CPU detected.  ", cpuname);
+	pr_info("VIA %s CPU detected.  ", cpuname);
 	switch (longhaul_version) {
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
@@ -878,14 +878,14 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy)
 	if (!(longhaul_flags & USE_ACPI_C3
 	     || longhaul_flags & USE_NORTHBRIDGE)
 	    && ((pr == NULL) || !(pr->flags.bm_control))) {
-		pr_err(PFX "No ACPI support: Unsupported northbridge\n");
+		pr_err("No ACPI support: Unsupported northbridge\n");
 		return -ENODEV;
 	}
 
 	if (longhaul_flags & USE_NORTHBRIDGE)
-		pr_info(PFX "Using northbridge support\n");
+		pr_info("Using northbridge support\n");
 	if (longhaul_flags & USE_ACPI_C3)
-		pr_info(PFX "Using ACPI support\n");
+		pr_info("Using ACPI support\n");
 
 	ret = longhaul_get_ranges();
 	if (ret != 0)
@@ -922,18 +922,18 @@ static int __init longhaul_init(void)
 		return -ENODEV;
 
 	if (!enable) {
-		pr_err(PFX "Option \"enable\" not set - Aborting\n");
+		pr_err("Option \"enable\" not set - Aborting\n");
 		return -ENODEV;
 	}
 #ifdef CONFIG_SMP
 	if (num_online_cpus() > 1) {
-		pr_err(PFX "More than 1 CPU detected, longhaul disabled\n");
+		pr_err("More than 1 CPU detected, longhaul disabled\n");
 		return -ENODEV;
 	}
 #endif
 #ifdef CONFIG_X86_IO_APIC
 	if (cpu_has_apic) {
-		pr_err(PFX "APIC detected. Longhaul is currently broken in this configuration.\n");
+		pr_err("APIC detected. Longhaul is currently broken in this configuration.\n");
 		return -ENODEV;
 	}
 #endif
@@ -941,7 +941,7 @@ static int __init longhaul_init(void)
 	case 6 ... 9:
 		return cpufreq_register_driver(&longhaul_driver);
 	case 10:
-		pr_err(PFX "Use acpi-cpufreq driver for VIA C7\n");
+		pr_err("Use acpi-cpufreq driver for VIA C7\n");
 	default:
 		;
 	}
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 93c7928..6bbdac1 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -10,6 +10,9 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/err.h>
@@ -76,7 +79,7 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	cpuclk = clk_get(NULL, "cpu_clk");
 	if (IS_ERR(cpuclk)) {
-		pr_err("cpufreq: couldn't get CPU clk\n");
+		pr_err("couldn't get CPU clk\n");
 		return PTR_ERR(cpuclk);
 	}
 
@@ -163,7 +166,7 @@ static int __init cpufreq_init(void)
 	if (ret)
 		return ret;
 
-	pr_info("cpufreq: Loongson-2F CPU frequency driver.\n");
+	pr_info("Loongson-2F CPU frequency driver\n");
 
 	cpufreq_register_notifier(&loongson2_cpufreq_notifier_block,
 				  CPUFREQ_TRANSITION_NOTIFIER);
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index 7e55632..d9df893 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -13,6 +13,8 @@
 
 #undef DEBUG
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -174,7 +176,7 @@ static int __init maple_cpufreq_init(void)
 	/* Get first CPU node */
 	cpunode = of_cpu_device_node_get(0);
 	if (cpunode == NULL) {
-		pr_err("cpufreq: Can't find any CPU 0 node\n");
+		pr_err("Can't find any CPU 0 node\n");
 		goto bail_noprops;
 	}
 
@@ -182,7 +184,7 @@ static int __init maple_cpufreq_init(void)
 	/* we actually don't care on which CPU to access PVR */
 	pvr_hi = PVR_VER(mfspr(SPRN_PVR));
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		pr_err("cpufreq: Unsupported CPU version (%x)\n", pvr_hi);
+		pr_err("Unsupported CPU version (%x)\n", pvr_hi);
 		goto bail_noprops;
 	}
 
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 655fc94..cead9be 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -13,6 +13,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -163,7 +166,7 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
 {
 	mpu_dev = get_cpu_device(0);
 	if (!mpu_dev) {
-		pr_warn("%s: unable to get the mpu device\n", __func__);
+		pr_warn("%s: unable to get the MPU device\n", __func__);
 		return -EINVAL;
 	}
 
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 4d1a443..fd77812 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -20,6 +20,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -35,8 +37,6 @@
 
 #include "speedstep-lib.h"
 
-#define PFX	"p4-clockmod: "
-
 /*
  * Duty Cycle (3bits), note DC_DISABLE is not specified in
  * intel docs i just use it to mean disable
@@ -124,7 +124,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			pr_warn_once(PFX "Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. You should use that instead of p4-clockmod, if possible.\n");
+			pr_warn_once("Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. You should use that instead of p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
@@ -148,7 +148,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 
 	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
-		pr_warn(PFX "Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n");
+		pr_warn("Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n");
 		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
 	}
 
@@ -257,7 +257,7 @@ static int __init cpufreq_p4_init(void)
 
 	ret = cpufreq_register_driver(&p4clockmod_driver);
 	if (!ret)
-		pr_info(PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+		pr_info("P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
 
 	return ret;
 }
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 072d7b3..b7b576e 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -13,6 +13,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -481,13 +483,13 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 		freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
 		lenp /= sizeof(u32);
 		if (freqs == NULL || lenp != 2) {
-			pr_err("cpufreq: bus-frequencies incorrect or missing\n");
+			pr_err("bus-frequencies incorrect or missing\n");
 			return 1;
 		}
 		ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
 						NULL);
 		if (ratio == NULL) {
-			pr_err("cpufreq: processor-to-bus-ratio*2 missing\n");
+			pr_err("processor-to-bus-ratio*2 missing\n");
 			return 1;
 		}
 
@@ -550,7 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
 	if (volt_gpio_np)
 		voltage_gpio = read_gpio(volt_gpio_np);
 	if (!voltage_gpio){
-		pr_err("cpufreq: missing cpu-vcore-select gpio\n");
+		pr_err("missing cpu-vcore-select gpio\n");
 		return 1;
 	}
 
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 5ffe085..267e089 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -12,6 +12,8 @@
 
 #undef DEBUG
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -138,7 +140,7 @@ static void g5_vdnap_switch_volt(int speed_mode)
 		usleep_range(1000, 1000);
 	}
 	if (done == 0)
-		pr_warn("cpufreq: Timeout in clock slewing !\n");
+		pr_warn("Timeout in clock slewing !\n");
 }
 
 
@@ -266,7 +268,7 @@ static int g5_pfunc_switch_freq(int speed_mode)
 		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
 
 	if (rc)
-		pr_warn("cpufreq: pfunc switch error %d\n", rc);
+		pr_warn("pfunc switch error %d\n", rc);
 
 	/* It's an irq GPIO so we should be able to just block here,
 	 * I'll do that later after I've properly tested the IRQ code for
@@ -282,7 +284,7 @@ static int g5_pfunc_switch_freq(int speed_mode)
 		usleep_range(500, 500);
 	}
 	if (done == 0)
-		pr_warn("cpufreq: Timeout in clock slewing !\n");
+		pr_warn("Timeout in clock slewing !\n");
 
 	/* If frequency is going down, last ramp the voltage */
 	if (speed_mode > g5_pmode_cur)
@@ -368,7 +370,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 	}
 	pvr_hi = (*valp) >> 16;
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		pr_err("cpufreq: Unsupported CPU version\n");
+		pr_err("Unsupported CPU version\n");
 		goto bail_noprops;
 	}
 
@@ -403,7 +405,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 
 		root = of_find_node_by_path("/");
 		if (root == NULL) {
-			pr_err("cpufreq: Can't find root of device tree\n");
+			pr_err("Can't find root of device tree\n");
 			goto bail_noprops;
 		}
 		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
@@ -411,7 +413,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 			pmf_find_function(root, "slewing-done");
 		if (pfunc_set_vdnap0 == NULL ||
 		    pfunc_vdnap0_complete == NULL) {
-			pr_err("cpufreq: Can't find required platform function\n");
+			pr_err("Can't find required platform function\n");
 			goto bail_noprops;
 		}
 
@@ -491,7 +493,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	if (cpuid != NULL)
 		eeprom = of_get_property(cpuid, "cpuid", NULL);
 	if (eeprom == NULL) {
-		pr_err("cpufreq: Can't find cpuid EEPROM !\n");
+		pr_err("Can't find cpuid EEPROM !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -509,7 +511,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 		break;
 	}
 	if (hwclock == NULL) {
-		pr_err("cpufreq: Can't find i2c clock chip !\n");
+		pr_err("Can't find i2c clock chip !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -537,7 +539,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	/* Check we have minimum requirements */
 	if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
 	    pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
-		pr_err("cpufreq: Can't find platform functions !\n");
+		pr_err("Can't find platform functions !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -565,7 +567,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	/* Get max frequency from device-tree */
 	valp = of_get_property(cpunode, "clock-frequency", NULL);
 	if (!valp) {
-		pr_err("cpufreq: Can't find CPU frequency !\n");
+		pr_err("Can't find CPU frequency !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -581,7 +583,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 
 	/* Check for machines with no useful settings */
 	if (il == ih) {
-		pr_warn("cpufreq: No low frequency mode available on this model !\n");
+		pr_warn("No low frequency mode available on this model !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -592,7 +594,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 
 	/* Sanity check */
 	if (min_freq >= max_freq || min_freq < 1000) {
-		pr_err("cpufreq: Can't calculate low frequency !\n");
+		pr_err("Can't calculate low frequency !\n");
 		rc = -ENXIO;
 		goto bail;
 	}
@@ -651,7 +653,7 @@ static int __init g5_cpufreq_init(void)
 	/* Get first CPU node */
 	cpunode = of_cpu_device_node_get(0);
 	if (cpunode == NULL) {
-		pr_err("cpufreq: Can't find any CPU node\n");
+		pr_err("Can't find any CPU node\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index 981f40b..dedd256 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -8,6 +8,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -22,7 +24,6 @@
 #define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
 					   as it is unused */
 
-#define PFX "powernow-k6: "
 static unsigned int                     busfreq;   /* FSB, in 10 kHz */
 static unsigned int                     max_multiplier;
 
@@ -141,7 +142,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
 {
 
 	if (clock_ratio[best_i].driver_data > max_multiplier) {
-		pr_err(PFX "invalid target frequency\n");
+		pr_err("invalid target frequency\n");
 		return -EINVAL;
 	}
 
@@ -175,14 +176,14 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
 				max_multiplier = param_max_multiplier;
 				goto have_max_multiplier;
 			}
-		pr_err("powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
+		pr_err("invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
 		return -EINVAL;
 	}
 
 	if (!max_multiplier) {
-		pr_warn("powernow-k6: unknown frequency %u, cannot determine current multiplier\n",
+		pr_warn("unknown frequency %u, cannot determine current multiplier\n",
 			khz);
-		pr_warn("powernow-k6: use module parameters max_multiplier and bus_frequency\n");
+		pr_warn("use module parameters max_multiplier and bus_frequency\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -194,7 +195,7 @@ have_max_multiplier:
 			busfreq = param_busfreq / 10;
 			goto have_busfreq;
 		}
-		pr_err("powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
+		pr_err("invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
 		return -EINVAL;
 	}
 
@@ -276,7 +277,7 @@ static int __init powernow_k6_init(void)
 		return -ENODEV;
 
 	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		pr_info(PFX "PowerNow IOPORT region already used\n");
+		pr_info("PowerNow IOPORT region already used\n");
 		return -EIO;
 	}
 
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 4a1b420..9f013ed 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -13,6 +13,8 @@
  *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -35,9 +37,6 @@
 
 #include "powernow-k7.h"
 
-#define PFX "powernow: "
-
-
 struct psb_s {
 	u8 signature[10];
 	u8 tableversion;
@@ -127,13 +126,13 @@ static int check_powernow(void)
 	maxei = cpuid_eax(0x80000000);
 	if (maxei < 0x80000007) {	/* Any powernow info ? */
 #ifdef MODULE
-		pr_info(PFX "No powernow capabilities detected\n");
+		pr_info("No powernow capabilities detected\n");
 #endif
 		return 0;
 	}
 
 	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		pr_info(PFX "K7 660[A0] core detected, enabling errata workarounds\n");
+		pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
 		have_a0 = 1;
 	}
 
@@ -143,7 +142,7 @@ static int check_powernow(void)
 	if (!(edx & (1 << 1 | 1 << 2)))
 		return 0;
 
-	pr_info(PFX "PowerNOW! Technology present. Can scale: ");
+	pr_info("PowerNOW! Technology present. Can scale: ");
 
 	if (edx & 1 << 1) {
 		pr_cont("frequency");
@@ -426,14 +425,14 @@ err1:
 err05:
 	kfree(acpi_processor_perf);
 err0:
-	pr_warn(PFX "ACPI perflib can not be used on this platform\n");
+	pr_warn("ACPI perflib can not be used on this platform\n");
 	acpi_processor_perf = NULL;
 	return retval;
 }
 #else
 static int powernow_acpi_init(void)
 {
-	pr_info(PFX "no support for ACPI processor found - please recompile your kernel with ACPI processor\n");
+	pr_info("no support for ACPI processor found - please recompile your kernel with ACPI processor\n");
 	return -EINVAL;
 }
 #endif
@@ -465,7 +464,7 @@ static int powernow_decode_bios(int maxfid, int startvid)
 			psb = (struct psb_s *) p;
 			pr_debug("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
-				pr_info(PFX "Sorry, only v1.2 tables supported right now\n");
+				pr_info("Sorry, only v1.2 tables supported right now\n");
 				return -ENODEV;
 			}
 
@@ -477,7 +476,7 @@ static int powernow_decode_bios(int maxfid, int startvid)
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
-				pr_info(PFX "BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n",
+				pr_info("BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n",
 					latency);
 				latency = 100;
 			}
@@ -510,9 +509,9 @@ static int powernow_decode_bios(int maxfid, int startvid)
 						p += 2;
 				}
 			}
-			pr_info(PFX "No PST tables match this cpuid (0x%x)\n",
+			pr_info("No PST tables match this cpuid (0x%x)\n",
 				etuple);
-			pr_info(PFX "This is indicative of a broken BIOS\n");
+			pr_info("This is indicative of a broken BIOS\n");
 
 			return -EINVAL;
 		}
@@ -545,7 +544,7 @@ static int fixup_sgtc(void)
 	sgtc = 100 * m * latency;
 	sgtc = sgtc / 3;
 	if (sgtc > 0xfffff) {
-		pr_warn(PFX "SGTC too large %d\n", sgtc);
+		pr_warn("SGTC too large %d\n", sgtc);
 		sgtc = 0xfffff;
 	}
 	return sgtc;
@@ -567,10 +566,10 @@ static unsigned int powernow_get(unsigned int cpu)
 
 static int acer_cpufreq_pst(const struct dmi_system_id *d)
 {
-	pr_warn(PFX "%s laptop with broken PST tables in BIOS detected\n",
+	pr_warn("%s laptop with broken PST tables in BIOS detected\n",
 		d->ident);
-	pr_warn(PFX "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
-	pr_warn(PFX "cpufreq scaling has been disabled as a result of this\n");
+	pr_warn("You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+	pr_warn("cpufreq scaling has been disabled as a result of this\n");
 	return 0;
 }
 
@@ -605,37 +604,37 @@ static int powernow_cpu_init(struct cpufreq_policy *policy)
 
 	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
 	if (!fsb) {
-		pr_warn(PFX "can not determine bus frequency\n");
+		pr_warn("can not determine bus frequency\n");
 		return -EINVAL;
 	}
 	pr_debug("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		pr_info(PFX "PSB/PST known to be broken - trying ACPI instead\n");
+		pr_info("PSB/PST known to be broken - trying ACPI instead\n");
 		result = powernow_acpi_init();
 	} else {
 		result = powernow_decode_bios(fidvidstatus.bits.MFID,
 				fidvidstatus.bits.SVID);
 		if (result) {
-			pr_info(PFX "Trying ACPI perflib\n");
+			pr_info("Trying ACPI perflib\n");
 			maximum_speed = 0;
 			minimum_speed = -1;
 			latency = 0;
 			result = powernow_acpi_init();
 			if (result) {
-				pr_info(PFX "ACPI and legacy methods failed\n");
+				pr_info("ACPI and legacy methods failed\n");
 			}
 		} else {
 			/* SGTC use the bus clock as timer */
 			latency = fixup_sgtc();
-			pr_info(PFX "SGTC: %d\n", latency);
+			pr_info("SGTC: %d\n", latency);
 		}
 	}
 
 	if (result)
 		return result;
 
-	pr_info(PFX "Minimum speed %d MHz - Maximum speed %d MHz\n",
+	pr_info("Minimum speed %d MHz - Maximum speed %d MHz\n",
 		minimum_speed/1000, maximum_speed/1000);
 
 	policy->cpuinfo.transition_latency =
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 8a27667..ce345bf 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -29,6 +29,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -186,8 +188,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 
 	ret = regulator_set_voltage(vcc_core, vmin, vmax);
 	if (ret)
-		pr_err("cpufreq: Failed to set vcc_core in [%dmV..%dmV]\n",
-		       vmin, vmax);
+		pr_err("Failed to set vcc_core in [%dmV..%dmV]\n", vmin, vmax);
 	return ret;
 }
 
@@ -195,10 +196,10 @@ static void __init pxa_cpufreq_init_voltages(void)
 {
 	vcc_core = regulator_get(NULL, "vcc_core");
 	if (IS_ERR(vcc_core)) {
-		pr_info("cpufreq: Didn't find vcc_core regulator\n");
+		pr_info("Didn't find vcc_core regulator\n");
 		vcc_core = NULL;
 	} else {
-		pr_info("cpufreq: Found vcc_core regulator\n");
+		pr_info("Found vcc_core regulator\n");
 	}
 }
 #else
@@ -407,7 +408,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	if (cpu_is_pxa25x()) {
 		find_freq_tables(&pxa255_freq_table, &pxa255_freqs);
-		pr_info("PXA255 cpufreq using %s frequency table\n",
+		pr_info("using %s frequency table\n",
 			pxa255_turbo_table ? "turbo" : "run");
 
 		cpufreq_table_validate_and_show(policy, pxa255_freq_table);
@@ -416,7 +417,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy)
 		cpufreq_table_validate_and_show(policy, pxa27x_freq_table);
 	}
 
-	pr_info("PXA CPU frequency change support initialized\n");
+	pr_info("frequency change support initialized\n");
 
 	return 0;
 }
diff --git a/drivers/cpufreq/s3c2412-cpufreq.c b/drivers/cpufreq/s3c2412-cpufreq.c
index f9447ba..b04b6f0 100644
--- a/drivers/cpufreq/s3c2412-cpufreq.c
+++ b/drivers/cpufreq/s3c2412-cpufreq.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -197,20 +199,20 @@ static int s3c2412_cpufreq_add(struct device *dev,
 
 	hclk = clk_get(NULL, "hclk");
 	if (IS_ERR(hclk)) {
-		pr_err("%s: cannot find hclk clock\n", __func__);
+		pr_err("cannot find hclk clock\n");
 		return -ENOENT;
 	}
 
 	fclk = clk_get(NULL, "fclk");
 	if (IS_ERR(fclk)) {
-		pr_err("%s: cannot find fclk clock\n", __func__);
+		pr_err("cannot find fclk clock\n");
 		goto err_fclk;
 	}
 
 	fclk_rate = clk_get_rate(fclk);
 	if (fclk_rate > 200000000) {
-		pr_info("%s: fclk %ld MHz, assuming 266MHz capable part\n",
-			__func__, fclk_rate / 1000000);
+		pr_info("fclk %ld MHz, assuming 266MHz capable part\n",
+			fclk_rate / 1000000);
 		s3c2412_cpufreq_info.max.fclk = 266000000;
 		s3c2412_cpufreq_info.max.hclk = 133000000;
 		s3c2412_cpufreq_info.max.pclk =  66000000;
@@ -218,13 +220,13 @@ static int s3c2412_cpufreq_add(struct device *dev,
 
 	armclk = clk_get(NULL, "armclk");
 	if (IS_ERR(armclk)) {
-		pr_err("%s: cannot find arm clock\n", __func__);
+		pr_err("cannot find arm clock\n");
 		goto err_armclk;
 	}
 
 	xtal = clk_get(NULL, "xtal");
 	if (IS_ERR(xtal)) {
-		pr_err("%s: cannot find xtal clock\n", __func__);
+		pr_err("cannot find xtal clock\n");
 		goto err_xtal;
 	}
 
diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c
index f6cefe3..d0d75b6 100644
--- a/drivers/cpufreq/s3c2440-cpufreq.c
+++ b/drivers/cpufreq/s3c2440-cpufreq.c
@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
index 8182608..4d976e8 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 68f8837..ae8eaed 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -356,7 +358,7 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name)
 
 	clk = clk_get(dev, name);
 	if (IS_ERR(clk))
-		pr_err("cpufreq: failed to get clock '%s'\n", name);
+		pr_err("failed to get clock '%s'\n", name);
 
 	return clk;
 }
@@ -665,9 +667,9 @@ int s3c_plltab_register(struct cpufreq_frequency_table *plls,
 		vals += plls_no;
 		vals->frequency = CPUFREQ_TABLE_END;
 
-		pr_info("cpufreq: %d PLL entries\n", plls_no);
+		pr_info("%d PLL entries\n", plls_no);
 	} else
-		pr_err("cpufreq: no memory for PLL tables\n");
+		pr_err("no memory for PLL tables\n");
 
 	return vals ? 0 : -ENOMEM;
 }
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 344e584..06d8591 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -9,6 +9,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -635,13 +637,13 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev)
 
 	arm_regulator = regulator_get(NULL, "vddarm");
 	if (IS_ERR(arm_regulator)) {
-		pr_err("failed to get regulator vddarm");
+		pr_err("failed to get regulator vddarm\n");
 		return PTR_ERR(arm_regulator);
 	}
 
 	int_regulator = regulator_get(NULL, "vddint");
 	if (IS_ERR(int_regulator)) {
-		pr_err("failed to get regulator vddint");
+		pr_err("failed to get regulator vddint\n");
 		regulator_put(arm_regulator);
 		return PTR_ERR(int_regulator);
 	}
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 57bbddf..4225501 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -13,6 +13,8 @@
  *	2005-03-30: - initial revision
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -30,8 +32,6 @@
 
 static __u8 __iomem *cpuctl;
 
-#define PFX "sc520_freq: "
-
 static struct cpufreq_frequency_table sc520_freq_table[] = {
 	{0, 0x01,	100000},
 	{0, 0x02,	133000},
@@ -44,7 +44,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 
 	switch (clockspeed_reg & 0x03) {
 	default:
-		pr_err(PFX "error: cpuctl register has unexpected value %02x\n",
+		pr_err("error: cpuctl register has unexpected value %02x\n",
 		       clockspeed_reg);
 	case 0x01:
 		return 100000;
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 47df2d6..41bc539 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -13,6 +13,8 @@
  * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -27,7 +29,6 @@
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
 
-#define PFX		"speedstep-centrino: "
 #define MAINTAINER	"linux-pm@vger.kernel.org"
 
 #define INTEL_MSR_RANGE	(0xffff)
@@ -386,7 +387,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			pr_info(PFX "couldn't enable Enhanced SpeedStep\n");
+			pr_info("couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 9d00c22..b86953a 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -18,6 +18,8 @@
  *                        SPEEDSTEP - DEFINITIONS                    *
  *********************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -68,13 +70,13 @@ static int speedstep_find_register(void)
 	/* get PMBASE */
 	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
 	if (!(pmbase & 0x01)) {
-		pr_err("speedstep-ich: could not find speedstep register\n");
+		pr_err("could not find speedstep register\n");
 		return -ENODEV;
 	}
 
 	pmbase &= 0xFFFFFFFE;
 	if (!pmbase) {
-		pr_err("speedstep-ich: could not find speedstep register\n");
+		pr_err("could not find speedstep register\n");
 		return -ENODEV;
 	}
 
@@ -136,7 +138,7 @@ static void speedstep_set_state(unsigned int state)
 		pr_debug("change to %u MHz succeeded\n",
 			speedstep_get_frequency(speedstep_processor) / 1000);
 	else
-		pr_err("cpufreq: change failed - I/O error\n");
+		pr_err("change failed - I/O error\n");
 
 	return;
 }
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 32bdf1d..1b80621 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -8,6 +8,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -453,7 +455,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		 */
 		if (*transition_latency > 10000000 ||
 		    *transition_latency < 50000) {
-			pr_warn(PFX "frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n",
+			pr_warn("frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n",
 				*transition_latency, 500000);
 			*transition_latency = 500000;
 		}
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index af32a5f..770a9ae 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -12,6 +12,8 @@
  *                        SPEEDSTEP - DEFINITIONS                    *
  *********************************************************************/
 
+#define pr_fmt(fmt) "cpufreq: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -204,7 +206,7 @@ static void speedstep_set_state(unsigned int state)
 			(speedstep_freqs[new_state].frequency / 1000),
 			retry, result);
 	else
-		pr_err("cpufreq: change to state %u failed with new_state %u and result %u\n",
+		pr_err("change to state %u failed with new_state %u and result %u\n",
 		       state, new_state, result);
 
 	return;
-- 
cgit v0.10.2


From cd73e9b01f635d25dbd17ed090f9351becf00280 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 7 Apr 2016 03:30:40 +0200
Subject: cpufreq: Simplify switch () in cpufreq_cpu_callback()

Merge two switch entries that do the same thing in
cpufreq_cpu_callback().

No functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 77d77a4..8592815 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2309,16 +2309,13 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
 		cpufreq_online(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
 		cpufreq_offline(cpu);
 		break;
-
-	case CPU_DOWN_FAILED:
-		cpufreq_online(cpu);
-		break;
 	}
 	return NOTIFY_OK;
 }
-- 
cgit v0.10.2


From a794d6138cda391b6a9ba28c4cf27651415669c6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 7 Apr 2016 03:31:57 +0200
Subject: cpufreq: Rearrange cpufreq_add_dev()

Reorganize the code in cpufreq_add_dev() to avoid using the ret
variable and reduce the indentation level in it.

No functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8592815..2f1ae56 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1320,26 +1320,24 @@ out_free_policy:
  */
 static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
+	struct cpufreq_policy *policy;
 	unsigned cpu = dev->id;
-	int ret;
 
 	dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
 
-	if (cpu_online(cpu)) {
-		ret = cpufreq_online(cpu);
-	} else {
-		/*
-		 * A hotplug notifier will follow and we will handle it as CPU
-		 * online then.  For now, just create the sysfs link, unless
-		 * there is no policy or the link is already present.
-		 */
-		struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
+	if (cpu_online(cpu))
+		return cpufreq_online(cpu);
 
-		ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus)
-			? add_cpu_dev_symlink(policy, cpu) : 0;
-	}
+	/*
+	 * A hotplug notifier will follow and we will handle it as CPU online
+	 * then.  For now, just create the sysfs link, unless there is no policy
+	 * or the link is already present.
+	 */
+	policy = per_cpu(cpufreq_cpu_data, cpu);
+	if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+		return 0;
 
-	return ret;
+	return add_cpu_dev_symlink(policy, cpu);
 }
 
 static void cpufreq_offline(unsigned int cpu)
-- 
cgit v0.10.2


From 9b55f55af8f017fec053c7eb0675241d057936da Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 7 Apr 2016 14:06:58 +0530
Subject: cpufreq: ACPI: policy->driver_data can't be NULL in ->exit()

Its always set by ->init() and so it will always be there in ->exit().
There is no need to have a special check for just that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rebase ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 67a612e..a24a0c0 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -878,14 +878,12 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 	pr_debug("acpi_cpufreq_cpu_exit\n");
 
-	if (data) {
-		policy->fast_switch_possible = false;
-		policy->driver_data = NULL;
-		acpi_processor_unregister_performance(data->acpi_perf_cpu);
-		free_cpumask_var(data->freqdomain_cpus);
-		kfree(data->freq_table);
-		kfree(data);
-	}
+	policy->fast_switch_possible = false;
+	policy->driver_data = NULL;
+	acpi_processor_unregister_performance(data->acpi_perf_cpu);
+	free_cpumask_var(data->freqdomain_cpus);
+	kfree(data->freq_table);
+	kfree(data);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 8cee1eed8e78143aa2ed60308fb88e2d6fa46205 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 7 Apr 2016 14:06:57 +0530
Subject: cpufreq: ACPI: Remove freq_table from acpi_cpufreq_data

The freq-table is stored in struct cpufreq_policy also and there is
absolutely no need of keeping a copy of its reference in struct
acpi_cpufreq_data. Drop it.

Also policy->freq_table can't be NULL in the target() callback, remove
the useless check as well.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rebase ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index a24a0c0..32a1505 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -65,7 +65,6 @@ enum {
 #define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
 
 struct acpi_cpufreq_data {
-	struct cpufreq_frequency_table *freq_table;
 	unsigned int resume;
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
@@ -200,8 +199,9 @@ static int check_amd_hwpstate_cpu(unsigned int cpuid)
 	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
 }
 
-static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
 	int i;
 
@@ -209,13 +209,14 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
 
 	for (i = 0; i < perf->state_count; i++) {
 		if (value == perf->states[i].status)
-			return data->freq_table[i].frequency;
+			return policy->freq_table[i].frequency;
 	}
 	return 0;
 }
 
-static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct cpufreq_frequency_table *pos;
 	struct acpi_processor_performance *perf;
 
@@ -226,20 +227,22 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 
 	perf = to_perf_data(data);
 
-	cpufreq_for_each_entry(pos, data->freq_table)
+	cpufreq_for_each_entry(pos, policy->freq_table)
 		if (msr == perf->states[pos->driver_data].status)
 			return pos->frequency;
-	return data->freq_table[0].frequency;
+	return policy->freq_table[0].frequency;
 }
 
-static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
+
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 	case SYSTEM_AMD_MSR_CAPABLE:
-		return extract_msr(val, data);
+		return extract_msr(policy, val);
 	case SYSTEM_IO_CAPABLE:
-		return extract_io(val, data);
+		return extract_io(policy, val);
 	default:
 		return 0;
 	}
@@ -374,11 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 		return 0;
 
 	data = policy->driver_data;
-	if (unlikely(!data || !data->freq_table))
+	if (unlikely(!data || !policy->freq_table))
 		return 0;
 
-	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
-	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
+	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
+	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
@@ -392,14 +395,15 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	return freq;
 }
 
-static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
-				struct acpi_cpufreq_data *data)
+static unsigned int check_freqs(struct cpufreq_policy *policy,
+				const struct cpumask *mask, unsigned int freq)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	unsigned int cur_freq;
 	unsigned int i;
 
 	for (i = 0; i < 100; i++) {
-		cur_freq = extract_freq(get_cur_val(mask, data), data);
+		cur_freq = extract_freq(policy, get_cur_val(mask, data));
 		if (cur_freq == freq)
 			return 1;
 		udelay(10);
@@ -416,12 +420,12 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;
 
-	if (unlikely(data == NULL || data->freq_table == NULL)) {
+	if (unlikely(!data)) {
 		return -ENODEV;
 	}
 
 	perf = to_perf_data(data);
-	next_perf_state = data->freq_table[index].driver_data;
+	next_perf_state = policy->freq_table[index].driver_data;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
 			pr_debug("Called after resume, resetting to P%d\n",
@@ -444,8 +448,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	drv_write(data, mask, perf->states[next_perf_state].control);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(mask, data->freq_table[index].frequency,
-					data)) {
+		if (!check_freqs(policy, mask,
+				 policy->freq_table[index].frequency)) {
 			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
 			result = -EAGAIN;
@@ -472,7 +476,7 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	 * The table is sorted in the reverse order with respect to the
 	 * frequency and all of the entries are valid (see the initialization).
 	 */
-	entry = data->freq_table;
+	entry = policy->freq_table;
 	do {
 		entry++;
 		freq = entry->frequency;
@@ -665,6 +669,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	unsigned int result = 0;
 	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	struct acpi_processor_performance *perf;
+	struct cpufreq_frequency_table *freq_table;
 #ifdef CONFIG_SMP
 	static int blacklisted;
 #endif
@@ -776,9 +781,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_unreg;
 	}
 
-	data->freq_table = kzalloc(sizeof(*data->freq_table) *
+	freq_table = kzalloc(sizeof(*freq_table) *
 		    (perf->state_count+1), GFP_KERNEL);
-	if (!data->freq_table) {
+	if (!freq_table) {
 		result = -ENOMEM;
 		goto err_unreg;
 	}
@@ -802,18 +807,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	/* table init */
 	for (i = 0; i < perf->state_count; i++) {
 		if (i > 0 && perf->states[i].core_frequency >=
-		    data->freq_table[valid_states-1].frequency / 1000)
+		    freq_table[valid_states-1].frequency / 1000)
 			continue;
 
-		data->freq_table[valid_states].driver_data = i;
-		data->freq_table[valid_states].frequency =
+		freq_table[valid_states].driver_data = i;
+		freq_table[valid_states].frequency =
 		    perf->states[i].core_frequency * 1000;
 		valid_states++;
 	}
-	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
 	perf->state = 0;
 
-	result = cpufreq_table_validate_and_show(policy, data->freq_table);
+	result = cpufreq_table_validate_and_show(policy, freq_table);
 	if (result)
 		goto err_freqfree;
 
@@ -860,7 +865,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	return result;
 
 err_freqfree:
-	kfree(data->freq_table);
+	kfree(freq_table);
 err_unreg:
 	acpi_processor_unregister_performance(cpu);
 err_free_mask:
@@ -882,7 +887,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	policy->driver_data = NULL;
 	acpi_processor_unregister_performance(data->acpi_perf_cpu);
 	free_cpumask_var(data->freqdomain_cpus);
-	kfree(data->freq_table);
+	kfree(policy->freq_table);
 	kfree(data);
 
 	return 0;
-- 
cgit v0.10.2


From c998c07836f985b24361629dc98506ec7893e7a0 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Tue, 5 Apr 2016 14:05:38 -0500
Subject: cpuidle: Indicate when a device has been unregistered

Currently the 'registered' member of the cpuidle_device struct is set
to 1 during cpuidle_register_device. In this same function there are
checks to see if the device is already registered to prevent duplicate
calls to register the device, but this value is never set to 0 even on
unregister of the device. Because of this, any attempt to call
cpuidle_register_device after a call to cpuidle_unregister_device will
fail which shouldn't be the case.

To prevent this, set registered to 0 when the device is unregistered.

Fixes: c878a52d3c7c (cpuidle: Check if device is already registered)
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index f996efc..c2dd99a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -433,6 +433,8 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
 	list_del(&dev->device_list);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
 	module_put(drv->owner);
+
+	dev->registered = 0;
 }
 
 static void __cpuidle_device_init(struct cpuidle_device *dev)
-- 
cgit v0.10.2


From 5dcef694860100fd16885f052591b1268b764d21 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Apr 2016 17:00:47 -0400
Subject: intel_idle: add BXT support

Broxton has all the HSW C-states, except C3.
BXT C-state timing is slightly different.

Here we trust the IRTL MSRs as authority
on maximum C-state latency, and override the driver's tables
with the values found in the associated IRTL MSRs.
Further we set the target_residency to 1x maximum latency,
trusting the hardware demotion logic.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402e..73d66fa 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -162,6 +162,14 @@
 #define MSR_PKG_C9_RESIDENCY		0x00000631
 #define MSR_PKG_C10_RESIDENCY		0x00000632
 
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL			0x0000060a
+#define MSR_PKGC6_IRTL			0x0000060b
+#define MSR_PKGC7_IRTL			0x0000060c
+#define MSR_PKGC8_IRTL			0x00000633
+#define MSR_PKGC9_IRTL			0x00000634
+#define MSR_PKGC10_IRTL			0x00000635
+
 /* Run Time Average Power Limiting (RAPL) Interface */
 
 #define MSR_RAPL_POWER_UNIT		0x00000606
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c6935de..c966492 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -766,6 +766,67 @@ static struct cpuidle_state knl_cstates[] = {
 		.enter = NULL }
 };
 
+static struct cpuidle_state bxt_cstates[] = {
+	{
+		.name = "C1-BXT",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C1E-BXT",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01),
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C6-BXT",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 133,
+		.target_residency = 133,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C7s-BXT",
+		.desc = "MWAIT 0x31",
+		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 155,
+		.target_residency = 155,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C8-BXT",
+		.desc = "MWAIT 0x40",
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 1000,
+		.target_residency = 1000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C9-BXT",
+		.desc = "MWAIT 0x50",
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 2000,
+		.target_residency = 2000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C10-BXT",
+		.desc = "MWAIT 0x60",
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 10000,
+		.target_residency = 10000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.enter = NULL }
+};
+
 /**
  * intel_idle
  * @dev: cpuidle_device
@@ -950,6 +1011,11 @@ static const struct idle_cpu idle_cpu_knl = {
 	.state_table = knl_cstates,
 };
 
+static const struct idle_cpu idle_cpu_bxt = {
+	.state_table = bxt_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -985,6 +1051,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	ICPU(0x9e, idle_cpu_skl),
 	ICPU(0x55, idle_cpu_skx),
 	ICPU(0x57, idle_cpu_knl),
+	ICPU(0x5c, idle_cpu_bxt),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -1075,6 +1142,73 @@ static void ivt_idle_state_table_update(void)
 
 	/* else, 1 and 2 socket systems use default ivt_cstates */
 }
+
+/*
+ * Translate IRTL (Interrupt Response Time Limit) MSR to usec
+ */
+
+static unsigned int irtl_ns_units[] = {
+	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+
+static unsigned long long irtl_2_usec(unsigned long long irtl)
+{
+	unsigned long long ns;
+
+	ns = irtl_ns_units[(irtl >> 10) & 0x3];
+
+	return div64_u64((irtl & 0x3FF) * ns, 1000);
+}
+/*
+ * bxt_idle_state_table_update(void)
+ *
+ * On BXT, we trust the IRTL to show the definitive maximum latency
+ * We use the same value for target_residency.
+ */
+static void bxt_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	rdmsrl(MSR_PKGC6_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[2].exit_latency = usec;
+		bxt_cstates[2].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC7_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[3].exit_latency = usec;
+		bxt_cstates[3].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC8_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[4].exit_latency = usec;
+		bxt_cstates[4].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC9_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[5].exit_latency = usec;
+		bxt_cstates[5].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC10_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[6].exit_latency = usec;
+		bxt_cstates[6].target_residency = usec;
+	}
+
+}
 /*
  * sklh_idle_state_table_update(void)
  *
@@ -1130,6 +1264,9 @@ static void intel_idle_state_table_update(void)
 	case 0x3e: /* IVT */
 		ivt_idle_state_table_update();
 		break;
+	case 0x5c: /* BXT */
+		bxt_idle_state_table_update();
+		break;
 	case 0x5e: /* SKL-H */
 		sklh_idle_state_table_update();
 		break;
-- 
cgit v0.10.2


From 33475a8784f4b8f0bff4419569e0dc348b318c99 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Tue, 22 Mar 2016 22:42:40 +0800
Subject: ARM: cpuidle: add const qualifier to cpuidle_ops member in structures

The core code does not modify smp_operations structures. To clarify it,
this patch adds 'const' qualifier to the 'ops' member of struct
of_cpuidle_method.

This change allows each arm cpuidle code to add 'const' qualifier to
its cpuidle_ops structure.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 3848259..baefe1d 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -36,7 +36,7 @@ struct cpuidle_ops {
 
 struct of_cpuidle_method {
 	const char *method;
-	struct cpuidle_ops *ops;
+	const struct cpuidle_ops *ops;
 };
 
 #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops)			\
-- 
cgit v0.10.2


From 4cfd55202cee115a3686d5ad6a0f60d604aca802 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Tue, 22 Mar 2016 22:42:41 +0800
Subject: ARM: cpuidle: constify return value of arm_cpuidle_get_ops()

arm_cpuidle_read_ops() just copies '*ops' to cpuidle_ops[cpu], so the
structure '*ops' is not modified at all.

The comment is also updated accordingly.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 703926e..a44b268e 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -70,7 +70,7 @@ int arm_cpuidle_suspend(int index)
  *
  * Returns a struct cpuidle_ops pointer, NULL if not found.
  */
-static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
+static const struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
 {
 	struct of_cpuidle_method *m = __cpuidle_method_of_table;
 
@@ -88,7 +88,7 @@ static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
  *
  * Get the method name defined in the 'enable-method' property, retrieve the
  * associated cpuidle_ops and do a struct copy. This copy is needed because all
- * cpuidle_ops are tagged __initdata and will be unloaded after the init
+ * cpuidle_ops are tagged __initconst and will be unloaded after the init
  * process.
  *
  * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if
@@ -97,7 +97,7 @@ static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
 static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu)
 {
 	const char *enable_method;
-	struct cpuidle_ops *ops;
+	const struct cpuidle_ops *ops;
 
 	enable_method = of_get_property(dn, "enable-method", NULL);
 	if (!enable_method)
-- 
cgit v0.10.2


From 1e712d9be873a44d7f5bc2a11d0ad6573029a67e Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Tue, 22 Mar 2016 22:42:42 +0800
Subject: soc: qcom: spm: Use const and __initconst for qcom_cpuidle_ops

The qcom_cpuidle_ops structures is not over-written, so add "const"
qualifier and replace __initdata with __initconst.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Andy Gross <andy.gross@linaro.org>

diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
index 5548a31..1fcbb22 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/soc/qcom/spm.c
@@ -274,7 +274,7 @@ check_spm:
 	return per_cpu(cpu_spm_drv, cpu) ? 0 : -ENXIO;
 }
 
-static struct cpuidle_ops qcom_cpuidle_ops __initdata = {
+static const struct cpuidle_ops qcom_cpuidle_ops __initconst = {
 	.suspend = qcom_idle_enter,
 	.init = qcom_cpuidle_init,
 };
-- 
cgit v0.10.2


From 5e7c17df795e462c70a43f1b3b670e08efefe8fd Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Tue, 22 Mar 2016 22:42:43 +0800
Subject: drivers: firmware: psci: use const and __initconst for
 psci_cpuidle_ops

The psci_cpuidle_ops structures is not over-written, so add "const"
qualifier and replace __initdata with __initconst.

Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index 11bfee8..eb8134a 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -355,7 +355,7 @@ int psci_cpu_suspend_enter(unsigned long index)
 
 /* ARM specific CPU idle operations */
 #ifdef CONFIG_ARM
-static struct cpuidle_ops psci_cpuidle_ops __initdata = {
+static const struct cpuidle_ops psci_cpuidle_ops __initconst = {
 	.suspend = psci_cpu_suspend_enter,
 	.init = psci_dt_cpu_init_idle,
 };
-- 
cgit v0.10.2


From 0ae3aeefabbeef26294e7a349b51f1c761d46c9f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 8 Apr 2016 13:10:23 +0200
Subject: PM / Runtime: Fix error path in pm_runtime_force_resume()

As pm_runtime_set_active() may fail because the device's parent isn't
active, we can end up executing the ->runtime_resume() callback for the
device when it isn't allowed.

Fix this by invoking pm_runtime_set_active() before running the callback
and let's also deal with the error code.

Fixes: 37f204164dfb (PM: Add pm_runtime_suspend|resume_force functions)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: 3.15+ <stable@vger.kernel.org> # 3.15+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 4c70550..b746904 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1506,11 +1506,16 @@ int pm_runtime_force_resume(struct device *dev)
 		goto out;
 	}
 
-	ret = callback(dev);
+	ret = pm_runtime_set_active(dev);
 	if (ret)
 		goto out;
 
-	pm_runtime_set_active(dev);
+	ret = callback(dev);
+	if (ret) {
+		pm_runtime_set_suspended(dev);
+		goto out;
+	}
+
 	pm_runtime_mark_last_busy(dev);
 out:
 	pm_runtime_enable(dev);
-- 
cgit v0.10.2


From 372a12ed9d99c02f105278a9b75f0cb176d15cc1 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 8 Apr 2016 13:40:53 +0200
Subject: PM / Runtime: Move ignore_children flag under CONFIG_PM

The ignore_children flag is used only when CONFIG_PM is set, so let's move
it into that section within the struct dev_pm_info.

Move also the corresponding pm_suspend_ignore_children() API out of
device.h into pm_runtime.h, to be consistent with similar APIs.

Unfortunate this causes the Toshiba PCI SD mmc host driver to fail to
compile as it needs pm_runtime.h, so let's fix this here as well.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c
index e2cdd5f..553ef41 100644
--- a/drivers/mmc/host/toshsd.c
+++ b/drivers/mmc/host/toshsd.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/pm.h>
+#include <linux/pm_runtime.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 002c597..b130304 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -956,11 +956,6 @@ static inline bool device_async_suspend_enabled(struct device *dev)
 	return !!dev->power.async_suspend;
 }
 
-static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
-{
-	dev->power.ignore_children = enable;
-}
-
 static inline void dev_pm_syscore_device(struct device *dev, bool val)
 {
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6a5d654..06eb353 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -563,7 +563,6 @@ struct dev_pm_info {
 	bool			is_suspended:1;	/* Ditto */
 	bool			is_noirq_suspended:1;
 	bool			is_late_suspended:1;
-	bool			ignore_children:1;
 	bool			early_init:1;	/* Owned by the PM core */
 	bool			direct_complete:1;	/* Owned by the PM core */
 	spinlock_t		lock;
@@ -591,6 +590,7 @@ struct dev_pm_info {
 	unsigned int		deferred_resume:1;
 	unsigned int		run_wake:1;
 	unsigned int		runtime_auto:1;
+	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
 	unsigned int		irq_safe:1;
 	unsigned int		use_autosuspend:1;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7af093d..2e14d26 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,6 +56,11 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
+{
+	dev->power.ignore_children = enable;
+}
+
 static inline bool pm_children_suspended(struct device *dev)
 {
 	return dev->power.ignore_children
@@ -156,6 +161,7 @@ static inline void __pm_runtime_disable(struct device *dev, bool c) {}
 static inline void pm_runtime_allow(struct device *dev) {}
 static inline void pm_runtime_forbid(struct device *dev) {}
 
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
 static inline bool pm_children_suspended(struct device *dev) { return false; }
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
-- 
cgit v0.10.2


From 9df3921e026532eb3bd2904745d990c0a9f0b4e4 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 Mar 2016 11:21:25 +0200
Subject: PM / Domains: Rename stop_ok to suspend_ok for the genpd governor

The genpd governor validates the latency constraints to find out whether
it's acceptable to runtime suspend a device. Earlier this validation was
made to know whether it was okay to invoke the ->stop() callback for the
device, hence the governor used the name "stop_ok" for the related
variables.

To clarify the code around this, let's rename these variables from
"stop_ok" to "suspend_ok".

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 56705b5..c62687d 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -382,7 +382,7 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 static int pm_genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	bool (*stop_ok)(struct device *__dev);
+	bool (*suspend_ok)(struct device *__dev);
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
@@ -401,8 +401,8 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 	 * runtime PM is disabled. Under these circumstances, we shall skip
 	 * validating/measuring the PM QoS latency.
 	 */
-	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
-	if (runtime_pm && stop_ok && !stop_ok(dev))
+	suspend_ok = genpd->gov ? genpd->gov->suspend_ok : NULL;
+	if (runtime_pm && suspend_ok && !suspend_ok(dev))
 		return -EBUSY;
 
 	/* Measure suspend latency. */
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 00a5436..2e0fce7 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -37,10 +37,10 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
 }
 
 /**
- * default_stop_ok - Default PM domain governor routine for stopping devices.
+ * default_suspend_ok - Default PM domain governor routine to suspend devices.
  * @dev: Device to check.
  */
-static bool default_stop_ok(struct device *dev)
+static bool default_suspend_ok(struct device *dev)
 {
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
 	unsigned long flags;
@@ -51,13 +51,13 @@ static bool default_stop_ok(struct device *dev)
 	spin_lock_irqsave(&dev->power.lock, flags);
 
 	if (!td->constraint_changed) {
-		bool ret = td->cached_stop_ok;
+		bool ret = td->cached_suspend_ok;
 
 		spin_unlock_irqrestore(&dev->power.lock, flags);
 		return ret;
 	}
 	td->constraint_changed = false;
-	td->cached_stop_ok = false;
+	td->cached_suspend_ok = false;
 	td->effective_constraint_ns = -1;
 	constraint_ns = __dev_pm_qos_read_value(dev);
 
@@ -83,13 +83,13 @@ static bool default_stop_ok(struct device *dev)
 			return false;
 	}
 	td->effective_constraint_ns = constraint_ns;
-	td->cached_stop_ok = constraint_ns >= 0;
+	td->cached_suspend_ok = constraint_ns >= 0;
 
 	/*
 	 * The children have been suspended already, so we don't need to take
-	 * their stop latencies into account here.
+	 * their suspend latencies into account here.
 	 */
-	return td->cached_stop_ok;
+	return td->cached_suspend_ok;
 }
 
 /**
@@ -150,7 +150,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
 		 */
 		td = &to_gpd_data(pdd)->td;
 		constraint_ns = td->effective_constraint_ns;
-		/* default_stop_ok() need not be called before us. */
+		/* default_suspend_ok() need not be called before us. */
 		if (constraint_ns < 0) {
 			constraint_ns = dev_pm_qos_read_value(pdd->dev);
 			constraint_ns *= NSEC_PER_USEC;
@@ -227,7 +227,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain)
 }
 
 struct dev_power_governor simple_qos_governor = {
-	.stop_ok = default_stop_ok,
+	.suspend_ok = default_suspend_ok,
 	.power_down_ok = default_power_down_ok,
 };
 
@@ -236,5 +236,5 @@ struct dev_power_governor simple_qos_governor = {
  */
 struct dev_power_governor pm_domain_always_on_gov = {
 	.power_down_ok = always_on_power_down_ok,
-	.stop_ok = default_stop_ok,
+	.suspend_ok = default_suspend_ok,
 };
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 49cd889..e913939 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -28,7 +28,7 @@ enum gpd_status {
 
 struct dev_power_governor {
 	bool (*power_down_ok)(struct dev_pm_domain *domain);
-	bool (*stop_ok)(struct device *dev);
+	bool (*suspend_ok)(struct device *dev);
 };
 
 struct gpd_dev_ops {
@@ -94,7 +94,7 @@ struct gpd_timing_data {
 	s64 resume_latency_ns;
 	s64 effective_constraint_ns;
 	bool constraint_changed;
-	bool cached_stop_ok;
+	bool cached_suspend_ok;
 };
 
 struct pm_domain_data {
-- 
cgit v0.10.2


From 795bd2e7e86967ced927948eff08fe8201ba5fc3 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 Mar 2016 11:21:26 +0200
Subject: PM / Domains: Rename pm_genpd_runtime_suspend|resume()

Follow genpd's rule for names of static functions, by renaming
pm_genpd_runtime_suspend|resume() to genpd_runtime_suspend|resume().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c62687d..75b994a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -372,14 +372,14 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 }
 
 /**
- * pm_genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
+ * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
  * @dev: Device to suspend.
  *
  * Carry out a runtime suspend of a device under the assumption that its
  * pm_domain field points to the domain member of an object of type
  * struct generic_pm_domain representing a PM domain consisting of I/O devices.
  */
-static int pm_genpd_runtime_suspend(struct device *dev)
+static int genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	bool (*suspend_ok)(struct device *__dev);
@@ -446,14 +446,14 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 }
 
 /**
- * pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain.
+ * genpd_runtime_resume - Resume a device belonging to I/O PM domain.
  * @dev: Device to resume.
  *
  * Carry out a runtime resume of a device under the assumption that its
  * pm_domain field points to the domain member of an object of type
  * struct generic_pm_domain representing a PM domain consisting of I/O devices.
  */
-static int pm_genpd_runtime_resume(struct device *dev)
+static int genpd_runtime_resume(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
@@ -1498,8 +1498,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->device_count = 0;
 	genpd->max_off_time_ns = -1;
 	genpd->max_off_time_changed = true;
-	genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
-	genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
+	genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
+	genpd->domain.ops.runtime_resume = genpd_runtime_resume;
 	genpd->domain.ops.prepare = pm_genpd_prepare;
 	genpd->domain.ops.suspend = pm_genpd_suspend;
 	genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
-- 
cgit v0.10.2


From 54eeddbf92d0de297d78f7419dde00079d553dec Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 Mar 2016 11:21:27 +0200
Subject: PM / Domains: Remove ->save|restore_state() callbacks

As a part of the ongoing consolidation of genpd, it's become questionable
whether clients actually needs to be able to assign their own set of
->save|restore_state() callbacks. Currently all users copes fine with the
default callbacks, so let's remove the configuration option and stick to
the default ones.

This enables further clarifications of the related code and let's also
rename pm_genpd_default_save|restore_state() into
__genpd_runtime_suspend|resume() to apply the rule of static functionnames
in genpd.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 75b994a..4ce4ce0 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -229,17 +229,6 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 	return ret;
 }
 
-static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
-}
-
-static int genpd_restore_dev(struct generic_pm_domain *genpd,
-			struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev);
-}
-
 static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 				     unsigned long val, void *ptr)
 {
@@ -372,6 +361,52 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 }
 
 /**
+ * __genpd_runtime_suspend - walk the hierarchy of ->runtime_suspend() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_suspend(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+
+	if (dev->type && dev->type->pm)
+		cb = dev->type->pm->runtime_suspend;
+	else if (dev->class && dev->class->pm)
+		cb = dev->class->pm->runtime_suspend;
+	else if (dev->bus && dev->bus->pm)
+		cb = dev->bus->pm->runtime_suspend;
+	else
+		cb = NULL;
+
+	if (!cb && dev->driver && dev->driver->pm)
+		cb = dev->driver->pm->runtime_suspend;
+
+	return cb ? cb(dev) : 0;
+}
+
+/**
+ * __genpd_runtime_resume - walk the hierarchy of ->runtime_resume() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_resume(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+
+	if (dev->type && dev->type->pm)
+		cb = dev->type->pm->runtime_resume;
+	else if (dev->class && dev->class->pm)
+		cb = dev->class->pm->runtime_resume;
+	else if (dev->bus && dev->bus->pm)
+		cb = dev->bus->pm->runtime_resume;
+	else
+		cb = NULL;
+
+	if (!cb && dev->driver && dev->driver->pm)
+		cb = dev->driver->pm->runtime_resume;
+
+	return cb ? cb(dev) : 0;
+}
+
+/**
  * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
  * @dev: Device to suspend.
  *
@@ -409,13 +444,13 @@ static int genpd_runtime_suspend(struct device *dev)
 	if (runtime_pm)
 		time_start = ktime_get();
 
-	ret = genpd_save_dev(genpd, dev);
+	ret = __genpd_runtime_suspend(dev);
 	if (ret)
 		return ret;
 
 	ret = genpd_stop_dev(genpd, dev);
 	if (ret) {
-		genpd_restore_dev(genpd, dev);
+		__genpd_runtime_resume(dev);
 		return ret;
 	}
 
@@ -491,7 +526,7 @@ static int genpd_runtime_resume(struct device *dev)
 	if (ret)
 		goto err_poweroff;
 
-	ret = genpd_restore_dev(genpd, dev);
+	ret = __genpd_runtime_resume(dev);
 	if (ret)
 		goto err_stop;
 
@@ -1427,54 +1462,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
 
-/* Default device callbacks for generic PM domains. */
-
-/**
- * pm_genpd_default_save_state - Default "save device state" for PM domains.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_save_state(struct device *dev)
-{
-	int (*cb)(struct device *__dev);
-
-	if (dev->type && dev->type->pm)
-		cb = dev->type->pm->runtime_suspend;
-	else if (dev->class && dev->class->pm)
-		cb = dev->class->pm->runtime_suspend;
-	else if (dev->bus && dev->bus->pm)
-		cb = dev->bus->pm->runtime_suspend;
-	else
-		cb = NULL;
-
-	if (!cb && dev->driver && dev->driver->pm)
-		cb = dev->driver->pm->runtime_suspend;
-
-	return cb ? cb(dev) : 0;
-}
-
-/**
- * pm_genpd_default_restore_state - Default PM domains "restore device state".
- * @dev: Device to handle.
- */
-static int pm_genpd_default_restore_state(struct device *dev)
-{
-	int (*cb)(struct device *__dev);
-
-	if (dev->type && dev->type->pm)
-		cb = dev->type->pm->runtime_resume;
-	else if (dev->class && dev->class->pm)
-		cb = dev->class->pm->runtime_resume;
-	else if (dev->bus && dev->bus->pm)
-		cb = dev->bus->pm->runtime_resume;
-	else
-		cb = NULL;
-
-	if (!cb && dev->driver && dev->driver->pm)
-		cb = dev->driver->pm->runtime_resume;
-
-	return cb ? cb(dev) : 0;
-}
-
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1520,8 +1507,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.restore_early = pm_genpd_resume_early;
 	genpd->domain.ops.restore = pm_genpd_resume;
 	genpd->domain.ops.complete = pm_genpd_complete;
-	genpd->dev_ops.save_state = pm_genpd_default_save_state;
-	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
 
 	if (genpd->flags & GENPD_FLAG_PM_CLK) {
 		genpd->dev_ops.stop = pm_clk_suspend;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index e913939..39285c7 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -34,8 +34,6 @@ struct dev_power_governor {
 struct gpd_dev_ops {
 	int (*start)(struct device *dev);
 	int (*stop)(struct device *dev);
-	int (*save_state)(struct device *dev);
-	int (*restore_state)(struct device *dev);
 	bool (*active_wakeup)(struct device *dev);
 };
 
-- 
cgit v0.10.2


From bdcaa23fb87260c8314f538f444ee845ea51e15d Mon Sep 17 00:00:00 2001
From: Philippe Longepe <philippe.longepe@linux.intel.com>
Date: Fri, 22 Apr 2016 11:46:09 -0700
Subject: cpufreq: intel_pstate: Use average P-State instead of current P-State

The result returned by pid_calc() is subtracted from current_pstate
(which is the P-State requested during the last period) in order to
obtain the target P-State for the current iteration.

However, current_pstate may not reflect the real current P-State of
the CPU. In particular, that P-State may be higher because of the
frequency sharing per module.

The theory is:
 - The load is the percentage of time spent in C0 and is related to
   the average P-State during the same period.
 - The last requested P-State can be completely different than the
   average P-State (because of frequency sharing or throttling).
 - The P-State shift computed by the pid_calc is based on the load
   computed at average P-State, so the shift must be relative to
   this average P-State.

Using the average P-State instead of current P-State improves power
without significant performance penalty in cases when a task migrates
from one core to other core sharing frequency and voltage.

Performance and power comparison with this patch on Cherry Trail
platform using Android:

Benchmark               ?Perf    ?Power
FishTank                10.45%    3.1%
SmartBench-Gaming       -0.1%   -10.4%
SmartBench-Productivity -0.8%   -10.4%
CandyCrush                n/a   -17.4%
AngryBirds                n/a    -5.9%
videoPlayback             n/a   -13.9%
audioPlayback             n/a    -4.9%
IcyRocks-20-50           0.0%   -38.4%
iozone RR               -0.16%  -1.3%
iozone RW                0.74%  -1.3%

Signed-off-by: Philippe Longepe <philippe.longepe@linux.intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 12ae2e6..cfa6a68 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1061,6 +1061,12 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu)
 		cpu->pstate.scaling, cpu->sample.mperf);
 }
 
+static inline int32_t get_avg_pstate(struct cpudata *cpu)
+{
+	return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf,
+			 cpu->sample.mperf);
+}
+
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
@@ -1093,7 +1099,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
 	cpu->sample.busy_scaled = cpu_load;
 
-	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
-- 
cgit v0.10.2


From a29a1e76781d112014761ba106ab03e097cc4492 Mon Sep 17 00:00:00 2001
From: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Date: Thu, 14 Apr 2016 20:45:53 -0400
Subject: cpufreq: ACPI / CPPC: Add module support for cppc_cpufreq driver

Add a function to cleanup at module exit and export
appropriate GPL string to enable moduler support
for the cppc_cpufreq driver.

Reported-by: Srinivas Pandruvada <srinivas.pandruvada@intel.com>
Signed-off-by: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 7c0bdfb..8882b8e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -173,4 +173,25 @@ out:
 	return -ENODEV;
 }
 
+static void __exit cppc_cpufreq_exit(void)
+{
+	struct cpudata *cpu;
+	int i;
+
+	cpufreq_unregister_driver(&cppc_cpufreq_driver);
+
+	for_each_possible_cpu(i) {
+		cpu = all_cpu_data[i];
+		free_cpumask_var(cpu->shared_cpu_map);
+		kfree(cpu);
+	}
+
+	kfree(all_cpu_data);
+}
+
+module_exit(cppc_cpufreq_exit);
+MODULE_AUTHOR("Ashwin Chaugule");
+MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
+MODULE_LICENSE("GPL");
+
 late_initcall(cppc_cpufreq_init);
-- 
cgit v0.10.2


From 27b8fe8daf39c8a028d377a878b1405b73f5a10f Mon Sep 17 00:00:00 2001
From: Jia Hongtao <hongtao.jia@nxp.com>
Date: Mon, 18 Apr 2016 15:59:32 +0800
Subject: cpufreq: qoriq: Don't show cooling device messages if THERMAL_OF
 undefined

When THERMAL_OF is undefined the cooling device messages should not be
shown. -ENOSYS is returned from of_cpufreq_cooling_register() when
THERMAL_OF is undefined.

Signed-off-by: Jia Hongtao <hongtao.jia@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index b23e525..8b77589 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -333,8 +333,8 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
 		cpud->cdev = of_cpufreq_cooling_register(np,
 							 policy->related_cpus);
 
-		if (IS_ERR(cpud->cdev)) {
-			pr_err("Failed to register cooling device cpu%d: %ld\n",
+		if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
+			pr_err("cpu%d is not running as cooling device: %ld\n",
 					policy->cpu, PTR_ERR(cpud->cdev));
 
 			cpud->cdev = NULL;
-- 
cgit v0.10.2


From 495c716f177ed8fb3eb6334e4d08c409a9a0bd0f Mon Sep 17 00:00:00 2001
From: Jia Hongtao <hongtao.jia@nxp.com>
Date: Tue, 19 Apr 2016 17:00:06 +0800
Subject: cpufreq: qoriq: Remove __exit macro from .exit callback

.exit callback (qoriq_cpufreq_cpu_exit()) is also used during suspend.
So __exit macro should be removed or the function will be discarded.

Signed-off-by: Jia Hongtao <hongtao.jia@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 8b77589..e029079 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -301,7 +301,7 @@ err_np:
 	return -ENODEV;
 }
 
-static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+static int qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct cpu_data *data = policy->driver_data;
 
@@ -348,7 +348,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
 	.name		= "qoriq_cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= qoriq_cpufreq_cpu_init,
-	.exit		= __exit_p(qoriq_cpufreq_cpu_exit),
+	.exit		= qoriq_cpufreq_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= qoriq_cpufreq_target,
 	.get		= cpufreq_generic_get,
-- 
cgit v0.10.2


From 394cb8316b44e86f64c39ee943bc9bc4f813283b Mon Sep 17 00:00:00 2001
From: Jia Hongtao <hongtao.jia@nxp.com>
Date: Tue, 19 Apr 2016 17:00:07 +0800
Subject: cpufreq: qoriq: Fix cooling device registration issue during suspend

Cooling device is registered by ready callback. It's also invoked while
system resuming from sleep (Enabling non-boot cpus). Thus cooling device
may be multiple registered. Matchable unregistration is added to exit
callback to fix this issue.

Signed-off-by: Jia Hongtao <hongtao.jia@nxp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index e029079..53d8c3f 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -305,6 +305,7 @@ static int qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct cpu_data *data = policy->driver_data;
 
+	cpufreq_cooling_unregister(data->cdev);
 	kfree(data->pclk);
 	kfree(data->table);
 	kfree(data);
-- 
cgit v0.10.2


From 2d74c6d565567e72c6ea303a452473c8fa7141e4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Apr 2016 14:28:53 +0530
Subject: PM / OPP: Propagate the error returned by _find_opp_table()

Don't send -EINVAL and propagate what's received from _find_opp_table().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index ba2bdbd..b7411a3 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -131,7 +131,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 
 	opp_table = _find_opp_table(cpu_dev);
 	if (IS_ERR(opp_table)) {
-		ret = -EINVAL;
+		ret = PTR_ERR(opp_table);
 		goto unlock;
 	}
 
-- 
cgit v0.10.2


From 45ca36ad7cd5784c6326504a8f54df4cccaa8852 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Apr 2016 14:28:54 +0530
Subject: PM / OPP: Add missing doc style comments

Few of the routines in cpu.c were missing these, add them.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index b7411a3..b151401 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -119,7 +119,22 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-/* Required only for V1 bindings, as v2 can manage it from DT itself */
+/**
+ * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by few CPUs
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask of the CPUs which share the OPP table with @cpu_dev
+ *
+ * This marks OPP table of the @cpu_dev as shared by the CPUs present in
+ * @cpumask.
+ *
+ * Returns -ENODEV if OPP table isn't already present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 {
 	struct opp_device *opp_dev;
@@ -161,6 +176,18 @@ unlock:
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
 
 #ifdef CONFIG_OF
+/**
+ * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
 void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
 {
 	struct device *cpu_dev;
@@ -181,6 +208,18 @@ void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
 
+/**
+ * dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be added.
+ *
+ * This adds the OPP tables for CPUs present in the @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
 int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
 {
 	struct device *cpu_dev;
@@ -216,6 +255,24 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
  *
  * Returns -ENOENT if operating-points-v2 bindings aren't supported.
  */
+/**
+ * dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
+ *				      @cpu_dev using operating-points-v2
+ *				      bindings.
+ *
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask to update with information of sharing CPUs
+ *
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 {
 	struct device_node *np, *tmp_np;
-- 
cgit v0.10.2


From 642aa8cee7b84eee1dc4897e301611cffa6d5775 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Apr 2016 14:28:55 +0530
Subject: PM / OPP: dev_pm_opp_set_sharing_cpus() doesn't depend on CONFIG_OF

dev_pm_opp_set_sharing_cpus() doesn't do any DT specific stuff and its
declarations are added within the CONFIG_OF ifdef by mistake. Take them
out of that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index cccaf4a..5b6ad31 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -65,6 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev);
 int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -178,6 +179,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f
 	return -EINVAL;
 }
 
+static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	return -ENOSYS;
+}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
@@ -186,7 +192,6 @@ void dev_pm_opp_of_remove_table(struct device *dev);
 int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask);
 void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask);
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -210,11 +215,6 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask
 {
 	return -ENOSYS;
 }
-
-static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
-{
-	return -ENOSYS;
-}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
-- 
cgit v0.10.2


From 2c93104ff2d678b22bb8d5ca502ac5df21a68f69 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Apr 2016 14:28:56 +0530
Subject: PM / OPP: Relocate dev_pm_opp_set_sharing_cpus()

Move dev_pm_opp_set_sharing_cpus() towards the end of the file. This
is required for better readability after the next patch is applied,
which adds dev_pm_opp_get_sharing_cpus().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index b151401..491e868 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -119,62 +119,6 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-/**
- * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by few CPUs
- * @cpu_dev:	CPU device for which we do this operation
- * @cpumask:	cpumask of the CPUs which share the OPP table with @cpu_dev
- *
- * This marks OPP table of the @cpu_dev as shared by the CPUs present in
- * @cpumask.
- *
- * Returns -ENODEV if OPP table isn't already present.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
-{
-	struct opp_device *opp_dev;
-	struct opp_table *opp_table;
-	struct device *dev;
-	int cpu, ret = 0;
-
-	mutex_lock(&opp_table_lock);
-
-	opp_table = _find_opp_table(cpu_dev);
-	if (IS_ERR(opp_table)) {
-		ret = PTR_ERR(opp_table);
-		goto unlock;
-	}
-
-	for_each_cpu(cpu, cpumask) {
-		if (cpu == cpu_dev->id)
-			continue;
-
-		dev = get_cpu_device(cpu);
-		if (!dev) {
-			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
-				__func__, cpu);
-			continue;
-		}
-
-		opp_dev = _add_opp_dev(dev, opp_table);
-		if (!opp_dev) {
-			dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n",
-				__func__, cpu);
-			continue;
-		}
-	}
-unlock:
-	mutex_unlock(&opp_table_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
-
 #ifdef CONFIG_OF
 /**
  * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
@@ -326,3 +270,59 @@ put_cpu_node:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
 #endif
+
+/**
+ * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by few CPUs
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask of the CPUs which share the OPP table with @cpu_dev
+ *
+ * This marks OPP table of the @cpu_dev as shared by the CPUs present in
+ * @cpumask.
+ *
+ * Returns -ENODEV if OPP table isn't already present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	struct opp_device *opp_dev;
+	struct opp_table *opp_table;
+	struct device *dev;
+	int cpu, ret = 0;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _find_opp_table(cpu_dev);
+	if (IS_ERR(opp_table)) {
+		ret = PTR_ERR(opp_table);
+		goto unlock;
+	}
+
+	for_each_cpu(cpu, cpumask) {
+		if (cpu == cpu_dev->id)
+			continue;
+
+		dev = get_cpu_device(cpu);
+		if (!dev) {
+			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+				__func__, cpu);
+			continue;
+		}
+
+		opp_dev = _add_opp_dev(dev, opp_table);
+		if (!opp_dev) {
+			dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n",
+				__func__, cpu);
+			continue;
+		}
+	}
+unlock:
+	mutex_unlock(&opp_table_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
-- 
cgit v0.10.2


From 46e7a4e18397649fe1b9007ad99707d960eb138e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 21 Apr 2016 14:28:57 +0530
Subject: PM / OPP: Mark shared-opp for non-dt case

opp core allows OPPs to be explicitly marked as shared from platform
code, in case of operating-point v1 bindings.

Though we do everything fine in that case, we don't set the flag in the
opp-table to indicate that the OPPs are shared. It works fine today as
the flag isn't used anywhere else in the core, but we should be doing
the right thing by marking it set.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 491e868..55cbf9b 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -319,6 +319,9 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 				__func__, cpu);
 			continue;
 		}
+
+		/* Mark opp-table as multiple CPUs are sharing it now */
+		opp_table->shared_opp = true;
 	}
 unlock:
 	mutex_unlock(&opp_table_lock);
-- 
cgit v0.10.2


From e92bb16674c5156ea9230ba7a3ab6d490750888f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:39 +0530
Subject: cpufreq: dt: Mark platdev machines array as __initconst

The machines array in cpufreq-dt-platdev is used only once at boot time
and so should be marked with __initconst, so that kernel can free up
memory used for it, if required.

Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index f2ae7ad..00da8c8b 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -11,7 +11,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
-static const struct of_device_id machines[] = {
+static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "samsung,exynos3250", },
 	{ .compatible = "samsung,exynos4210", },
 	{ .compatible = "samsung,exynos4212", },
-- 
cgit v0.10.2


From a59511d1daa6406eede58a79f99250ffcd9a3566 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:40 +0530
Subject: cpufreq: berlin: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c
index 25d7387..ac181c6 100644
--- a/arch/arm/mach-berlin/berlin.c
+++ b/arch/arm/mach-berlin/berlin.c
@@ -18,11 +18,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 
-static void __init berlin_init_late(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char * const berlin_dt_compat[] = {
 	"marvell,berlin",
 	NULL,
@@ -30,7 +25,6 @@ static const char * const berlin_dt_compat[] = {
 
 DT_MACHINE_START(BERLIN_DT, "Marvell Berlin")
 	.dt_compat	= berlin_dt_compat,
-	.init_late	= berlin_init_late,
 	/*
 	 * with DT probing for L2CCs, berlin_init_machine can be removed.
 	 * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 00da8c8b..3e1c821 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -12,6 +12,8 @@
 #include <linux/platform_device.h>
 
 static const struct of_device_id machines[] __initconst = {
+	{ .compatible = "marvell,berlin", },
+
 	{ .compatible = "samsung,exynos3250", },
 	{ .compatible = "samsung,exynos4210", },
 	{ .compatible = "samsung,exynos4212", },
-- 
cgit v0.10.2


From 7ead83f6df2b105e4973140fe323a93ccfd4f8f9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:41 +0530
Subject: cpufreq: imx: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Note that the complete routine imx27_dt_init() is removed as

of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);

has same effect as a NULL .init_machine machine callback pointer.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-imx/imx27-dt.c b/arch/arm/mach-imx/imx27-dt.c
index bd42d1b..530a728 100644
--- a/arch/arm/mach-imx/imx27-dt.c
+++ b/arch/arm/mach-imx/imx27-dt.c
@@ -18,15 +18,6 @@
 #include "common.h"
 #include "mx27.h"
 
-static void __init imx27_dt_init(void)
-{
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-
-	platform_device_register_full(&devinfo);
-}
-
 static const char * const imx27_dt_board_compat[] __initconst = {
 	"fsl,imx27",
 	NULL
@@ -36,6 +27,5 @@ DT_MACHINE_START(IMX27_DT, "Freescale i.MX27 (Device Tree Support)")
 	.map_io		= mx27_map_io,
 	.init_early	= imx27_init_early,
 	.init_irq	= mx27_init_irq,
-	.init_machine	= imx27_dt_init,
 	.dt_compat	= imx27_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c
index 6883fba..10a82a4 100644
--- a/arch/arm/mach-imx/mach-imx51.c
+++ b/arch/arm/mach-imx/mach-imx51.c
@@ -50,13 +50,10 @@ static void __init imx51_ipu_mipi_setup(void)
 
 static void __init imx51_dt_init(void)
 {
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
 	imx51_ipu_mipi_setup();
 	imx_src_init();
 
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-	platform_device_register_full(&devinfo);
 }
 
 static void __init imx51_init_late(void)
diff --git a/arch/arm/mach-imx/mach-imx53.c b/arch/arm/mach-imx/mach-imx53.c
index 86316a9..18b5c5c13 100644
--- a/arch/arm/mach-imx/mach-imx53.c
+++ b/arch/arm/mach-imx/mach-imx53.c
@@ -40,8 +40,6 @@ static void __init imx53_dt_init(void)
 static void __init imx53_init_late(void)
 {
 	imx53_pm_init();
-
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
 }
 
 static const char * const imx53_dt_board_compat[] __initconst = {
diff --git a/arch/arm/mach-imx/mach-imx7d.c b/arch/arm/mach-imx/mach-imx7d.c
index 5a27f20..b450f52 100644
--- a/arch/arm/mach-imx/mach-imx7d.c
+++ b/arch/arm/mach-imx/mach-imx7d.c
@@ -105,11 +105,6 @@ static void __init imx7d_init_irq(void)
 	irqchip_init();
 }
 
-static void __init imx7d_init_late(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char *const imx7d_dt_compat[] __initconst = {
 	"fsl,imx7d",
 	NULL,
@@ -117,7 +112,6 @@ static const char *const imx7d_dt_compat[] __initconst = {
 
 DT_MACHINE_START(IMX7D, "Freescale i.MX7 Dual (Device Tree)")
 	.init_irq	= imx7d_init_irq,
-	.init_late	= imx7d_init_late,
 	.init_machine	= imx7d_init_machine,
 	.dt_compat	= imx7d_dt_compat,
 MACHINE_END
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3e1c821..3843314 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -12,6 +12,11 @@
 #include <linux/platform_device.h>
 
 static const struct of_device_id machines[] __initconst = {
+	{ .compatible = "fsl,imx27", },
+	{ .compatible = "fsl,imx51", },
+	{ .compatible = "fsl,imx53", },
+	{ .compatible = "fsl,imx7d", },
+
 	{ .compatible = "marvell,berlin", },
 
 	{ .compatible = "samsung,exynos3250", },
-- 
cgit v0.10.2


From 7694ca6e1d6f01122f05039b81f70f64b1ec4063 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:42 +0530
Subject: cpufreq: omap: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 58920bc..2f7b11d 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -277,13 +277,10 @@ static void __init omap4_init_voltages(void)
 
 static inline void omap_init_cpufreq(void)
 {
-	struct platform_device_info devinfo = { };
+	struct platform_device_info devinfo = { .name = "omap-cpufreq" };
 
 	if (!of_have_populated_dt())
-		devinfo.name = "omap-cpufreq";
-	else
-		devinfo.name = "cpufreq-dt";
-	platform_device_register_full(&devinfo);
+		platform_device_register_full(&devinfo);
 }
 
 static int __init omap2_common_pm_init(void)
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3843314..4e525f6 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -28,6 +28,11 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "samsung,exynos5420", },
 	{ .compatible = "samsung,exynos5800", },
 #endif
+
+	{ .compatible = "ti,omap2", },
+	{ .compatible = "ti,omap3", },
+	{ .compatible = "ti,omap4", },
+	{ .compatible = "ti,omap5", },
 };
 
 static int __init cpufreq_dt_platdev_init(void)
-- 
cgit v0.10.2


From 014400c127be352a0a129aaa8a9ac870a310c76c Mon Sep 17 00:00:00 2001
From: Finley Xiao <finley.xiao@rock-chips.com>
Date: Fri, 22 Apr 2016 16:58:43 +0530
Subject: cpufreq: rockchip: Use generic platdev driver

This patch add rockchip's compatible string to the compat list and
remove similar code from platform code for supporting generic platdev
driver.

Signed-off-by: Finley Xiao <finley.xiao@rock-chips.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index 3f07cc5..beb71da 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -74,7 +74,6 @@ static void __init rockchip_dt_init(void)
 {
 	rockchip_suspend_init();
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-	platform_device_register_simple("cpufreq-dt", 0, NULL, 0);
 }
 
 static const char * const rockchip_board_dt_compat[] = {
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 4e525f6..75fa921 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -29,6 +29,17 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "samsung,exynos5800", },
 #endif
 
+	{ .compatible = "rockchip,rk2928", },
+	{ .compatible = "rockchip,rk3036", },
+	{ .compatible = "rockchip,rk3066a", },
+	{ .compatible = "rockchip,rk3066b", },
+	{ .compatible = "rockchip,rk3188", },
+	{ .compatible = "rockchip,rk3228", },
+	{ .compatible = "rockchip,rk3288", },
+	{ .compatible = "rockchip,rk3366", },
+	{ .compatible = "rockchip,rk3368", },
+	{ .compatible = "rockchip,rk3399", },
+
 	{ .compatible = "ti,omap2", },
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
-- 
cgit v0.10.2


From a399dc9fc5005321cebee6589d6bca780ed99c18 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:44 +0530
Subject: cpufreq: shmobile: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index a65c80ac..c9ea0e6 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -38,7 +38,6 @@ smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o headsmp-scu.o platsmp-scu.o
 
 # PM objects
 obj-$(CONFIG_SUSPEND)		+= suspend.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq.o
 obj-$(CONFIG_PM_RCAR)		+= pm-rcar.o
 obj-$(CONFIG_PM_RMOBILE)	+= pm-rmobile.o
 obj-$(CONFIG_ARCH_RCAR_GEN2)	+= pm-rcar-gen2.o
diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 5464b7a..3b562d8 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h
@@ -25,16 +25,9 @@ static inline int shmobile_suspend_init(void) { return 0; }
 static inline void shmobile_smp_apmu_suspend_init(void) { }
 #endif
 
-#ifdef CONFIG_CPU_FREQ
-int shmobile_cpufreq_init(void);
-#else
-static inline int shmobile_cpufreq_init(void) { return 0; }
-#endif
-
 static inline void __init shmobile_init_late(void)
 {
 	shmobile_suspend_init();
-	shmobile_cpufreq_init();
 }
 
 #endif /* __ARCH_MACH_COMMON_H */
diff --git a/arch/arm/mach-shmobile/cpufreq.c b/arch/arm/mach-shmobile/cpufreq.c
deleted file mode 100644
index 634d701..0000000
--- a/arch/arm/mach-shmobile/cpufreq.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * CPUFreq support code for SH-Mobile ARM
- *
- *  Copyright (C) 2014 Gaku Inami
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/platform_device.h>
-
-#include "common.h"
-
-int __init shmobile_cpufreq_init(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-	return 0;
-}
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 75fa921..4b94fe3 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -29,6 +29,18 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "samsung,exynos5800", },
 #endif
 
+	{ .compatible = "renesas,emev2", },
+	{ .compatible = "renesas,r7s72100", },
+	{ .compatible = "renesas,r8a73a4", },
+	{ .compatible = "renesas,r8a7740", },
+	{ .compatible = "renesas,r8a7778", },
+	{ .compatible = "renesas,r8a7779", },
+	{ .compatible = "renesas,r8a7790", },
+	{ .compatible = "renesas,r8a7791", },
+	{ .compatible = "renesas,r8a7793", },
+	{ .compatible = "renesas,r8a7794", },
+	{ .compatible = "renesas,sh73a0", },
+
 	{ .compatible = "rockchip,rk2928", },
 	{ .compatible = "rockchip,rk3036", },
 	{ .compatible = "rockchip,rk3066a", },
-- 
cgit v0.10.2


From 117d4f59affa869f819ec53ed0cd54aedfdeffa0 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:45 +0530
Subject: cpufreq: sunxi: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 3c15619..95dca8c 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -17,11 +17,6 @@
 
 #include <asm/mach/arch.h>
 
-static void __init sunxi_dt_cpufreq_init(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char * const sunxi_board_dt_compat[] = {
 	"allwinner,sun4i-a10",
 	"allwinner,sun5i-a10s",
@@ -32,7 +27,6 @@ static const char * const sunxi_board_dt_compat[] = {
 
 DT_MACHINE_START(SUNXI_DT, "Allwinner sun4i/sun5i Families")
 	.dt_compat	= sunxi_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun6i_board_dt_compat[] = {
@@ -53,7 +47,6 @@ static void __init sun6i_timer_init(void)
 DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
 	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun6i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun7i_board_dt_compat[] = {
@@ -63,7 +56,6 @@ static const char * const sun7i_board_dt_compat[] = {
 
 DT_MACHINE_START(SUN7I_DT, "Allwinner sun7i (A20) Family")
 	.dt_compat	= sun7i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun8i_board_dt_compat[] = {
@@ -77,7 +69,6 @@ static const char * const sun8i_board_dt_compat[] = {
 DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family")
 	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun8i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun9i_board_dt_compat[] = {
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 4b94fe3..24dde5e 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -12,6 +12,18 @@
 #include <linux/platform_device.h>
 
 static const struct of_device_id machines[] __initconst = {
+	{ .compatible = "allwinner,sun4i-a10", },
+	{ .compatible = "allwinner,sun5i-a10s", },
+	{ .compatible = "allwinner,sun5i-a13", },
+	{ .compatible = "allwinner,sun5i-r8", },
+	{ .compatible = "allwinner,sun6i-a31", },
+	{ .compatible = "allwinner,sun6i-a31s", },
+	{ .compatible = "allwinner,sun7i-a20", },
+	{ .compatible = "allwinner,sun8i-a23", },
+	{ .compatible = "allwinner,sun8i-a33", },
+	{ .compatible = "allwinner,sun8i-a83t", },
+	{ .compatible = "allwinner,sun8i-h3", },
+
 	{ .compatible = "fsl,imx27", },
 	{ .compatible = "fsl,imx51", },
 	{ .compatible = "fsl,imx53", },
-- 
cgit v0.10.2


From 5e4249c6d9e596fdb26357997236a01b96c8902d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:46 +0530
Subject: cpufreq: zynq: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 860ffb6..da876d2 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -110,7 +110,6 @@ static void __init zynq_init_late(void)
  */
 static void __init zynq_init_machine(void)
 {
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
 	struct soc_device_attribute *soc_dev_attr;
 	struct soc_device *soc_dev;
 	struct device *parent = NULL;
@@ -145,7 +144,6 @@ out:
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, parent);
 
 	platform_device_register(&zynq_cpuidle_device);
-	platform_device_register_full(&devinfo);
 }
 
 static void __init zynq_timer_init(void)
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 24dde5e..5ad2938 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -68,6 +68,8 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
 	{ .compatible = "ti,omap5", },
+
+	{ .compatible = "xlnx,zynq-7000", },
 };
 
 static int __init cpufreq_dt_platdev_init(void)
-- 
cgit v0.10.2


From 3920be471ce7f66c05e6e0cd251a332b84520ef8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 22 Apr 2016 16:58:47 +0530
Subject: cpufreq: hisilicon: Use generic platdev driver

The cpufreq-dt-platdev driver supports creation of cpufreq-dt platform
device now, reuse that and remove similar code from platform code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 14b1f93..d89b8af 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -50,15 +50,6 @@ config ARM_HIGHBANK_CPUFREQ
 
 	  If in doubt, say N.
 
-config ARM_HISI_ACPU_CPUFREQ
-	tristate "Hisilicon ACPU CPUfreq driver"
-	depends on ARCH_HISI && CPUFREQ_DT
-	select PM_OPP
-	help
-	  This enables the hisilicon ACPU CPUfreq driver.
-
-	  If in doubt, say N.
-
 config ARM_IMX6Q_CPUFREQ
 	tristate "Freescale i.MX6 cpufreq support"
 	depends on ARCH_MXC
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index d7b646c..2cce2cd 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -55,7 +55,6 @@ obj-$(CONFIG_ARCH_DAVINCI)		+= davinci-cpufreq.o
 obj-$(CONFIG_UX500_SOC_DB8500)		+= dbx500-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ)	+= exynos5440-cpufreq.o
 obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ)	+= highbank-cpufreq.o
-obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ)	+= hisi-acpu-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
 obj-$(CONFIG_ARM_INTEGRATOR)		+= integrator-cpufreq.o
 obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ)	+= kirkwood-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 5ad2938..ac4a0ba 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -24,6 +24,8 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "allwinner,sun8i-a83t", },
 	{ .compatible = "allwinner,sun8i-h3", },
 
+	{ .compatible = "hisilicon,hi6220", },
+
 	{ .compatible = "fsl,imx27", },
 	{ .compatible = "fsl,imx51", },
 	{ .compatible = "fsl,imx53", },
diff --git a/drivers/cpufreq/hisi-acpu-cpufreq.c b/drivers/cpufreq/hisi-acpu-cpufreq.c
deleted file mode 100644
index 026d5b2..0000000
--- a/drivers/cpufreq/hisi-acpu-cpufreq.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Hisilicon Platforms Using ACPU CPUFreq Support
- *
- * Copyright (c) 2015 Hisilicon Limited.
- * Copyright (c) 2015 Linaro Limited.
- *
- * Leo Yan <leo.yan@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-static int __init hisi_acpu_cpufreq_driver_init(void)
-{
-	struct platform_device *pdev;
-
-	if (!of_machine_is_compatible("hisilicon,hi6220"))
-		return -ENODEV;
-
-	pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-	return PTR_ERR_OR_ZERO(pdev);
-}
-module_init(hisi_acpu_cpufreq_driver_init);
-
-MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
-MODULE_DESCRIPTION("Hisilicon acpu cpufreq driver");
-MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From ba1ca654f30ddca8f208f43ebbf27cb933a97982 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 25 Apr 2016 16:21:34 +0200
Subject: cpufreq: governor: Fix prev_load initialization in
 cpufreq_governor_start()

The way cpufreq_governor_start() initializes j_cdbs->prev_load is
questionable.

First off, j_cdbs->prev_cpu_wall used as a denominator in the
computation may be zero.  The case this happens is when
get_cpu_idle_time_us() returns -1 and get_cpu_idle_time_jiffy()
used to return that number is called exactly at the jiffies_64
wrap time.  It is rather hard to trigger that error, but it is not
impossible and it will just crash the kernel then.

Second, j_cdbs->prev_load is computed as the average load during
the entire time since the system started and it may not reflect the
load in the previous sampling period (as it is expected to).
That doesn't play well with the way dbs_update() uses that value.
Namely, if the update time delta (wall_time) happens do be greater
than twice the sampling rate on the first invocation of it, the
initial value of j_cdbs->prev_load (which may be completely off) will
be returned to the caller as the current load (unless it is equal to
zero and unless another CPU sharing the same policy object has a
greater load value).

For this reason, notice that the prev_load field of struct cpu_dbs_info
is only used by dbs_update() and only in that one place, so if
cpufreq_governor_start() is modified to always initialize it to 0,
it will make dbs_update() always compute the actual load first time
it checks the update time delta against the doubled sampling rate
(after initialization) and there won't be any side effects of it.

Consequently, modify cpufreq_governor_start() as described.

Fixes: 18b46abd0009 (cpufreq: governor: Be friendly towards latency-sensitive bursty workloads)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index db649c6..095f91c 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -508,12 +508,12 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy)
 
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
-		unsigned int prev_load;
 
 		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
-
-		prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle;
-		j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall;
+		/*
+		 * Make the first invocation of dbs_update() compute the load.
+		 */
+		j_cdbs->prev_load = 0;
 
 		if (ignore_nice)
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-- 
cgit v0.10.2


From e93e59ce5b85e6c2b444f09fd1f707274ec066dc Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Thu, 21 Apr 2016 10:56:30 +0200
Subject: cpuidle: Replace ktime_get() with local_clock()

The ktime_get() can have a non negligeable overhead, use local_clock()
instead.

In order to test the difference between ktime_get() and local_clock(),
a quick hack has been added to trigger, via debugfs, 10000 times a
call to ktime_get() and local_clock() and measure the elapsed time.

Then the average value, the min and max is computed for each call.

From userspace, the test above was called 100 times every 2 seconds.

So, ktime_get() and local_clock() have been called 1000000 times in
total.

The results are:

ktime_get():
============
 * average: 101 ns (stddev: 27.4)
 * maximum: 38313 ns
 * minimum: 65 ns

local_clock():
==============
 * average: 60 ns (stddev: 9.8)
 * maximum: 13487 ns
 * minimum: 46 ns

The local_clock() is faster and more stable.

Even if it is a drop in the ocean, changing the ktime_get() by the
local_clock() allows to save 80ns at idle time (entry + exit). And
in some circumstances, especially when there are several CPUs racing
for the clock access, we save tens of microseconds.

The idle duration resulting from a diff is converted from nanosec to
microsec. This could be done with integer division (div 1000) - which is
an expensive operation or by 10 bits shifting (div 1024) - which is fast
but unprecise.

The following table gives some results at the limits.

 ------------------------------------------
|   nsec   |   div(1000)   |   div(1024)   |
 ------------------------------------------
|   1e3    |        1 usec |      976 nsec |
 ------------------------------------------
|   1e6    |     1000 usec |      976 usec |
 ------------------------------------------
|   1e9    |  1000000 usec |   976562 usec |
 ------------------------------------------

There is a linear deviation of 2.34%. This loss of precision is acceptable
in the context of the resulting diff which is used for statistics. These
ones are processed to guess estimate an approximation of the duration of the
next idle period which ends up into an idle state selection. The selection
criteria takes into account the next duration based on large intervals,
represented by the idle state's target residency.

The 2^10 division is enough because the approximation regarding the 1e3
division is lost in all the approximations done for the next idle duration
computation.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c2dd99a..2b8e6ce 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
 	struct cpuidle_state *target_state = &drv->states[index];
 	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
-	ktime_t time_start, time_end;
+	u64 time_start, time_end;
 	s64 diff;
 
 	/*
@@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	sched_idle_set_state(target_state);
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);
-	time_start = ktime_get();
+	time_start = local_clock();
 
 	stop_critical_timings();
 	entered_state = target_state->enter(dev, drv, index);
 	start_critical_timings();
 
-	time_end = ktime_get();
+	time_end = local_clock();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
 	/* The cpu is no longer idle or about to enter idle. */
@@ -217,7 +217,11 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	if (!cpuidle_state_is_coupled(drv, entered_state))
 		local_irq_enable();
 
-	diff = ktime_to_us(ktime_sub(time_end, time_start));
+	/*
+	 * local_clock() returns the time in nanosecond, let's shift
+	 * by 10 (divide by 1024) to have microsecond based time.
+	 */
+	diff = (time_end - time_start) >> 10;
 	if (diff > INT_MAX)
 		diff = INT_MAX;
 
-- 
cgit v0.10.2


From bc19b9a81da009fd1d797d756d41b518ac7e5f14 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Thu, 31 Mar 2016 15:48:42 +0200
Subject: PM / AVS: rockchip-io: make io-domains a child of the GRF

IO-domain handling is part of the general register files, so should live
under the grf directly. This change allows the grf to be a simple-mfd and
the io-domains fetching the syscon regmap from that parent-node.

The old binding is of course preserved, though deprecated.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: David Wu <david.wu@rock-chips.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index c84fb47..d23dc00 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -37,8 +37,10 @@ Required properties:
   - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
   - "rockchip,rk3399-io-voltage-domain" for rk3399
   - "rockchip,rk3399-pmu-io-voltage-domain" for rk3399 pmu-domains
-- rockchip,grf: phandle to the syscon managing the "general register files"
 
+Deprecated properties:
+- rockchip,grf: phandle to the syscon managing the "general register files"
+    Systems should move the io-domains to a sub-node of the grf simple-mfd.
 
 You specify supplies using the standard regulator bindings by including
 a phandle the relevant regulator.  All specified supplies must be able
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 8986382..01b6d3f 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -336,6 +336,7 @@ static int rockchip_iodomain_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *match;
 	struct rockchip_iodomain *iod;
+	struct device *parent;
 	int i, ret = 0;
 
 	if (!np)
@@ -351,7 +352,14 @@ static int rockchip_iodomain_probe(struct platform_device *pdev)
 	match = of_match_node(rockchip_iodomain_match, np);
 	iod->soc_data = (struct rockchip_iodomain_soc_data *)match->data;
 
-	iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	parent = pdev->dev.parent;
+	if (parent && parent->of_node) {
+		iod->grf = syscon_node_to_regmap(parent->of_node);
+	} else {
+		dev_dbg(&pdev->dev, "falling back to old binding\n");
+		iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	}
+
 	if (IS_ERR(iod->grf)) {
 		dev_err(&pdev->dev, "couldn't find grf regmap\n");
 		return PTR_ERR(iod->grf);
-- 
cgit v0.10.2


From 0b26985a7d76645598d98fec6922ddd1ec8cbc55 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 16 Apr 2016 13:50:03 +0100
Subject: PM / clk: ensure we don't allocate a -ve size of count clks

It is entirely possible for of_count_phandle_wit_args to
return a -ve error return value so we need to check for this
otherwise we end up allocating a negative number of clk objects.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 0e64a1b..3657ac1 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -159,7 +159,7 @@ int of_pm_clk_add_clks(struct device *dev)
 
 	count = of_count_phandle_with_args(dev->of_node, "clocks",
 					   "#clock-cells");
-	if (count == 0)
+	if (count <= 0)
 		return -ENODEV;
 
 	clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
-- 
cgit v0.10.2


From 6c51cc0203de25aeaff9d0236d6c2b497be93e3b Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Fri, 22 Apr 2016 10:17:19 -0700
Subject: powercap/intel_rapl: Add support for Kabylake

Kabylake is similar to Skylake in terms of RAPL.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 8fad0a7..470bb62 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1101,6 +1101,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	RAPL_CPU(0X5C, rapl_defaults_core),/* Broxton */
 	RAPL_CPU(0x5E, rapl_defaults_core),/* Skylake-H/S */
 	RAPL_CPU(0x57, rapl_defaults_hsw_server),/* Knights Landing */
+	RAPL_CPU(0x8E, rapl_defaults_core),/* Kabylake */
+	RAPL_CPU(0x9E, rapl_defaults_core),/* Kabylake */
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
-- 
cgit v0.10.2


From 624c8df7d2823ec0df9609025480309322886ed3 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 26 Apr 2016 08:47:17 +0200
Subject: PM / Domains: Remove redundant pm_runtime_get|put*() in
 pm_genpd_prepare()

The PM core increases and decreases the runtime PM usage count in the
system PM prepare phase. This makes some of the pm_runtime_get|put*()
calls in pm_genpd_prepare() redundant, so let's remove them.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 4ce4ce0..60a3573 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -730,14 +730,11 @@ static int pm_genpd_prepare(struct device *dev)
 	 * at this point and a system wakeup event should be reported if it's
 	 * set up to wake up the system from sleep states.
 	 */
-	pm_runtime_get_noresume(dev);
 	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
 		pm_wakeup_event(dev, 0);
 
-	if (pm_wakeup_pending()) {
-		pm_runtime_put(dev);
+	if (pm_wakeup_pending())
 		return -EBUSY;
-	}
 
 	if (resume_needed(dev, genpd))
 		pm_runtime_resume(dev);
@@ -751,10 +748,8 @@ static int pm_genpd_prepare(struct device *dev)
 
 	mutex_unlock(&genpd->lock);
 
-	if (genpd->suspend_power_off) {
-		pm_runtime_put_noidle(dev);
+	if (genpd->suspend_power_off)
 		return 0;
-	}
 
 	/*
 	 * The PM domain must be in the GPD_STATE_ACTIVE state at this point,
@@ -776,7 +771,6 @@ static int pm_genpd_prepare(struct device *dev)
 		pm_runtime_enable(dev);
 	}
 
-	pm_runtime_put(dev);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 164a2159a2d6789bc7e3c4b126dde7f3ce865992 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 26 Apr 2016 08:47:18 +0200
Subject: PM / Domains: Drop unnecessary wakeup code from pm_genpd_prepare()

As the PM core already have wakeup management during the system PM phase,
it seems reasonable that genpd and its users should be able to rely on
that. Therefore let's remove this from pm_genpd_prepare().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 60a3573..de23b64 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -730,12 +730,6 @@ static int pm_genpd_prepare(struct device *dev)
 	 * at this point and a system wakeup event should be reported if it's
 	 * set up to wake up the system from sleep states.
 	 */
-	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
-		pm_wakeup_event(dev, 0);
-
-	if (pm_wakeup_pending())
-		return -EBUSY;
-
 	if (resume_needed(dev, genpd))
 		pm_runtime_resume(dev);
 
-- 
cgit v0.10.2


From 6de0dc4b536110d63a83021b7e6dce3e1955297b Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Tue, 26 Apr 2016 20:14:13 -0400
Subject: cpufreq: e_powersaver: Use IS_ENABLED() instead of checking for
 built-in or module

The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either
built-in or as a module, use that macro instead of open coding the same.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 5ec87e3..cdf097b 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -22,7 +22,7 @@
 #include <asm/msr.h>
 #include <asm/tsc.h>
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 #endif
@@ -35,7 +35,7 @@
 
 struct eps_cpu_data {
 	u32 fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	u32 bios_limit;
 #endif
 	struct cpufreq_frequency_table freq_table[];
@@ -48,7 +48,7 @@ static int freq_failsafe_off;
 static int voltage_failsafe_off;
 static int set_max_voltage;
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 static int ignore_acpi_limit;
 
 static struct acpi_processor_performance *eps_acpi_cpu_perf;
@@ -186,7 +186,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	int k, step, voltage;
 	int ret;
 	int states;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	unsigned int limit;
 #endif
 
@@ -286,7 +286,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	/* Calc FSB speed */
 	fsb = cpu_khz / current_multiplier;
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	/* Check for ACPI processor speed limit */
 	if (!ignore_acpi_limit && !eps_acpi_init()) {
 		if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
@@ -333,7 +333,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 
 	/* Copy basic values */
 	centaur->fsb = fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	centaur->bios_limit = limit;
 #endif
 
@@ -418,7 +418,7 @@ module_param(freq_failsafe_off, int, 0644);
 MODULE_PARM_DESC(freq_failsafe_off, "Disable current vs max frequency check");
 module_param(voltage_failsafe_off, int, 0644);
 MODULE_PARM_DESC(voltage_failsafe_off, "Disable current vs max voltage check");
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 module_param(ignore_acpi_limit, int, 0644);
 MODULE_PARM_DESC(ignore_acpi_limit, "Don't check ACPI's processor speed limit");
 #endif
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h
index b4c00a5..3eace72 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.h
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.h
@@ -17,7 +17,7 @@ int cbe_cpufreq_get_pmode(int cpu);
 
 int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
 
-#if defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE)
+#if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI)
 extern bool cbe_cpufreq_has_pmi;
 #else
 #define cbe_cpufreq_has_pmi (0)
-- 
cgit v0.10.2


From 2920e9ce8f43a946e3c257a7c0ee35b02149fca7 Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Date: Tue, 19 Apr 2016 15:28:00 +0530
Subject: cpufreq: powernv: Remove flag use-case of policy->driver_data

commit 1b0289848d5d ("cpufreq: powernv: Add sysfs attributes to show
throttle stats") used policy->driver_data as a flag for one-time creation
of throttle sysfs files. Instead of this use 'kernfs_find_and_get()' to
check if the attribute already exists. This is required as
policy->driver_data is used for other purposes in the later patch.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Signed-off-by: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 39ac78c..e2e2219 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -455,13 +455,15 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
 	int base, i;
+	struct kernfs_node *kn;
 
 	base = cpu_first_thread_sibling(policy->cpu);
 
 	for (i = 0; i < threads_per_core; i++)
 		cpumask_set_cpu(base + i, policy->cpus);
 
-	if (!policy->driver_data) {
+	kn = kernfs_find_and_get(policy->kobj.sd, throttle_attr_grp.name);
+	if (!kn) {
 		int ret;
 
 		ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
@@ -470,11 +472,8 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 				policy->cpu);
 			return ret;
 		}
-		/*
-		 * policy->driver_data is used as a flag for one-time
-		 * creation of throttle sysfs files.
-		 */
-		policy->driver_data = policy;
+	} else {
+		kernfs_put(kn);
 	}
 	return cpufreq_table_validate_and_show(policy, powernv_freqs);
 }
-- 
cgit v0.10.2


From eaa2c3aeef83f096cd1ec73df1310712e423337b Mon Sep 17 00:00:00 2001
From: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Date: Tue, 19 Apr 2016 15:28:01 +0530
Subject: cpufreq: powernv: Ramp-down global pstate slower than local-pstate

The frequency transition latency from pmin to pmax is observed to be in
few millisecond granurality. And it usually happens to take a performance
penalty during sudden frequency rampup requests.

This patch set solves this problem by using an entity called "global
pstates". The global pstate is a Chip-level entity, so the global entitiy
(Voltage) is managed across the cores. The local pstate is a Core-level
entity, so the local entity (frequency) is managed across threads.

This patch brings down global pstate at a slower rate than the local
pstate. Hence by holding global pstates higher than local pstate makes
the subsequent rampups faster.

A per policy structure is maintained to keep track of the global and
local pstate changes. The global pstate is brought down using a parabolic
equation. The ramp down time to pmin is set to ~5 seconds. To make sure
that the global pstates are dropped at regular interval , a timer is
queued for every 2 seconds during ramp-down phase, which eventually brings
the pstate down to local pstate.

Iozone results show fairly consistent performance boost.
YCSB on redis shows improved Max latencies in most cases.

Iozone write/rewite test were made with filesizes 200704Kb and 401408Kb
with different record sizes . The following table shows IOoperations/sec
with and without patch.

Iozone Results ( in op/sec) ( mean over 3 iterations )
---------------------------------------------------------------------
file size-                      with            without		  %
recordsize-IOtype               patch           patch		change
----------------------------------------------------------------------
200704-1-SeqWrite               1616532         1615425         0.06
200704-1-Rewrite                2423195         2303130         5.21
200704-2-SeqWrite               1628577         1602620         1.61
200704-2-Rewrite                2428264         2312154         5.02
200704-4-SeqWrite               1617605         1617182         0.02
200704-4-Rewrite                2430524         2351238         3.37
200704-8-SeqWrite               1629478         1600436         1.81
200704-8-Rewrite                2415308         2298136         5.09
200704-16-SeqWrite              1619632         1618250         0.08
200704-16-Rewrite               2396650         2352591         1.87
200704-32-SeqWrite              1632544         1598083         2.15
200704-32-Rewrite               2425119         2329743         4.09
200704-64-SeqWrite              1617812         1617235         0.03
200704-64-Rewrite               2402021         2321080         3.48
200704-128-SeqWrite             1631998         1600256         1.98
200704-128-Rewrite              2422389         2304954         5.09
200704-256 SeqWrite             1617065         1616962         0.00
200704-256-Rewrite              2432539         2301980         5.67
200704-512-SeqWrite             1632599         1598656         2.12
200704-512-Rewrite              2429270         2323676         4.54
200704-1024-SeqWrite            1618758         1616156         0.16
200704-1024-Rewrite             2431631         2315889         4.99
401408-1-SeqWrite               1631479         1608132         1.45
401408-1-Rewrite                2501550         2459409         1.71
401408-2-SeqWrite               1617095         1626069         -0.55
401408-2-Rewrite                2507557         2443621         2.61
401408-4-SeqWrite               1629601         1611869         1.10
401408-4-Rewrite                2505909         2462098         1.77
401408-8-SeqWrite               1617110         1626968         -0.60
401408-8-Rewrite                2512244         2456827         2.25
401408-16-SeqWrite              1632609         1609603         1.42
401408-16-Rewrite               2500792         2451405         2.01
401408-32-SeqWrite              1619294         1628167         -0.54
401408-32-Rewrite               2510115         2451292         2.39
401408-64-SeqWrite              1632709         1603746         1.80
401408-64-Rewrite               2506692         2433186         3.02
401408-128-SeqWrite             1619284         1627461         -0.50
401408-128-Rewrite              2518698         2453361         2.66
401408-256-SeqWrite             1634022         1610681         1.44
401408-256-Rewrite              2509987         2446328         2.60
401408-512-SeqWrite             1617524         1628016         -0.64
401408-512-Rewrite              2504409         2442899         2.51
401408-1024-SeqWrite            1629812         1611566         1.13
401408-1024-Rewrite             2507620          2442968        2.64

Tested with YCSB workload (50% update + 50% read) over redis for 1 million
records and 1 million operation. Each test was carried out with target
operations per second and persistence disabled.

Max-latency (in us)( mean over 5 iterations )
---------------------------------------------------------------
op/s    Operation       with patch      without patch   %change
---------------------------------------------------------------
15000   Read            61480.6         50261.4         22.32
15000   cleanup         215.2           293.6           -26.70
15000   update          25666.2         25163.8         2.00

25000   Read            32626.2         89525.4         -63.56
25000   cleanup         292.2           263.0           11.10
25000   update          32293.4         90255.0         -64.22

35000   Read            34783.0         33119.0         5.02
35000   cleanup         321.2           395.8           -18.8
35000   update          36047.0         38747.8         -6.97

40000   Read            38562.2         42357.4         -8.96
40000   cleanup         371.8           384.6           -3.33
40000   update          27861.4         41547.8         -32.94

45000   Read            42271.0         88120.6         -52.03
45000   cleanup         263.6           383.0           -31.17
45000   update          29755.8         81359.0         -63.43

(test without target op/s)
47659   Read            83061.4         136440.6        -39.12
47659   cleanup         195.8           193.8           1.03
47659   update          73429.4         124971.8        -41.24

Signed-off-by: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index e2e2219..144c732 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -36,12 +36,56 @@
 #include <asm/reg.h>
 #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
 #include <asm/opal.h>
+#include <linux/timer.h>
 
 #define POWERNV_MAX_PSTATES	256
 #define PMSR_PSAFE_ENABLE	(1UL << 30)
 #define PMSR_SPR_EM_DISABLE	(1UL << 31)
 #define PMSR_MAX(x)		((x >> 32) & 0xFF)
 
+#define MAX_RAMP_DOWN_TIME				5120
+/*
+ * On an idle system we want the global pstate to ramp-down from max value to
+ * min over a span of ~5 secs. Also we want it to initially ramp-down slowly and
+ * then ramp-down rapidly later on.
+ *
+ * This gives a percentage rampdown for time elapsed in milliseconds.
+ * ramp_down_percentage = ((ms * ms) >> 18)
+ *			~= 3.8 * (sec * sec)
+ *
+ * At 0 ms	ramp_down_percent = 0
+ * At 5120 ms	ramp_down_percent = 100
+ */
+#define ramp_down_percent(time)		((time * time) >> 18)
+
+/* Interval after which the timer is queued to bring down global pstate */
+#define GPSTATE_TIMER_INTERVAL				2000
+
+/**
+ * struct global_pstate_info -	Per policy data structure to maintain history of
+ *				global pstates
+ * @highest_lpstate:		The local pstate from which we are ramping down
+ * @elapsed_time:		Time in ms spent in ramping down from
+ *				highest_lpstate
+ * @last_sampled_time:		Time from boot in ms when global pstates were
+ *				last set
+ * @last_lpstate,last_gpstate:	Last set values for local and global pstates
+ * @timer:			Is used for ramping down if cpu goes idle for
+ *				a long time with global pstate held high
+ * @gpstate_lock:		A spinlock to maintain synchronization between
+ *				routines called by the timer handler and
+ *				governer's target_index calls
+ */
+struct global_pstate_info {
+	int highest_lpstate;
+	unsigned int elapsed_time;
+	unsigned int last_sampled_time;
+	int last_lpstate;
+	int last_gpstate;
+	spinlock_t gpstate_lock;
+	struct timer_list timer;
+};
+
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
 
@@ -94,6 +138,17 @@ static struct powernv_pstate_info {
 	int nr_pstates;
 } powernv_pstate_info;
 
+static inline void reset_gpstates(struct cpufreq_policy *policy)
+{
+	struct global_pstate_info *gpstates = policy->driver_data;
+
+	gpstates->highest_lpstate = 0;
+	gpstates->elapsed_time = 0;
+	gpstates->last_sampled_time = 0;
+	gpstates->last_lpstate = 0;
+	gpstates->last_gpstate = 0;
+}
+
 /*
  * Initialize the freq table based on data obtained
  * from the firmware passed via device-tree
@@ -285,6 +340,7 @@ static inline void set_pmspr(unsigned long sprn, unsigned long val)
 struct powernv_smp_call_data {
 	unsigned int freq;
 	int pstate_id;
+	int gpstate_id;
 };
 
 /*
@@ -343,19 +399,21 @@ static unsigned int powernv_cpufreq_get(unsigned int cpu)
  * (struct powernv_smp_call_data *) and the pstate_id which needs to be set
  * on this CPU should be present in freq_data->pstate_id.
  */
-static void set_pstate(void *freq_data)
+static void set_pstate(void *data)
 {
 	unsigned long val;
-	unsigned long pstate_ul =
-		((struct powernv_smp_call_data *) freq_data)->pstate_id;
+	struct powernv_smp_call_data *freq_data = data;
+	unsigned long pstate_ul = freq_data->pstate_id;
+	unsigned long gpstate_ul = freq_data->gpstate_id;
 
 	val = get_pmspr(SPRN_PMCR);
 	val = val & 0x0000FFFFFFFFFFFFULL;
 
 	pstate_ul = pstate_ul & 0xFF;
+	gpstate_ul = gpstate_ul & 0xFF;
 
 	/* Set both global(bits 56..63) and local(bits 48..55) PStates */
-	val = val | (pstate_ul << 56) | (pstate_ul << 48);
+	val = val | (gpstate_ul << 56) | (pstate_ul << 48);
 
 	pr_debug("Setting cpu %d pmcr to %016lX\n",
 			raw_smp_processor_id(), val);
@@ -424,6 +482,110 @@ next:
 	}
 }
 
+/**
+ * calc_global_pstate - Calculate global pstate
+ * @elapsed_time:	Elapsed time in milliseconds
+ * @local_pstate:	New local pstate
+ * @highest_lpstate:	pstate from which its ramping down
+ *
+ * Finds the appropriate global pstate based on the pstate from which its
+ * ramping down and the time elapsed in ramping down. It follows a quadratic
+ * equation which ensures that it reaches ramping down to pmin in 5sec.
+ */
+static inline int calc_global_pstate(unsigned int elapsed_time,
+				     int highest_lpstate, int local_pstate)
+{
+	int pstate_diff;
+
+	/*
+	 * Using ramp_down_percent we get the percentage of rampdown
+	 * that we are expecting to be dropping. Difference between
+	 * highest_lpstate and powernv_pstate_info.min will give a absolute
+	 * number of how many pstates we will drop eventually by the end of
+	 * 5 seconds, then just scale it get the number pstates to be dropped.
+	 */
+	pstate_diff =  ((int)ramp_down_percent(elapsed_time) *
+			(highest_lpstate - powernv_pstate_info.min)) / 100;
+
+	/* Ensure that global pstate is >= to local pstate */
+	if (highest_lpstate - pstate_diff < local_pstate)
+		return local_pstate;
+	else
+		return highest_lpstate - pstate_diff;
+}
+
+static inline void  queue_gpstate_timer(struct global_pstate_info *gpstates)
+{
+	unsigned int timer_interval;
+
+	/*
+	 * Setting up timer to fire after GPSTATE_TIMER_INTERVAL ms, But
+	 * if it exceeds MAX_RAMP_DOWN_TIME ms for ramp down time.
+	 * Set timer such that it fires exactly at MAX_RAMP_DOWN_TIME
+	 * seconds of ramp down time.
+	 */
+	if ((gpstates->elapsed_time + GPSTATE_TIMER_INTERVAL)
+	     > MAX_RAMP_DOWN_TIME)
+		timer_interval = MAX_RAMP_DOWN_TIME - gpstates->elapsed_time;
+	else
+		timer_interval = GPSTATE_TIMER_INTERVAL;
+
+	mod_timer_pinned(&gpstates->timer, jiffies +
+			msecs_to_jiffies(timer_interval));
+}
+
+/**
+ * gpstate_timer_handler
+ *
+ * @data: pointer to cpufreq_policy on which timer was queued
+ *
+ * This handler brings down the global pstate closer to the local pstate
+ * according quadratic equation. Queues a new timer if it is still not equal
+ * to local pstate
+ */
+void gpstate_timer_handler(unsigned long data)
+{
+	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
+	struct global_pstate_info *gpstates = policy->driver_data;
+	int gpstate_id;
+	unsigned int time_diff = jiffies_to_msecs(jiffies)
+					- gpstates->last_sampled_time;
+	struct powernv_smp_call_data freq_data;
+
+	if (!spin_trylock(&gpstates->gpstate_lock))
+		return;
+
+	gpstates->last_sampled_time += time_diff;
+	gpstates->elapsed_time += time_diff;
+	freq_data.pstate_id = gpstates->last_lpstate;
+
+	if ((gpstates->last_gpstate == freq_data.pstate_id) ||
+	    (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
+		gpstate_id = freq_data.pstate_id;
+		reset_gpstates(policy);
+		gpstates->highest_lpstate = freq_data.pstate_id;
+	} else {
+		gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+						gpstates->highest_lpstate,
+						freq_data.pstate_id);
+	}
+
+	/*
+	 * If local pstate is equal to global pstate, rampdown is over
+	 * So timer is not required to be queued.
+	 */
+	if (gpstate_id != freq_data.pstate_id)
+		queue_gpstate_timer(gpstates);
+
+	freq_data.gpstate_id = gpstate_id;
+	gpstates->last_gpstate = freq_data.gpstate_id;
+	gpstates->last_lpstate = freq_data.pstate_id;
+
+	/* Timer may get migrated to a different cpu on cpu hot unplug */
+	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
+	spin_unlock(&gpstates->gpstate_lock);
+}
+
 /*
  * powernv_cpufreq_target_index: Sets the frequency corresponding to
  * the cpufreq table entry indexed by new_index on the cpus in the
@@ -433,6 +595,9 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 					unsigned int new_index)
 {
 	struct powernv_smp_call_data freq_data;
+	unsigned int cur_msec, gpstate_id;
+	unsigned long flags;
+	struct global_pstate_info *gpstates = policy->driver_data;
 
 	if (unlikely(rebooting) && new_index != get_nominal_index())
 		return 0;
@@ -440,22 +605,70 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 	if (!throttled)
 		powernv_cpufreq_throttle_check(NULL);
 
+	cur_msec = jiffies_to_msecs(get_jiffies_64());
+
+	spin_lock_irqsave(&gpstates->gpstate_lock, flags);
 	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
 
+	if (!gpstates->last_sampled_time) {
+		gpstate_id = freq_data.pstate_id;
+		gpstates->highest_lpstate = freq_data.pstate_id;
+		goto gpstates_done;
+	}
+
+	if (gpstates->last_gpstate > freq_data.pstate_id) {
+		gpstates->elapsed_time += cur_msec -
+						 gpstates->last_sampled_time;
+
+		/*
+		 * If its has been ramping down for more than MAX_RAMP_DOWN_TIME
+		 * we should be resetting all global pstate related data. Set it
+		 * equal to local pstate to start fresh.
+		 */
+		if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
+			reset_gpstates(policy);
+			gpstates->highest_lpstate = freq_data.pstate_id;
+			gpstate_id = freq_data.pstate_id;
+		} else {
+		/* Elaspsed_time is less than 5 seconds, continue to rampdown */
+			gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+							gpstates->highest_lpstate,
+							freq_data.pstate_id);
+		}
+	} else {
+		reset_gpstates(policy);
+		gpstates->highest_lpstate = freq_data.pstate_id;
+		gpstate_id = freq_data.pstate_id;
+	}
+
+	/*
+	 * If local pstate is equal to global pstate, rampdown is over
+	 * So timer is not required to be queued.
+	 */
+	if (gpstate_id != freq_data.pstate_id)
+		queue_gpstate_timer(gpstates);
+
+gpstates_done:
+	freq_data.gpstate_id = gpstate_id;
+	gpstates->last_sampled_time = cur_msec;
+	gpstates->last_gpstate = freq_data.gpstate_id;
+	gpstates->last_lpstate = freq_data.pstate_id;
+
 	/*
 	 * Use smp_call_function to send IPI and execute the
 	 * mtspr on target CPU.  We could do that without IPI
 	 * if current CPU is within policy->cpus (core)
 	 */
 	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
-
+	spin_unlock_irqrestore(&gpstates->gpstate_lock, flags);
 	return 0;
 }
 
 static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	int base, i;
+	int base, i, ret;
 	struct kernfs_node *kn;
+	struct global_pstate_info *gpstates;
 
 	base = cpu_first_thread_sibling(policy->cpu);
 
@@ -475,7 +688,34 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	} else {
 		kernfs_put(kn);
 	}
-	return cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+	gpstates =  kzalloc(sizeof(*gpstates), GFP_KERNEL);
+	if (!gpstates)
+		return -ENOMEM;
+
+	policy->driver_data = gpstates;
+
+	/* initialize timer */
+	init_timer_deferrable(&gpstates->timer);
+	gpstates->timer.data = (unsigned long)policy;
+	gpstates->timer.function = gpstate_timer_handler;
+	gpstates->timer.expires = jiffies +
+				msecs_to_jiffies(GPSTATE_TIMER_INTERVAL);
+	spin_lock_init(&gpstates->gpstate_lock);
+	ret = cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+	if (ret < 0)
+		kfree(policy->driver_data);
+
+	return ret;
+}
+
+static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	/* timer is deleted in cpufreq_cpu_stop() */
+	kfree(policy->driver_data);
+
+	return 0;
 }
 
 static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
@@ -603,15 +843,19 @@ static struct notifier_block powernv_cpufreq_opal_nb = {
 static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
 	struct powernv_smp_call_data freq_data;
+	struct global_pstate_info *gpstates = policy->driver_data;
 
 	freq_data.pstate_id = powernv_pstate_info.min;
+	freq_data.gpstate_id = powernv_pstate_info.min;
 	smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
+	del_timer_sync(&gpstates->timer);
 }
 
 static struct cpufreq_driver powernv_cpufreq_driver = {
 	.name		= "powernv-cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= powernv_cpufreq_cpu_init,
+	.exit		= powernv_cpufreq_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= powernv_cpufreq_target_index,
 	.get		= powernv_cpufreq_get,
-- 
cgit v0.10.2


From 9522a2ff9cde26ef48c30e0c9ca9ae4dfb669764 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 27 Apr 2016 15:48:06 -0700
Subject: cpufreq: intel_pstate: Enforce _PPC limits

Use ACPI _PPC notification to limit max P state driver will request.
ACPI _PPC change notification is sent by BIOS to limit max P state
in several cases:
- Reduce impact of platform thermal condition
- When Config TDP feature is used, a changed _PPC is sent to
follow TDP change
- Remote node managers in server want to control platform power
via baseboard management controller (BMC)

This change registers with ACPI processor performance lib so that
_PPC changes are notified to cpufreq core, which in turns will
result in call to .setpolicy() callback. Also the way _PSS
table identifies a turbo frequency is not compatible to max turbo
frequency in intel_pstate, so the very first entry in _PSS needs
to be adjusted.

This feature can be turned on by using kernel parameters:
intel_pstate=support_acpi_ppc

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Minor cleanups ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ecc74fa..31c094f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1661,6 +1661,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		hwp_only
 			Only load intel_pstate on systems which support
 			hardware P state control (HWP) if available.
+		support_acpi_ppc
+			Enforce ACPI _PPC performance limits.
 
 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index c59bdcb..adbd1de 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,6 +5,7 @@
 config X86_INTEL_PSTATE
        bool "Intel P state control"
        depends on X86
+       select ACPI_PROCESSOR if ACPI
        help
           This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cfa6a68..c72a82a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -41,6 +41,10 @@
 #define ATOM_TURBO_RATIOS	0x66c
 #define ATOM_TURBO_VIDS		0x66d
 
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
+
 #define FRAC_BITS 8
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
@@ -174,6 +178,8 @@ struct _pid {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
+ * @acpi_perf_data:	Stores ACPI perf information read from _PSS
+ * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -192,6 +198,10 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
+#ifdef CONFIG_ACPI
+	struct acpi_processor_performance acpi_perf_data;
+	bool valid_pss_table;
+#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -260,6 +270,9 @@ static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 
+#ifdef CONFIG_ACPI
+static bool acpi_ppc;
+#endif
 
 /**
  * struct perf_limits - Store user and policy limits
@@ -333,6 +346,111 @@ static struct perf_limits *limits = &performance_limits;
 static struct perf_limits *limits = &powersave_limits;
 #endif
 
+#ifdef CONFIG_ACPI
+/*
+ * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
+ * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
+ * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
+ * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
+ * target ratio 0x17. The _PSS control value stores in a format which can be
+ * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
+ * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
+ * This function converts the _PSS control value to intel pstate driver format
+ * for comparison and assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+	return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+	int turbo_pss_ctl;
+	int ret;
+	int i;
+
+	if (!acpi_ppc)
+		return;
+
+	cpu = all_cpu_data[policy->cpu];
+
+	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+						  policy->cpu);
+	if (ret)
+		return;
+
+	/*
+	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
+	 * guarantee that the states returned by it map to the states in our
+	 * list directly.
+	 */
+	if (cpu->acpi_perf_data.control_register.space_id !=
+						ACPI_ADR_SPACE_FIXED_HARDWARE)
+		goto err;
+
+	/*
+	 * If there is only one entry _PSS, simply ignore _PSS and continue as
+	 * usual without taking _PSS into account
+	 */
+	if (cpu->acpi_perf_data.state_count < 2)
+		goto err;
+
+	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
+			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
+			 (u32) cpu->acpi_perf_data.states[i].power,
+			 (u32) cpu->acpi_perf_data.states[i].control);
+	}
+
+	/*
+	 * The _PSS table doesn't contain whole turbo frequency range.
+	 * This just contains +1 MHZ above the max non turbo frequency,
+	 * with control value corresponding to max turbo ratio. But
+	 * when cpufreq set policy is called, it will call with this
+	 * max frequency, which will cause a reduced performance as
+	 * this driver uses real max turbo frequency as the max
+	 * frequency. So correct this frequency in _PSS table to
+	 * correct max turbo frequency based on the turbo ratio.
+	 * Also need to convert to MHz as _PSS freq is in MHz.
+	 */
+	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+	if (turbo_pss_ctl > cpu->pstate.max_pstate)
+		cpu->acpi_perf_data.states[0].core_frequency =
+					policy->cpuinfo.max_freq / 1000;
+	cpu->valid_pss_table = true;
+	pr_info("_PPC limits will be enforced\n");
+
+	return;
+
+ err:
+	cpu->valid_pss_table = false;
+	acpi_processor_unregister_performance(policy->cpu);
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+
+	cpu = all_cpu_data[policy->cpu];
+	if (!cpu->valid_pss_table)
+		return;
+
+	acpi_processor_unregister_performance(policy->cpu);
+}
+
+#else
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+}
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 			     int deadband, int integral) {
 	pid->setpoint = int_tofp(setpoint);
@@ -1398,18 +1516,27 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->cpuinfo.max_freq =
 		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	intel_pstate_init_acpi_perf_limits(policy);
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
 
 	return 0;
 }
 
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+	intel_pstate_exit_perf_limits(policy);
+
+	return 0;
+}
+
 static struct cpufreq_driver intel_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.verify		= intel_pstate_verify_policy,
 	.setpolicy	= intel_pstate_set_policy,
 	.get		= intel_pstate_get,
 	.init		= intel_pstate_cpu_init,
+	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1453,8 +1580,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 
 }
 
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
+#ifdef CONFIG_ACPI
 
 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1660,6 +1786,12 @@ static int __init intel_pstate_setup(char *str)
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+
+#ifdef CONFIG_ACPI
+	if (!strcmp(str, "support_acpi_ppc"))
+		acpi_ppc = true;
+#endif
+
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
-- 
cgit v0.10.2


From 3be9200d512bfa8d6292f5dce6c02aa151821e09 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 27 Apr 2016 15:48:07 -0700
Subject: cpufreq: intel_pstate: Adjust policy->max

When policy->max is changed via _PPC or sysfs and is more than the max non
turbo frequency, it does not really change resulting performance in some
processors. When policy->max results in a P-State ratio more than the
turbo activation ratio, then processor can choose any P-State up to max
turbo. So the user or _PPC setting has no value, but this can cause
undesirable side effects like:
- Showing reduced max percentage in Intel P-State sysfs
- It can cause reduced max performance under certain boundary conditions:
The requested max scaling frequency either via _PPC or via cpufreq-sysfs,
will be converted into a fixed floating point max percent scale. In
majority of the cases this will result in correct max. But not 100% of the
time. If the _PPC is requested at a point where the calculation lead to a
lower max, this can result in a lower P-State then expected and it will
impact performance.
Example of this condition using a Broadwell laptop with config TDP.

ACPI _PSS table from a Broadwell laptop
2301000 2300000 2200000 2000000 1900000 1800000 1700000 1500000 1400000
1300000 1100000 1000000 900000 800000 600000 500000

The actual results by disabling config TDP so that we can get what is
requested on or below 2300000Khz.

scaling_max_freq        Max Requested P-State   Resultant scaling
max
---------------------------------------- ----------------------
2400000                 18                      2900000 (max
turbo)
2300000                 17                      2300000 (max
physical non turbo)
2200000                 15                      2100000
2100000                 15                      2100000
2000000                 13                      1900000
1900000                 13                      1900000
1800000                 12                      1800000
1700000                 11                      1700000
1600000                 10                      1600000
1500000                 f                       1500000
1400000                 e                       1400000
1300000                 d                       1300000
1200000                 c                       1200000
1100000                 a                       1000000
1000000                 a                       1000000
900000                  9                        900000
800000                  8                        800000
700000                  7                        700000
600000                  6                        600000
500000                  5                        500000
------------------------------------------------------------------

Now set the config TDP level 1 ratio as 0x0b (equivalent to 1100000KHz)
in BIOS (not every system will let you adjust this).
The turbo activation ratio will be set to one less than that, which will
be 0x0a (So any request above 1000000KHz should result in turbo region
assuming no thermal limits).
Here _PPC will request max to 1100000KHz (which basically should still
result in turbo as this is more than the turbo activation ratio up to
max allowable turbo frequency), but actual calculation resulted in a max
ceiling P-State which is 0x0a. So under any load condition, this driver
will not request turbo P-States. This will be a huge performance hit.

When config TDP feature is ON, if the _PPC points to a frequency above
turbo activation ratio, the performance can still reach max turbo. In this
case we don't need to treat this as the reduced frequency in set_policy
callback.

In this change when config TDP is active (by checking if the physical max
non turbo ratio is more than the current max non turbo ratio), any request
above current max non turbo is treated as full performance.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw : Minor cleanups ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c72a82a..31f6ffe 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1418,11 +1418,22 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits)
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
+	struct cpudata *cpu;
+
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
 
+	cpu = all_cpu_data[0];
+	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
+		if (policy->max < policy->cpuinfo.max_freq &&
+		    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
+			pr_debug("policy->max > max non turbo frequency\n");
+			policy->max = policy->cpuinfo.max_freq;
+		}
+	}
+
 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		limits = &performance_limits;
 		if (policy->max >= policy->cpuinfo.max_freq) {
-- 
cgit v0.10.2


From 2b3ec765058449caa27e2806a939a4bffcbeba64 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 27 Apr 2016 15:48:08 -0700
Subject: cpufreq: intel_pstate: Enable PPC enforcement for servers

For platforms which are controlled via remove node manager, enable _PPC by
default. These platforms are mostly categorized as enterprise server or
performance servers. These platforms needs to go through some
certifications tests, which tests control via _PPC.
The relative risk of enabling by default is  low as this is is less likely
that these systems have broken _PSS table.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 31c094f..1cffa3f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1662,7 +1662,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Only load intel_pstate on systems which support
 			hardware P state control (HWP) if available.
 		support_acpi_ppc
-			Enforce ACPI _PPC performance limits.
+			Enforce ACPI _PPC performance limits. If the Fixed ACPI
+			Description Table, specifies preferred power management
+			profile as "Enterprise Server" or "Performance Server",
+			then this feature is turned on by default.
 
 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 31f6ffe..a0823e8 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -347,6 +347,16 @@ static struct perf_limits *limits = &powersave_limits;
 #endif
 
 #ifdef CONFIG_ACPI
+
+static bool intel_pstate_get_ppc_enable_status(void)
+{
+	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
+	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
+		return true;
+
+	return acpi_ppc;
+}
+
 /*
  * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
  * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
@@ -370,7 +380,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	int ret;
 	int i;
 
-	if (!acpi_ppc)
+	if (!intel_pstate_get_ppc_enable_status())
 		return;
 
 	cpu = all_cpu_data[policy->cpu];
-- 
cgit v0.10.2


From b4f4b4b37133340befa354fbaa54d8e84ea86103 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 28 Apr 2016 01:19:03 +0200
Subject: cpufreq: governor: Change confusing struct field and variable names

The name of the prev_cpu_wall field in struct cpu_dbs_info is
confusing, because it doesn't represent wall time, but the previous
update time as returned by get_cpu_idle_time() (that may be the
current value of jiffies_64 in some cases, for example).

Moreover, the names of some related variables in dbs_update() take
that confusion further.

Rename all of those things to make their names reflect the purpose
more accurately.  While at it, drop unnecessary parens from one of
the updated expressions.

No functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Chen Yu <yu.c.chen@intel.com>

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 095f91c..eb2fdbd 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -103,7 +103,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
 		for_each_cpu(j, policy_dbs->policy->cpus) {
 			struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
-			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall,
+			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
 								  dbs_data->io_is_busy);
 			if (dbs_data->ignore_nice_load)
 				j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
@@ -137,14 +137,14 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
-		u64 cur_wall_time, cur_idle_time;
-		unsigned int idle_time, wall_time;
+		u64 update_time, cur_idle_time;
+		unsigned int idle_time, time_elapsed;
 		unsigned int load;
 
-		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
+		cur_idle_time = get_cpu_idle_time(j, &update_time, io_busy);
 
-		wall_time = cur_wall_time - j_cdbs->prev_cpu_wall;
-		j_cdbs->prev_cpu_wall = cur_wall_time;
+		time_elapsed = update_time - j_cdbs->prev_update_time;
+		j_cdbs->prev_update_time = update_time;
 
 		idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
 		j_cdbs->prev_cpu_idle = cur_idle_time;
@@ -156,7 +156,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			j_cdbs->prev_cpu_nice = cur_nice;
 		}
 
-		if (unlikely(!wall_time || wall_time < idle_time))
+		if (unlikely(!time_elapsed || time_elapsed < idle_time))
 			continue;
 
 		/*
@@ -177,7 +177,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 		 *
 		 * Detecting this situation is easy: the governor's utilization
 		 * update handler would not have run during CPU-idle periods.
-		 * Hence, an unusually large 'wall_time' (as compared to the
+		 * Hence, an unusually large 'time_elapsed' (as compared to the
 		 * sampling rate) indicates this scenario.
 		 *
 		 * prev_load can be zero in two cases and we must recalculate it
@@ -185,7 +185,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 		 * - during long idle intervals
 		 * - explicitly set to zero
 		 */
-		if (unlikely(wall_time > (2 * sampling_rate) &&
+		if (unlikely(time_elapsed > 2 * sampling_rate &&
 			     j_cdbs->prev_load)) {
 			load = j_cdbs->prev_load;
 
@@ -196,7 +196,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			 */
 			j_cdbs->prev_load = 0;
 		} else {
-			load = 100 * (wall_time - idle_time) / wall_time;
+			load = 100 * (time_elapsed - idle_time) / time_elapsed;
 			j_cdbs->prev_load = load;
 		}
 
@@ -509,7 +509,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy)
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
-		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
+		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, io_busy);
 		/*
 		 * Make the first invocation of dbs_update() compute the load.
 		 */
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 3e0eb7c..34eb214 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -111,7 +111,7 @@ static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
 /* Per cpu structures */
 struct cpu_dbs_info {
 	u64 prev_cpu_idle;
-	u64 prev_cpu_wall;
+	u64 prev_update_time;
 	u64 prev_cpu_nice;
 	/*
 	 * Used to keep track of load in the previous interval. However, when
-- 
cgit v0.10.2


From fba1fbf56383073d286e6e3657bff36ee0f410e8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 28 Apr 2016 14:42:34 +0200
Subject: PM / sleep: Drop unused `info' variable

Commit 32e8d689dc12 (PM / sleep: trace_device_pm_callback coverage in
dpm_prepare/complete) removed all users of this variable but forgot to
remove the variable itself.

Signed-off-by: Thierry Reding <treding@nvidia.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6e7c3cc..c81667d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1556,7 +1556,6 @@ int dpm_suspend(pm_message_t state)
 static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int (*callback)(struct device *) = NULL;
-	char *info = NULL;
 	int ret = 0;
 
 	if (dev->power.syscore)
@@ -1579,24 +1578,17 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		goto unlock;
 	}
 
-	if (dev->pm_domain) {
-		info = "preparing power domain ";
+	if (dev->pm_domain)
 		callback = dev->pm_domain->ops.prepare;
-	} else if (dev->type && dev->type->pm) {
-		info = "preparing type ";
+	else if (dev->type && dev->type->pm)
 		callback = dev->type->pm->prepare;
-	} else if (dev->class && dev->class->pm) {
-		info = "preparing class ";
+	else if (dev->class && dev->class->pm)
 		callback = dev->class->pm->prepare;
-	} else if (dev->bus && dev->bus->pm) {
-		info = "preparing bus ";
+	else if (dev->bus && dev->bus->pm)
 		callback = dev->bus->pm->prepare;
-	}
 
-	if (!callback && dev->driver && dev->driver->pm) {
-		info = "preparing driver ";
+	if (!callback && dev->driver && dev->driver->pm)
 		callback = dev->driver->pm->prepare;
-	}
 
 	if (callback)
 		ret = callback(dev);
-- 
cgit v0.10.2


From d708b384c06dddeb35d46a6127c08a7403fa2faf Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:21 +0530
Subject: PM / OPP: -ENOSYS is applicable only to syscalls

Some of the routines have used -ENOSYS for the cases where the
functionality isn't implemented in the kernel. But ENOSYS is supposed to
be used only for syscalls.

Replace that with -ENOTSUPP, which specifically means that the operation
isn't supported.

While at it, replace exiting -EINVAL errors for similar cases to
-ENOTSUPP.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5b6ad31..a2df8f6 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -110,25 +110,25 @@ static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					unsigned long *freq)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
 					unsigned long u_volt)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
@@ -148,40 +148,40 @@ static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
 							struct device *dev)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline int dev_pm_opp_set_supported_hw(struct device *dev,
 					      const u32 *versions,
 					      unsigned int count)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
 
 static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
 static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_regulator(struct device *dev) {}
 
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 {
-	return -ENOSYS;
+	return -ENOTSUPP;
 }
 
 #endif		/* CONFIG_PM_OPP */
@@ -195,7 +195,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_of_remove_table(struct device *dev)
@@ -204,7 +204,7 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev)
 
 static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
 {
-	return -ENOSYS;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
@@ -213,7 +213,7 @@ static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
 
 static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
 {
-	return -ENOSYS;
+	return -ENOTSUPP;
 }
 #endif
 
-- 
cgit v0.10.2


From dde370b23c1787a9c723ac049d56a3014937f889 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:22 +0530
Subject: PM / OPP: Mark cpumask as const in dev_pm_opp_set_sharing_cpus()

dev_pm_opp_set_sharing_cpus() isn't supposed to update the cpumask
passed as its parameter, and so it should always have been marked
'const'.

Do it now.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 55cbf9b..5469e77 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -287,7 +287,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
+				const cpumask_var_t cpumask)
 {
 	struct opp_device *opp_dev;
 	struct opp_table *opp_table;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index a2df8f6..0c08ed3 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -65,7 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev);
 int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -179,7 +179,7 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f
 	return -ENOTSUPP;
 }
 
-static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask)
 {
 	return -ENOTSUPP;
 }
-- 
cgit v0.10.2


From 6f707daa3833761110a03478cba5cc4b708ec77d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:23 +0530
Subject: PM / OPP: Add dev_pm_opp_get_sharing_cpus()

OPP core allows a platform to mark OPP table as shared, when the
platform isn't using operating-points-v2 bindings.

And, so there should be a non DT way of finding out if the OPP table is
shared or not.

This patch adds dev_pm_opp_get_sharing_cpus(), which first tries to get
OPP sharing information from the opp-table (in case it is already marked
as shared), otherwise it uses the existing DT way of finding sharing
information.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 5469e77..3428380 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -330,3 +330,48 @@ unlock:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
+
+/**
+ * dev_pm_opp_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with @cpu_dev
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask to update with information of sharing CPUs
+ *
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENODEV if OPP table isn't already present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	struct opp_device *opp_dev;
+	struct opp_table *opp_table;
+	int ret = 0;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _find_opp_table(cpu_dev);
+	if (IS_ERR(opp_table)) {
+		ret = PTR_ERR(opp_table);
+		goto unlock;
+	}
+
+	cpumask_clear(cpumask);
+
+	if (opp_table->shared_opp) {
+		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
+			cpumask_set_cpu(opp_dev->dev->id, cpumask);
+	} else {
+		cpumask_set_cpu(cpu_dev->id, cpumask);
+	}
+
+unlock:
+	mutex_unlock(&opp_table_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_sharing_cpus);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0c08ed3..15f5544 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -66,6 +66,7 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask);
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -184,6 +185,11 @@ static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpum
 	return -ENOTSUPP;
 }
 
+static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	return -EINVAL;
+}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
-- 
cgit v0.10.2


From 1530b9963eeb5304d9aec48ee0e7ea966d7cf3d9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:24 +0530
Subject: cpufreq: dt: Identify cpu-sharing for platforms without
 operating-points-v2

Existing platforms, which do not support operating-points-v2, can
explicitly tell the opp core that some of the CPUs share opp tables,
with help of dev_pm_opp_set_sharing_cpus().

For such platforms, explicitly ask the opp core to provide list of CPUs
sharing the opp table with current cpu device, before falling back to
platform data.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 5f8dbe6..aca9bec 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -147,7 +147,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	struct clk *cpu_clk;
 	struct dev_pm_opp *suspend_opp;
 	unsigned int transition_latency;
-	bool opp_v1 = false;
+	bool fallback = false;
 	const char *name;
 	int ret;
 
@@ -167,14 +167,16 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	/* Get OPP-sharing information from "operating-points-v2" bindings */
 	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
 	if (ret) {
+		if (ret != -ENOENT)
+			goto out_put_clk;
+
 		/*
 		 * operating-points-v2 not supported, fallback to old method of
-		 * finding shared-OPPs for backward compatibility.
+		 * finding shared-OPPs for backward compatibility if the
+		 * platform hasn't set sharing CPUs.
 		 */
-		if (ret == -ENOENT)
-			opp_v1 = true;
-		else
-			goto out_put_clk;
+		if (dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus))
+			fallback = true;
 	}
 
 	/*
@@ -214,7 +216,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_opp;
 	}
 
-	if (opp_v1) {
+	if (fallback) {
 		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
 
 		if (!pd || !pd->independent_clocks)
-- 
cgit v0.10.2


From 947bd567f7a5185325a8f85e5235cf1145bd4417 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:25 +0530
Subject: mvebu: Use dev_pm_opp_set_sharing_cpus() to mark OPP tables as shared

That will allow us to avoid using cpufreq-dt platform data.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index ed8fda4..8928f7c 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -20,7 +20,6 @@
 
 #include <linux/clk.h>
 #include <linux/cpu_pm.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -609,10 +608,6 @@ int mvebu_pmsu_dfs_request(int cpu)
 	return 0;
 }
 
-struct cpufreq_dt_platform_data cpufreq_dt_pd = {
-	.independent_clocks = true,
-};
-
 static int __init armada_xp_pmsu_cpufreq_init(void)
 {
 	struct device_node *np;
@@ -683,10 +678,15 @@ static int __init armada_xp_pmsu_cpufreq_init(void)
 			clk_put(clk);
 			return ret;
 		}
+
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
+						  cpumask_of(cpu_dev->id));
+		if (ret)
+			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+				__func__, ret);
 	}
 
-	platform_device_register_data(NULL, "cpufreq-dt", -1,
-				      &cpufreq_dt_pd, sizeof(cpufreq_dt_pd));
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
 	return 0;
 }
 
-- 
cgit v0.10.2


From eb96924acddc709db58221c210ca05cd9effb1df Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:26 +0530
Subject: cpufreq: dt: Kill platform-data

There are no more users of platform-data for cpufreq-dt driver, get rid
of it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index aca9bec..3957de8 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -15,7 +15,6 @@
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -217,10 +216,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	}
 
 	if (fallback) {
-		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
-
-		if (!pd || !pd->independent_clocks)
-			cpumask_setall(policy->cpus);
+		cpumask_setall(policy->cpus);
 
 		/*
 		 * OPP tables are initialized only for policy->cpu, do it for
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
deleted file mode 100644
index a87335a..0000000
--- a/include/linux/cpufreq-dt.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2014 Marvell
- * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __CPUFREQ_DT_H__
-#define __CPUFREQ_DT_H__
-
-#include <linux/types.h>
-
-struct cpufreq_dt_platform_data {
-	/*
-	 * True when each CPU has its own clock to control its
-	 * frequency, false when all CPUs are controlled by a single
-	 * clock.
-	 */
-	bool independent_clocks;
-};
-
-#endif /* __CPUFREQ_DT_H__ */
-- 
cgit v0.10.2


From 9f123def55d3fd0d76d49e0009419bcb994f6f1f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 27 Apr 2016 08:52:27 +0530
Subject: cpufreq: mvebu: Move cpufreq code into drivers/cpufreq/

Move cpufreq bits for mvebu into drivers/cpufreq/ directory, that's
where they really belong to.

Compiled tested only.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d5b4be..0bb566e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1322,6 +1322,7 @@ F:	drivers/rtc/rtc-armada38x.c
 F:	arch/arm/boot/dts/armada*
 F:	arch/arm/boot/dts/kirkwood*
 F:	arch/arm64/boot/dts/marvell/armada*
+F:	drivers/cpufreq/mvebu-cpufreq.c
 
 
 ARM/Marvell Berlin SoC support
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index 8928f7c..b444423 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -28,7 +28,6 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
-#include <linux/pm_opp.h>
 #include <linux/resource.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -607,87 +606,3 @@ int mvebu_pmsu_dfs_request(int cpu)
 
 	return 0;
 }
-
-static int __init armada_xp_pmsu_cpufreq_init(void)
-{
-	struct device_node *np;
-	struct resource res;
-	int ret, cpu;
-
-	if (!of_machine_is_compatible("marvell,armadaxp"))
-		return 0;
-
-	/*
-	 * In order to have proper cpufreq handling, we need to ensure
-	 * that the Device Tree description of the CPU clock includes
-	 * the definition of the PMU DFS registers. If not, we do not
-	 * register the clock notifier and the cpufreq driver. This
-	 * piece of code is only for compatibility with old Device
-	 * Trees.
-	 */
-	np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
-	if (!np)
-		return 0;
-
-	ret = of_address_to_resource(np, 1, &res);
-	if (ret) {
-		pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
-		of_node_put(np);
-		return 0;
-	}
-
-	of_node_put(np);
-
-	/*
-	 * For each CPU, this loop registers the operating points
-	 * supported (which are the nominal CPU frequency and half of
-	 * it), and registers the clock notifier that will take care
-	 * of doing the PMSU part of a frequency transition.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct device *cpu_dev;
-		struct clk *clk;
-		int ret;
-
-		cpu_dev = get_cpu_device(cpu);
-		if (!cpu_dev) {
-			pr_err("Cannot get CPU %d\n", cpu);
-			continue;
-		}
-
-		clk = clk_get(cpu_dev, 0);
-		if (IS_ERR(clk)) {
-			pr_err("Cannot get clock for CPU %d\n", cpu);
-			return PTR_ERR(clk);
-		}
-
-		/*
-		 * In case of a failure of dev_pm_opp_add(), we don't
-		 * bother with cleaning up the registered OPP (there's
-		 * no function to do so), and simply cancel the
-		 * registration of the cpufreq device.
-		 */
-		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
-		if (ret) {
-			clk_put(clk);
-			return ret;
-		}
-
-		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
-		if (ret) {
-			clk_put(clk);
-			return ret;
-		}
-
-		ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
-						  cpumask_of(cpu_dev->id));
-		if (ret)
-			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
-				__func__, ret);
-	}
-
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-	return 0;
-}
-
-device_initcall(armada_xp_pmsu_cpufreq_init);
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 2cce2cd..e1eb11e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
+obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
 
 
 ##################################################################################
diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c
new file mode 100644
index 0000000..e920889
--- /dev/null
+++ b/drivers/cpufreq/mvebu-cpufreq.c
@@ -0,0 +1,107 @@
+/*
+ * CPUFreq support for Armada 370/XP platforms.
+ *
+ * Copyright (C) 2012-2016 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory Clement <gregory.clement@free-electrons.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#define pr_fmt(fmt) "mvebu-pmsu: " fmt
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/resource.h>
+
+static int __init armada_xp_pmsu_cpufreq_init(void)
+{
+	struct device_node *np;
+	struct resource res;
+	int ret, cpu;
+
+	if (!of_machine_is_compatible("marvell,armadaxp"))
+		return 0;
+
+	/*
+	 * In order to have proper cpufreq handling, we need to ensure
+	 * that the Device Tree description of the CPU clock includes
+	 * the definition of the PMU DFS registers. If not, we do not
+	 * register the clock notifier and the cpufreq driver. This
+	 * piece of code is only for compatibility with old Device
+	 * Trees.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
+	if (!np)
+		return 0;
+
+	ret = of_address_to_resource(np, 1, &res);
+	if (ret) {
+		pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
+		of_node_put(np);
+		return 0;
+	}
+
+	of_node_put(np);
+
+	/*
+	 * For each CPU, this loop registers the operating points
+	 * supported (which are the nominal CPU frequency and half of
+	 * it), and registers the clock notifier that will take care
+	 * of doing the PMSU part of a frequency transition.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct device *cpu_dev;
+		struct clk *clk;
+		int ret;
+
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("Cannot get CPU %d\n", cpu);
+			continue;
+		}
+
+		clk = clk_get(cpu_dev, 0);
+		if (IS_ERR(clk)) {
+			pr_err("Cannot get clock for CPU %d\n", cpu);
+			return PTR_ERR(clk);
+		}
+
+		/*
+		 * In case of a failure of dev_pm_opp_add(), we don't
+		 * bother with cleaning up the registered OPP (there's
+		 * no function to do so), and simply cancel the
+		 * registration of the cpufreq device.
+		 */
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
+						  cpumask_of(cpu_dev->id));
+		if (ret)
+			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+				__func__, ret);
+	}
+
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+	return 0;
+}
+device_initcall(armada_xp_pmsu_cpufreq_init);
-- 
cgit v0.10.2


From 04b03594c73c1877a98c7a657cc45bf5e9d89f7c Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Thu, 28 Apr 2016 15:24:36 +0200
Subject: Honour user's LDFLAGS

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d0f879b..3e59f1a 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -22,7 +22,7 @@ $(OUTPUT)%.o : %.c
 
 $(OUTPUT)cpufreq-bench: $(OBJS)
 	$(ECHO) "  CC      " $@
-	$(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS)
+	$(QUIET) $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS)
 
 all: $(OUTPUT)cpufreq-bench
 
-- 
cgit v0.10.2


From 983d9e065b37dac5aaa2d5a819bdd5b5a0b78c8c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 28 Apr 2016 15:24:37 +0200
Subject: cpupower: bench: parse.c: fix several resource leaks

The error handling in prepare_output has several issues with
resource leaks.  Ensure that filename is free'd and the directory
stream DIR is closed before returning.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index f503fb5..2d09c92 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -81,14 +81,18 @@ FILE *prepare_output(const char *dirname)
 
 	len = strlen(dirname) + 30;
 	filename = malloc(sizeof(char) * len);
+	if (!filename) {
+		perror("malloc");
+		goto out_dir;
+	}
 
 	if (uname(&sysdata) == 0) {
 		len += strlen(sysdata.nodename) + strlen(sysdata.release);
 		filename = realloc(filename, sizeof(char) * len);
 
-		if (filename == NULL) {
+		if (!filename) {
 			perror("realloc");
-			return NULL;
+			goto out_dir;
 		}
 
 		snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log",
@@ -104,12 +108,16 @@ FILE *prepare_output(const char *dirname)
 	if (output == NULL) {
 		perror("fopen");
 		fprintf(stderr, "error: unable to open logfile\n");
+		goto out;
 	}
 
 	fprintf(stdout, "Logfile: %s\n", filename);
 
-	free(filename);
 	fprintf(output, "#round load sleep performance powersave percentage\n");
+out:
+	free(filename);
+out_dir:
+	closedir(dir);
 	return output;
 }
 
-- 
cgit v0.10.2


From 938bb850d525199c2c3619800ef97186cfccc8eb Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Thu, 28 Apr 2016 15:24:38 +0200
Subject: Fix cpupower manpages "NAME" section

The token before "-" should be the program name, no spaces allowed.
See man(7) and lexgrog(1).

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 9c85a38..6aa8d23 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP 
-cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
+cpupower\-frequency\-info \- Utility to retrieve cpufreq kernel information
 .SH "SYNTAX"
 .LP 
 cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1
index 3eacc8d..b505702 100644
--- a/tools/power/cpupower/man/cpupower-frequency-set.1
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP 
-cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
+cpupower\-frequency\-set \- A small tool which allows to modify cpufreq settings.
 .SH "SYNTAX"
 .LP 
 cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 7b3646a..80a1311 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP
-cpupower idle\-info \- Utility to retrieve cpu idle kernel information
+cpupower\-idle\-info \- Utility to retrieve cpu idle kernel information
 .SH "SYNTAX"
 .LP
 cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 580c4e3..21916cf 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP
-cpupower idle\-set \- Utility to set cpu idle state specific kernel options
+cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
 .SH "SYNTAX"
 .LP
 cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
-- 
cgit v0.10.2


From fe7656a8e8fcb3a0151b36b2f4c66763fa9553ea Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 28 Apr 2016 15:24:39 +0200
Subject: cpupowerutils: bench: trivial fix of spelling mistake on "average"

fix spelling mistake, avarage -> average

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/bench/README-BENCH b/tools/power/cpupower/bench/README-BENCH
index 8093ec7..97727ae 100644
--- a/tools/power/cpupower/bench/README-BENCH
+++ b/tools/power/cpupower/bench/README-BENCH
@@ -113,7 +113,7 @@ cpufreq-bench Command Usage
 -c, --cpu=<unsigned int>        CPU Number to use, starting at 0
 -p, --prio=<priority>           scheduler priority, HIGH, LOW or DEFAULT
 -g, --governor=<governor>       cpufreq governor to test
--n, --cycles=<int>              load/sleep cycles to get an avarage value to compare
+-n, --cycles=<int>              load/sleep cycles to get an average value to compare
 -r, --rounds<int>               load/sleep rounds
 -f, --file=<configfile>         config file to use
 -o, --output=<dir>              output dir, must exist
diff --git a/tools/power/cpupower/bench/benchmark.c b/tools/power/cpupower/bench/benchmark.c
index 81b1c48..429d51a 100644
--- a/tools/power/cpupower/bench/benchmark.c
+++ b/tools/power/cpupower/bench/benchmark.c
@@ -130,7 +130,7 @@ void start_benchmark(struct config *config)
 			_round, load_time, sleep_time);
 
 		if (config->verbose)
-			printf("avarage: %lius, rps:%li\n",
+			printf("average: %lius, rps:%li\n",
 				load_time / calculations,
 				1000000 * calculations / load_time);
 
@@ -177,7 +177,7 @@ void start_benchmark(struct config *config)
 
 		progress_time += sleep_time + load_time;
 
-		/* compare the avarage sleep/load cycles  */
+		/* compare the average sleep/load cycles  */
 		fprintf(config->output, "%li ",
 			powersave_time / config->cycles);
 		fprintf(config->output, "%.3f\n",
-- 
cgit v0.10.2


From ac5a181d065d74fb6b213d538f743392f27bcdbd Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.com>
Date: Thu, 28 Apr 2016 15:24:40 +0200
Subject: cpupower: Add cpuidle parts into library

This more or less is a renaming and moving of functions and should not
introduce any functional change.

cpupower was built from cpufrequtils (which had a C library providing easy
access to cpu frequency platform info). In the meantime it got enhanced
by quite some neat cpuidle userspace tools.

Now the cpu idle functions have been separated and added to the cpupower.so
library.
So beside an already existing public header file:
cpufreq.h
cpupower now also exports these cpu idle functions in:
cpuidle.h

Here again pasted for better review of the interfaces:

======================================
int cpuidle_is_state_disabled(unsigned int cpu,
                                       unsigned int idlestate);
int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
                                   unsigned int disable);
unsigned long cpuidle_state_latency(unsigned int cpu,
                                                unsigned int idlestate);
unsigned long cpuidle_state_usage(unsigned int cpu,
                                        unsigned int idlestate);
unsigned long long cpuidle_state_time(unsigned int cpu,
                                                unsigned int idlestate);
char *cpuidle_state_name(unsigned int cpu,
                                unsigned int idlestate);
char *cpuidle_state_desc(unsigned int cpu,
                                unsigned int idlestate);
unsigned int cpuidle_state_count(unsigned int cpu);

char *cpuidle_get_governor(void);
char *cpuidle_get_driver(void);

======================================

Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 0adaf0c..8358863 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -63,7 +63,7 @@ DESTDIR ?=
 # and _should_ modify the PACKAGE_BUGREPORT definition
 
 VERSION=			$(shell ./utils/version-gen.sh)
-LIB_MAJ=			0.0.0
+LIB_MAJ=			0.0.1
 LIB_MIN=			0
 
 PACKAGE =			cpupower
@@ -129,7 +129,7 @@ WARNINGS += -Wshadow
 CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
 		-DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
 
-UTIL_OBJS =  utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \
+UTIL_OBJS =  utils/helpers/amd.o utils/helpers/msr.o \
 	utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \
 	utils/helpers/pci.o utils/helpers/bitmask.o \
 	utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \
@@ -148,9 +148,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \
 	utils/helpers/bitmask.h \
 	utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
 
-LIB_HEADERS = 	lib/cpufreq.h lib/sysfs.h
-LIB_SRC = 	lib/cpufreq.c lib/sysfs.c
-LIB_OBJS = 	lib/cpufreq.o lib/sysfs.o
+LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
+LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
+LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
 CFLAGS +=	-pipe
@@ -280,6 +280,7 @@ install-lib:
 	$(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
 	$(INSTALL) -d $(DESTDIR)${includedir}
 	$(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
+	$(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
 
 install-tools:
 	$(INSTALL) -d $(DESTDIR)${bindir}
@@ -315,6 +316,7 @@ endif
 uninstall:
 	- rm -f $(DESTDIR)${libdir}/libcpupower.*
 	- rm -f $(DESTDIR)${includedir}/cpufreq.h
+	- rm -f $(DESTDIR)${includedir}/cpuidle.h
 	- rm -f $(DESTDIR)${bindir}/utils/cpupower
 	- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
 	- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c
index f01e3f4..c25a74a 100644
--- a/tools/power/cpupower/bench/system.c
+++ b/tools/power/cpupower/bench/system.c
@@ -26,6 +26,7 @@
 #include <sched.h>
 
 #include <cpufreq.h>
+#include <cpupower.h>
 
 #include "config.h"
 #include "system.h"
@@ -60,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu)
 
 	dprintf("set %s as cpufreq governor\n", governor);
 
-	if (cpufreq_cpu_exists(cpu) != 0) {
+	if (cpupower_is_cpu_online(cpu) != 0) {
 		perror("cpufreq_cpu_exists");
 		fprintf(stderr, "error: cpu %u does not exist\n", cpu);
 		return -1;
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index d961101..1b993fe 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -9,28 +9,190 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "cpufreq.h"
-#include "sysfs.h"
+#include "cpupower_intern.h"
 
-int cpufreq_cpu_exists(unsigned int cpu)
+/* CPUFREQ sysfs access **************************************************/
+
+/* helper function to read file from /sys into given buffer */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
+					    char *buf, size_t buflen)
 {
-	return sysfs_cpu_exists(cpu);
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+			 cpu, fname);
+	return sysfs_read_file(path, buf, buflen);
 }
 
+/* helper function to write a new value to a /sys file */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
+					     const char *fname,
+					     const char *value, size_t len)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numwrite;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+			 cpu, fname);
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwrite = write(fd, value, len);
+	if (numwrite < 1) {
+		close(fd);
+		return 0;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum cpufreq_value {
+	CPUINFO_CUR_FREQ,
+	CPUINFO_MIN_FREQ,
+	CPUINFO_MAX_FREQ,
+	CPUINFO_LATENCY,
+	SCALING_CUR_FREQ,
+	SCALING_MIN_FREQ,
+	SCALING_MAX_FREQ,
+	STATS_NUM_TRANSITIONS,
+	MAX_CPUFREQ_VALUE_READ_FILES
+};
+
+static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
+	[CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
+	[CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
+	[CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
+	[CPUINFO_LATENCY]  = "cpuinfo_transition_latency",
+	[SCALING_CUR_FREQ] = "scaling_cur_freq",
+	[SCALING_MIN_FREQ] = "scaling_min_freq",
+	[SCALING_MAX_FREQ] = "scaling_max_freq",
+	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
+};
+
+
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+						 enum cpufreq_value which)
+{
+	unsigned long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+		return 0;
+
+	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
+				linebuf, sizeof(linebuf));
+
+	if (len == 0)
+		return 0;
+
+	value = strtoul(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
+
+/* read access to files which contain one string */
+
+enum cpufreq_string {
+	SCALING_DRIVER,
+	SCALING_GOVERNOR,
+	MAX_CPUFREQ_STRING_FILES
+};
+
+static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
+	[SCALING_DRIVER] = "scaling_driver",
+	[SCALING_GOVERNOR] = "scaling_governor",
+};
+
+
+static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
+					   enum cpufreq_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_CPUFREQ_STRING_FILES)
+		return NULL;
+
+	len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+/* write access */
+
+enum cpufreq_write {
+	WRITE_SCALING_MIN_FREQ,
+	WRITE_SCALING_MAX_FREQ,
+	WRITE_SCALING_GOVERNOR,
+	WRITE_SCALING_SET_SPEED,
+	MAX_CPUFREQ_WRITE_FILES
+};
+
+static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
+	[WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
+	[WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
+	[WRITE_SCALING_GOVERNOR] = "scaling_governor",
+	[WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
+};
+
+static int sysfs_cpufreq_write_one_value(unsigned int cpu,
+					 enum cpufreq_write which,
+					 const char *new_value, size_t len)
+{
+	if (which >= MAX_CPUFREQ_WRITE_FILES)
+		return 0;
+
+	if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
+					new_value, len) != len)
+		return -ENODEV;
+
+	return 0;
+};
+
 unsigned long cpufreq_get_freq_kernel(unsigned int cpu)
 {
-	return sysfs_get_freq_kernel(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
 }
 
 unsigned long cpufreq_get_freq_hardware(unsigned int cpu)
 {
-	return sysfs_get_freq_hardware(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
 }
 
 unsigned long cpufreq_get_transition_latency(unsigned int cpu)
 {
-	return sysfs_get_freq_transition_latency(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
 }
 
 int cpufreq_get_hardware_limits(unsigned int cpu,
@@ -39,12 +201,21 @@ int cpufreq_get_hardware_limits(unsigned int cpu,
 {
 	if ((!min) || (!max))
 		return -EINVAL;
-	return sysfs_get_freq_hardware_limits(cpu, min, max);
+
+	*min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
+	if (!*min)
+		return -ENODEV;
+
+	*max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
+	if (!*max)
+		return -ENODEV;
+
+	return 0;
 }
 
 char *cpufreq_get_driver(unsigned int cpu)
 {
-	return sysfs_get_freq_driver(cpu);
+	return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
 }
 
 void cpufreq_put_driver(char *ptr)
@@ -56,7 +227,26 @@ void cpufreq_put_driver(char *ptr)
 
 struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu)
 {
-	return sysfs_get_freq_policy(cpu);
+	struct cpufreq_policy *policy;
+
+	policy = malloc(sizeof(struct cpufreq_policy));
+	if (!policy)
+		return NULL;
+
+	policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
+	if (!policy->governor) {
+		free(policy);
+		return NULL;
+	}
+	policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+	policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
+	if ((!policy->min) || (!policy->max)) {
+		free(policy->governor);
+		free(policy);
+		return NULL;
+	}
+
+	return policy;
 }
 
 void cpufreq_put_policy(struct cpufreq_policy *policy)
@@ -72,7 +262,57 @@ void cpufreq_put_policy(struct cpufreq_policy *policy)
 struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned
 								int cpu)
 {
-	return sysfs_get_freq_available_governors(cpu);
+	struct cpufreq_available_governors *first = NULL;
+	struct cpufreq_available_governors *current = NULL;
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos < 2)
+				continue;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			current->governor = malloc(i - pos + 1);
+			if (!current->governor)
+				goto error_out;
+
+			memcpy(current->governor, linebuf + pos, i - pos);
+			current->governor[i - pos] = '\0';
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		if (first->governor)
+			free(first->governor);
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
@@ -96,7 +336,57 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
 struct cpufreq_available_frequencies
 *cpufreq_get_available_frequencies(unsigned int cpu)
 {
-	return sysfs_get_available_frequencies(cpu);
+	struct cpufreq_available_frequencies *first = NULL;
+	struct cpufreq_available_frequencies *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos < 2)
+				continue;
+			if (i - pos >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+			if (sscanf(one_value, "%lu", &current->frequency) != 1)
+				goto error_out;
+
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies
@@ -114,10 +404,65 @@ void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies
 	}
 }
 
+static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
+							const char *file)
+{
+	struct cpufreq_affected_cpus *first = NULL;
+	struct cpufreq_affected_cpus *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos  < 1)
+				continue;
+			if (i - pos >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+
+			if (sscanf(one_value, "%u", &current->cpu) != 1)
+				goto error_out;
+
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
+}
 
 struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu)
 {
-	return sysfs_get_freq_affected_cpus(cpu);
+	return sysfs_get_cpu_list(cpu, "affected_cpus");
 }
 
 void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any)
@@ -138,7 +483,7 @@ void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any)
 
 struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu)
 {
-	return sysfs_get_freq_related_cpus(cpu);
+	return sysfs_get_cpu_list(cpu, "related_cpus");
 }
 
 void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any)
@@ -146,45 +491,208 @@ void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any)
 	cpufreq_put_affected_cpus(any);
 }
 
+static int verify_gov(char *new_gov, char *passed_gov)
+{
+	unsigned int i, j = 0;
+
+	if (!passed_gov || (strlen(passed_gov) > 19))
+		return -EINVAL;
+
+	strncpy(new_gov, passed_gov, 20);
+	for (i = 0; i < 20; i++) {
+		if (j) {
+			new_gov[i] = '\0';
+			continue;
+		}
+		if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
+			continue;
+
+		if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
+			continue;
+
+		if (new_gov[i] == '-')
+			continue;
+
+		if (new_gov[i] == '_')
+			continue;
+
+		if (new_gov[i] == '\0') {
+			j = 1;
+			continue;
+		}
+		return -EINVAL;
+	}
+	new_gov[19] = '\0';
+	return 0;
+}
 
 int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy)
 {
+	char min[SYSFS_PATH_MAX];
+	char max[SYSFS_PATH_MAX];
+	char gov[SYSFS_PATH_MAX];
+	int ret;
+	unsigned long old_min;
+	int write_max_first;
+
 	if (!policy || !(policy->governor))
 		return -EINVAL;
 
-	return sysfs_set_freq_policy(cpu, policy);
+	if (policy->max < policy->min)
+		return -EINVAL;
+
+	if (verify_gov(gov, policy->governor))
+		return -EINVAL;
+
+	snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
+	snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
+
+	old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+	write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
+
+	if (write_max_first) {
+		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+						    max, strlen(max));
+		if (ret)
+			return ret;
+	}
+
+	ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
+					    strlen(min));
+	if (ret)
+		return ret;
+
+	if (!write_max_first) {
+		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+						    max, strlen(max));
+		if (ret)
+			return ret;
+	}
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+					     gov, strlen(gov));
 }
 
 
 int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq)
 {
-	return sysfs_modify_freq_policy_min(cpu, min_freq);
+	char value[SYSFS_PATH_MAX];
+
+	snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
+					     value, strlen(value));
 }
 
 
 int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq)
 {
-	return sysfs_modify_freq_policy_max(cpu, max_freq);
-}
+	char value[SYSFS_PATH_MAX];
+
+	snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
 
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+					     value, strlen(value));
+}
 
 int cpufreq_modify_policy_governor(unsigned int cpu, char *governor)
 {
+	char new_gov[SYSFS_PATH_MAX];
+
 	if ((!governor) || (strlen(governor) > 19))
 		return -EINVAL;
 
-	return sysfs_modify_freq_policy_governor(cpu, governor);
+	if (verify_gov(new_gov, governor))
+		return -EINVAL;
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+					     new_gov, strlen(new_gov));
 }
 
 int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency)
 {
-	return sysfs_set_frequency(cpu, target_frequency);
+	struct cpufreq_policy *pol = cpufreq_get_policy(cpu);
+	char userspace_gov[] = "userspace";
+	char freq[SYSFS_PATH_MAX];
+	int ret;
+
+	if (!pol)
+		return -ENODEV;
+
+	if (strncmp(pol->governor, userspace_gov, 9) != 0) {
+		ret = cpufreq_modify_policy_governor(cpu, userspace_gov);
+		if (ret) {
+			cpufreq_put_policy(pol);
+			return ret;
+		}
+	}
+
+	cpufreq_put_policy(pol);
+
+	snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
+					     freq, strlen(freq));
 }
 
 struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
 					unsigned long long *total_time)
 {
-	return sysfs_get_freq_stats(cpu, total_time);
+	struct cpufreq_stats *first = NULL;
+	struct cpufreq_stats *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	*total_time = 0;
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (i == strlen(linebuf) || linebuf[i] == '\n')	{
+			if (i - pos < 2)
+				continue;
+			if ((i - pos) >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+			if (sscanf(one_value, "%lu %llu",
+					&current->frequency,
+					&current->time_in_state) != 2)
+				goto error_out;
+
+			*total_time = *total_time + current->time_in_state;
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_stats(struct cpufreq_stats *any)
@@ -204,5 +712,5 @@ void cpufreq_put_stats(struct cpufreq_stats *any)
 
 unsigned long cpufreq_get_transitions(unsigned int cpu)
 {
-	return sysfs_get_freq_transitions(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
 }
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 3aae8e7..3b005c3 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -17,8 +17,8 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#ifndef _CPUFREQ_H
-#define _CPUFREQ_H 1
+#ifndef __CPUPOWER_CPUFREQ_H__
+#define __CPUPOWER_CPUFREQ_H__
 
 struct cpufreq_policy {
 	unsigned long min;
@@ -58,13 +58,6 @@ struct cpufreq_stats {
 extern "C" {
 #endif
 
-/*
- * returns 0 if the specified CPU is present (it doesn't say
- * whether it is online!), and an error value if not.
- */
-
-extern int cpufreq_cpu_exists(unsigned int cpu);
-
 /* determine current CPU frequency
  * - _kernel variant means kernel's opinion of CPU frequency
  * - _hardware variant means actual hardware CPU frequency,
@@ -73,9 +66,9 @@ extern int cpufreq_cpu_exists(unsigned int cpu);
  * returns 0 on failure, else frequency in kHz.
  */
 
-extern unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
 
-extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
 
 #define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu);
 
@@ -84,7 +77,7 @@ extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
  *
  * returns 0 on failure, else transition latency in 10^(-9) s = nanoseconds
  */
-extern unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
 
 
 /* determine hardware CPU frequency limits
@@ -93,7 +86,7 @@ extern unsigned long cpufreq_get_transition_latency(unsigned int cpu);
  * considerations by cpufreq policy notifiers in the kernel.
  */
 
-extern int cpufreq_get_hardware_limits(unsigned int cpu,
+int cpufreq_get_hardware_limits(unsigned int cpu,
 				unsigned long *min,
 				unsigned long *max);
 
@@ -104,9 +97,9 @@ extern int cpufreq_get_hardware_limits(unsigned int cpu,
  * to avoid memory leakage, please.
  */
 
-extern char *cpufreq_get_driver(unsigned int cpu);
+char *cpufreq_get_driver(unsigned int cpu);
 
-extern void cpufreq_put_driver(char *ptr);
+void cpufreq_put_driver(char *ptr);
 
 
 /* determine CPUfreq policy currently used
@@ -116,9 +109,9 @@ extern void cpufreq_put_driver(char *ptr);
  */
 
 
-extern struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
 
-extern void cpufreq_put_policy(struct cpufreq_policy *policy);
+void cpufreq_put_policy(struct cpufreq_policy *policy);
 
 
 /* determine CPUfreq governors currently available
@@ -129,10 +122,10 @@ extern void cpufreq_put_policy(struct cpufreq_policy *policy);
  */
 
 
-extern struct cpufreq_available_governors
+struct cpufreq_available_governors
 *cpufreq_get_available_governors(unsigned int cpu);
 
-extern void cpufreq_put_available_governors(
+void cpufreq_put_available_governors(
 	struct cpufreq_available_governors *first);
 
 
@@ -143,10 +136,10 @@ extern void cpufreq_put_available_governors(
  * cpufreq_put_available_frequencies after use.
  */
 
-extern struct cpufreq_available_frequencies
+struct cpufreq_available_frequencies
 *cpufreq_get_available_frequencies(unsigned int cpu);
 
-extern void cpufreq_put_available_frequencies(
+void cpufreq_put_available_frequencies(
 		struct cpufreq_available_frequencies *first);
 
 
@@ -156,10 +149,10 @@ extern void cpufreq_put_available_frequencies(
  * to avoid memory leakage, please.
  */
 
-extern struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
 							int cpu);
 
-extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
 
 
 /* determine related CPUs
@@ -168,10 +161,10 @@ extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
  * to avoid memory leakage, please.
  */
 
-extern struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
 							int cpu);
 
-extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
 
 
 /* determine stats for cpufreq subsystem
@@ -179,12 +172,12 @@ extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
  * This is not available in all kernel versions or configurations.
  */
 
-extern struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
 					unsigned long long *total_time);
 
-extern void cpufreq_put_stats(struct cpufreq_stats *stats);
+void cpufreq_put_stats(struct cpufreq_stats *stats);
 
-extern unsigned long cpufreq_get_transitions(unsigned int cpu);
+unsigned long cpufreq_get_transitions(unsigned int cpu);
 
 
 /* set new cpufreq policy
@@ -193,7 +186,7 @@ extern unsigned long cpufreq_get_transitions(unsigned int cpu);
  * but results may differ depending e.g. on governors being available.
  */
 
-extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
 
 
 /* modify a policy by only changing min/max freq or governor
@@ -201,9 +194,9 @@ extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
  * Does not check whether result is what was intended.
  */
 
-extern int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
-extern int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
-extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
 
 
 /* set a specific frequency
@@ -213,7 +206,7 @@ extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
  * occurs. Also does not work on ->range() cpufreq drivers.
  */
 
-extern int cpufreq_set_frequency(unsigned int cpu,
+int cpufreq_set_frequency(unsigned int cpu,
 				unsigned long target_frequency);
 
 #ifdef __cplusplus
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
new file mode 100644
index 0000000..9bd4c76
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -0,0 +1,380 @@
+/*
+ *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
+ *  (C) 2011       Thomas Renninger <trenn@novell.com> Novell Inc.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpuidle.h"
+#include "cpupower_intern.h"
+
+/*
+ * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir
+ * exists.
+ * For example the functionality to disable c-states was introduced in later
+ * kernel versions, this function can be used to explicitly check for this
+ * feature.
+ *
+ * returns 1 if the file exists, 0 otherwise.
+ */
+static
+unsigned int cpuidle_state_file_exists(unsigned int cpu,
+				       unsigned int idlestate,
+				       const char *fname)
+{
+	char path[SYSFS_PATH_MAX];
+	struct stat statbuf;
+
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+	if (stat(path, &statbuf) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpuX/cpuidle/stateX/" dir
+ * cstates starting with 0, C0 is not counted as cstate.
+ * This means if you want C1 info, pass 0 as idlestate param
+ */
+static
+unsigned int cpuidle_state_read_file(unsigned int cpu,
+					    unsigned int idlestate,
+					    const char *fname, char *buf,
+					    size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numread;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+/*
+ * helper function to write a new value to a /sys file
+ * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir
+ *
+ * Returns the number of bytes written or 0 on error
+ */
+static
+unsigned int cpuidle_state_write_file(unsigned int cpu,
+				      unsigned int idlestate,
+				      const char *fname,
+				      const char *value, size_t len)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numwrite;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwrite = write(fd, value, len);
+	if (numwrite < 1) {
+		close(fd);
+		return 0;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum idlestate_value {
+	IDLESTATE_USAGE,
+	IDLESTATE_POWER,
+	IDLESTATE_LATENCY,
+	IDLESTATE_TIME,
+	IDLESTATE_DISABLE,
+	MAX_IDLESTATE_VALUE_FILES
+};
+
+static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
+	[IDLESTATE_USAGE] = "usage",
+	[IDLESTATE_POWER] = "power",
+	[IDLESTATE_LATENCY] = "latency",
+	[IDLESTATE_TIME]  = "time",
+	[IDLESTATE_DISABLE]  = "disable",
+};
+
+static
+unsigned long long cpuidle_state_get_one_value(unsigned int cpu,
+					       unsigned int idlestate,
+					       enum idlestate_value which)
+{
+	unsigned long long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_IDLESTATE_VALUE_FILES)
+		return 0;
+
+	len = cpuidle_state_read_file(cpu, idlestate,
+				      idlestate_value_files[which],
+				      linebuf, sizeof(linebuf));
+	if (len == 0)
+		return 0;
+
+	value = strtoull(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
+
+/* read access to files which contain one string */
+
+enum idlestate_string {
+	IDLESTATE_DESC,
+	IDLESTATE_NAME,
+	MAX_IDLESTATE_STRING_FILES
+};
+
+static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = {
+	[IDLESTATE_DESC] = "desc",
+	[IDLESTATE_NAME] = "name",
+};
+
+
+static char *cpuidle_state_get_one_string(unsigned int cpu,
+					unsigned int idlestate,
+					enum idlestate_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_IDLESTATE_STRING_FILES)
+		return NULL;
+
+	len = cpuidle_state_read_file(cpu, idlestate,
+				      idlestate_string_files[which],
+				      linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+/*
+ * Returns:
+ *    1  if disabled
+ *    0  if enabled
+ *    -1 if idlestate is not available
+ *    -2 if disabling is not supported by the kernel
+ */
+int cpuidle_is_state_disabled(unsigned int cpu,
+				unsigned int idlestate)
+{
+	if (cpuidle_state_count(cpu) <= idlestate)
+		return -1;
+
+	if (!cpuidle_state_file_exists(cpu, idlestate,
+				 idlestate_value_files[IDLESTATE_DISABLE]))
+		return -2;
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_DISABLE);
+}
+
+/*
+ * Pass 1 as last argument to disable or 0 to enable the state
+ * Returns:
+ *    0  on success
+ *    negative values on error, for example:
+ *      -1 if idlestate is not available
+ *      -2 if disabling is not supported by the kernel
+ *      -3 No write access to disable/enable C-states
+ */
+int cpuidle_state_disable(unsigned int cpu,
+			    unsigned int idlestate,
+			    unsigned int disable)
+{
+	char value[SYSFS_PATH_MAX];
+	int bytes_written;
+
+	if (cpuidle_state_count(cpu) <= idlestate)
+		return -1;
+
+	if (!cpuidle_state_file_exists(cpu, idlestate,
+				 idlestate_value_files[IDLESTATE_DISABLE]))
+		return -2;
+
+	snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+
+	bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
+						   value, sizeof(disable));
+	if (bytes_written)
+		return 0;
+	return -3;
+}
+
+unsigned long cpuidle_state_latency(unsigned int cpu,
+					  unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
+}
+
+unsigned long cpuidle_state_usage(unsigned int cpu,
+					unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_USAGE);
+}
+
+unsigned long long cpuidle_state_time(unsigned int cpu,
+					unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_TIME);
+}
+
+char *cpuidle_state_name(unsigned int cpu, unsigned int idlestate)
+{
+	return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_NAME);
+}
+
+char *cpuidle_state_desc(unsigned int cpu, unsigned int idlestate)
+{
+	return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_DESC);
+}
+
+/*
+ * Returns number of supported C-states of CPU core cpu
+ * Negativ in error case
+ * Zero if cpuidle does not export any C-states
+ */
+unsigned int cpuidle_state_count(unsigned int cpu)
+{
+	char file[SYSFS_PATH_MAX];
+	struct stat statbuf;
+	int idlestates = 1;
+
+
+	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
+	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+		return 0;
+
+	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
+	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+		return 0;
+
+	while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) {
+		snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU
+			 "cpu%u/cpuidle/state%d", cpu, idlestates);
+		idlestates++;
+	}
+	idlestates--;
+	return idlestates;
+}
+
+/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpu/cpuidle/" dir
+ */
+static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
+					    size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
+
+	return sysfs_read_file(path, buf, buflen);
+}
+
+
+
+/* read access to files which contain one string */
+
+enum cpuidle_string {
+	CPUIDLE_GOVERNOR,
+	CPUIDLE_GOVERNOR_RO,
+	CPUIDLE_DRIVER,
+	MAX_CPUIDLE_STRING_FILES
+};
+
+static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = {
+	[CPUIDLE_GOVERNOR]	= "current_governor",
+	[CPUIDLE_GOVERNOR_RO]	= "current_governor_ro",
+	[CPUIDLE_DRIVER]	= "current_driver",
+};
+
+
+static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_CPUIDLE_STRING_FILES)
+		return NULL;
+
+	len = sysfs_cpuidle_read_file(cpuidle_string_files[which],
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+char *cpuidle_get_governor(void)
+{
+	char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO);
+	if (!tmp)
+		return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR);
+	else
+		return tmp;
+}
+
+char *cpuidle_get_driver(void)
+{
+	return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER);
+}
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
new file mode 100644
index 0000000..04eb3cf
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -0,0 +1,23 @@
+#ifndef __CPUPOWER_CPUIDLE_H__
+#define __CPUPOWER_CPUIDLE_H__
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+				       unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+				   unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+						unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+					unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+						unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+				unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+				unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+char *cpuidle_get_driver(void);
+
+#endif /* __CPUPOWER_HELPERS_SYSFS_H__ */
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
new file mode 100644
index 0000000..9c395ec9
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -0,0 +1,192 @@
+/*
+ *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "cpupower.h"
+#include "cpupower_intern.h"
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+{
+	int fd;
+	ssize_t numread;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     negative errno values in error case
+ */
+int cpupower_is_cpu_online(unsigned int cpu)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numread;
+	unsigned long long value;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+	struct stat statbuf;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+	if (stat(path, &statbuf) != 0)
+		return 0;
+
+	/*
+	 * kernel without CONFIG_HOTPLUG_CPU
+	 * -> cpuX directory exists, but not cpuX/online file
+	 */
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+	if (stat(path, &statbuf) != 0)
+		return 1;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -errno;
+
+	numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+	if (numread < 1) {
+		close(fd);
+		return -EIO;
+	}
+	linebuf[numread] = '\0';
+	close(fd);
+
+	value = strtoull(linebuf, &endp, 0);
+	if (value > 1)
+		return -EINVAL;
+
+	return value;
+}
+
+/* returns -1 on failure, 0 on success */
+static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+			 cpu, fname);
+	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
+		return -1;
+	*result = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+	return 0;
+}
+
+static int __compare(const void *t1, const void *t2)
+{
+	struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+	struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+	if (top1->pkg < top2->pkg)
+		return -1;
+	else if (top1->pkg > top2->pkg)
+		return 1;
+	else if (top1->core < top2->core)
+		return -1;
+	else if (top1->core > top2->core)
+		return 1;
+	else if (top1->cpu < top2->cpu)
+		return -1;
+	else if (top1->cpu > top2->cpu)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Returns amount of cpus, negative on error, cpu_top must be
+ * passed to cpu_topology_release to free resources
+ *
+ * Array is sorted after ->pkg, ->core, then ->cpu
+ */
+int get_cpu_topology(struct cpupower_topology *cpu_top)
+{
+	int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
+	if (cpu_top->core_info == NULL)
+		return -ENOMEM;
+	cpu_top->pkgs = cpu_top->cores = 0;
+	for (cpu = 0; cpu < cpus; cpu++) {
+		cpu_top->core_info[cpu].cpu = cpu;
+		cpu_top->core_info[cpu].is_online = cpupower_is_cpu_online(cpu);
+		if(sysfs_topology_read_file(
+			cpu,
+			"physical_package_id",
+			&(cpu_top->core_info[cpu].pkg)) < 0) {
+			cpu_top->core_info[cpu].pkg = -1;
+			cpu_top->core_info[cpu].core = -1;
+			continue;
+		}
+		if(sysfs_topology_read_file(
+			cpu,
+			"core_id",
+			&(cpu_top->core_info[cpu].core)) < 0) {
+			cpu_top->core_info[cpu].pkg = -1;
+			cpu_top->core_info[cpu].core = -1;
+			continue;
+		}
+	}
+
+	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+	      __compare);
+
+	/* Count the number of distinct pkgs values. This works
+	   because the primary sort of the core_info struct was just
+	   done by pkg value. */
+	last_pkg = cpu_top->core_info[0].pkg;
+	for(cpu = 1; cpu < cpus; cpu++) {
+		if (cpu_top->core_info[cpu].pkg != last_pkg &&
+				cpu_top->core_info[cpu].pkg != -1) {
+
+			last_pkg = cpu_top->core_info[cpu].pkg;
+			cpu_top->pkgs++;
+		}
+	}
+	if (!(cpu_top->core_info[0].pkg == -1))
+		cpu_top->pkgs++;
+
+	/* Intel's cores count is not consecutively numbered, there may
+	 * be a core_id of 3, but none of 2. Assume there always is 0
+	 * Get amount of cores by counting duplicates in a package
+	for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
+		if (cpu_top->core_info[cpu].core == 0)
+	cpu_top->cores++;
+	*/
+	return cpus;
+}
+
+void cpu_topology_release(struct cpupower_topology cpu_top)
+{
+	free(cpu_top.core_info);
+}
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
new file mode 100644
index 0000000..fa031fc
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -0,0 +1,35 @@
+#ifndef __CPUPOWER_CPUPOWER_H__
+#define __CPUPOWER_CPUPOWER_H__
+
+struct cpupower_topology {
+	/* Amount of CPU cores, packages and threads per core in the system */
+	unsigned int cores;
+	unsigned int pkgs;
+	unsigned int threads; /* per core */
+
+	/* Array gets mallocated with cores entries, holding per core info */
+	struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+	int pkg;
+	int core;
+	int cpu;
+
+	/* flags */
+	unsigned int is_online:1;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+void cpu_topology_release(struct cpupower_topology cpu_top);
+int cpupower_is_cpu_online(unsigned int cpu);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
new file mode 100644
index 0000000..f8ec400
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -0,0 +1,5 @@
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
diff --git a/tools/power/cpupower/lib/sysfs.c b/tools/power/cpupower/lib/sysfs.c
deleted file mode 100644
index 870713a..0000000
--- a/tools/power/cpupower/lib/sysfs.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- */
-
-#include <stdio.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "cpufreq.h"
-
-#define PATH_TO_CPU "/sys/devices/system/cpu/"
-#define MAX_LINE_LEN 4096
-#define SYSFS_PATH_MAX 255
-
-
-static unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
-{
-	int fd;
-	ssize_t numread;
-
-	fd = open(path, O_RDONLY);
-	if (fd == -1)
-		return 0;
-
-	numread = read(fd, buf, buflen - 1);
-	if (numread < 1) {
-		close(fd);
-		return 0;
-	}
-
-	buf[numread] = '\0';
-	close(fd);
-
-	return (unsigned int) numread;
-}
-
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* helper function to read file from /sys into given buffer */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
-					    char *buf, size_t buflen)
-{
-	char path[SYSFS_PATH_MAX];
-
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
-			 cpu, fname);
-	return sysfs_read_file(path, buf, buflen);
-}
-
-/* helper function to write a new value to a /sys file */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
-					     const char *fname,
-					     const char *value, size_t len)
-{
-	char path[SYSFS_PATH_MAX];
-	int fd;
-	ssize_t numwrite;
-
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
-			 cpu, fname);
-
-	fd = open(path, O_WRONLY);
-	if (fd == -1)
-		return 0;
-
-	numwrite = write(fd, value, len);
-	if (numwrite < 1) {
-		close(fd);
-		return 0;
-	}
-
-	close(fd);
-
-	return (unsigned int) numwrite;
-}
-
-/* read access to files which contain one numeric value */
-
-enum cpufreq_value {
-	CPUINFO_CUR_FREQ,
-	CPUINFO_MIN_FREQ,
-	CPUINFO_MAX_FREQ,
-	CPUINFO_LATENCY,
-	SCALING_CUR_FREQ,
-	SCALING_MIN_FREQ,
-	SCALING_MAX_FREQ,
-	STATS_NUM_TRANSITIONS,
-	MAX_CPUFREQ_VALUE_READ_FILES
-};
-
-static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
-	[CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
-	[CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
-	[CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
-	[CPUINFO_LATENCY]  = "cpuinfo_transition_latency",
-	[SCALING_CUR_FREQ] = "scaling_cur_freq",
-	[SCALING_MIN_FREQ] = "scaling_min_freq",
-	[SCALING_MAX_FREQ] = "scaling_max_freq",
-	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
-};
-
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
-						 enum cpufreq_value which)
-{
-	unsigned long value;
-	unsigned int len;
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-
-	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
-		return 0;
-
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
-				linebuf, sizeof(linebuf));
-
-	if (len == 0)
-		return 0;
-
-	value = strtoul(linebuf, &endp, 0);
-
-	if (endp == linebuf || errno == ERANGE)
-		return 0;
-
-	return value;
-}
-
-/* read access to files which contain one string */
-
-enum cpufreq_string {
-	SCALING_DRIVER,
-	SCALING_GOVERNOR,
-	MAX_CPUFREQ_STRING_FILES
-};
-
-static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
-	[SCALING_DRIVER] = "scaling_driver",
-	[SCALING_GOVERNOR] = "scaling_governor",
-};
-
-
-static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
-					   enum cpufreq_string which)
-{
-	char linebuf[MAX_LINE_LEN];
-	char *result;
-	unsigned int len;
-
-	if (which >= MAX_CPUFREQ_STRING_FILES)
-		return NULL;
-
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	result = strdup(linebuf);
-	if (result == NULL)
-		return NULL;
-
-	if (result[strlen(result) - 1] == '\n')
-		result[strlen(result) - 1] = '\0';
-
-	return result;
-}
-
-/* write access */
-
-enum cpufreq_write {
-	WRITE_SCALING_MIN_FREQ,
-	WRITE_SCALING_MAX_FREQ,
-	WRITE_SCALING_GOVERNOR,
-	WRITE_SCALING_SET_SPEED,
-	MAX_CPUFREQ_WRITE_FILES
-};
-
-static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
-	[WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
-	[WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
-	[WRITE_SCALING_GOVERNOR] = "scaling_governor",
-	[WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
-};
-
-static int sysfs_cpufreq_write_one_value(unsigned int cpu,
-					 enum cpufreq_write which,
-					 const char *new_value, size_t len)
-{
-	if (which >= MAX_CPUFREQ_WRITE_FILES)
-		return 0;
-
-	if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
-					new_value, len) != len)
-		return -ENODEV;
-
-	return 0;
-};
-
-unsigned long sysfs_get_freq_kernel(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_hardware(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_transition_latency(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
-}
-
-int sysfs_get_freq_hardware_limits(unsigned int cpu,
-			      unsigned long *min,
-			      unsigned long *max)
-{
-	if ((!min) || (!max))
-		return -EINVAL;
-
-	*min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
-	if (!*min)
-		return -ENODEV;
-
-	*max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
-	if (!*max)
-		return -ENODEV;
-
-	return 0;
-}
-
-char *sysfs_get_freq_driver(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
-}
-
-struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-
-	policy = malloc(sizeof(struct cpufreq_policy));
-	if (!policy)
-		return NULL;
-
-	policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
-	if (!policy->governor) {
-		free(policy);
-		return NULL;
-	}
-	policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
-	policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
-	if ((!policy->min) || (!policy->max)) {
-		free(policy->governor);
-		free(policy);
-		return NULL;
-	}
-
-	return policy;
-}
-
-struct cpufreq_available_governors *
-sysfs_get_freq_available_governors(unsigned int cpu) {
-	struct cpufreq_available_governors *first = NULL;
-	struct cpufreq_available_governors *current = NULL;
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos < 2)
-				continue;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			current->governor = malloc(i - pos + 1);
-			if (!current->governor)
-				goto error_out;
-
-			memcpy(current->governor, linebuf + pos, i - pos);
-			current->governor[i - pos] = '\0';
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		if (first->governor)
-			free(first->governor);
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-
-struct cpufreq_available_frequencies *
-sysfs_get_available_frequencies(unsigned int cpu) {
-	struct cpufreq_available_frequencies *first = NULL;
-	struct cpufreq_available_frequencies *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos < 2)
-				continue;
-			if (i - pos >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-			if (sscanf(one_value, "%lu", &current->frequency) != 1)
-				goto error_out;
-
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
-							const char *file)
-{
-	struct cpufreq_affected_cpus *first = NULL;
-	struct cpufreq_affected_cpus *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos  < 1)
-				continue;
-			if (i - pos >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-
-			if (sscanf(one_value, "%u", &current->cpu) != 1)
-				goto error_out;
-
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(unsigned int cpu)
-{
-	return sysfs_get_cpu_list(cpu, "affected_cpus");
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(unsigned int cpu)
-{
-	return sysfs_get_cpu_list(cpu, "related_cpus");
-}
-
-struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
-					unsigned long long *total_time) {
-	struct cpufreq_stats *first = NULL;
-	struct cpufreq_stats *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	*total_time = 0;
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (i == strlen(linebuf) || linebuf[i] == '\n')	{
-			if (i - pos < 2)
-				continue;
-			if ((i - pos) >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-			if (sscanf(one_value, "%lu %llu",
-					&current->frequency,
-					&current->time_in_state) != 2)
-				goto error_out;
-
-			*total_time = *total_time + current->time_in_state;
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-unsigned long sysfs_get_freq_transitions(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
-}
-
-static int verify_gov(char *new_gov, char *passed_gov)
-{
-	unsigned int i, j = 0;
-
-	if (!passed_gov || (strlen(passed_gov) > 19))
-		return -EINVAL;
-
-	strncpy(new_gov, passed_gov, 20);
-	for (i = 0; i < 20; i++) {
-		if (j) {
-			new_gov[i] = '\0';
-			continue;
-		}
-		if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
-			continue;
-
-		if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
-			continue;
-
-		if (new_gov[i] == '-')
-			continue;
-
-		if (new_gov[i] == '_')
-			continue;
-
-		if (new_gov[i] == '\0') {
-			j = 1;
-			continue;
-		}
-		return -EINVAL;
-	}
-	new_gov[19] = '\0';
-	return 0;
-}
-
-int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor)
-{
-	char new_gov[SYSFS_PATH_MAX];
-
-	if (!governor)
-		return -EINVAL;
-
-	if (verify_gov(new_gov, governor))
-		return -EINVAL;
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
-					     new_gov, strlen(new_gov));
-};
-
-int sysfs_modify_freq_policy_max(unsigned int cpu, unsigned long max_freq)
-{
-	char value[SYSFS_PATH_MAX];
-
-	snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-					     value, strlen(value));
-};
-
-
-int sysfs_modify_freq_policy_min(unsigned int cpu, unsigned long min_freq)
-{
-	char value[SYSFS_PATH_MAX];
-
-	snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
-					     value, strlen(value));
-};
-
-
-int sysfs_set_freq_policy(unsigned int cpu, struct cpufreq_policy *policy)
-{
-	char min[SYSFS_PATH_MAX];
-	char max[SYSFS_PATH_MAX];
-	char gov[SYSFS_PATH_MAX];
-	int ret;
-	unsigned long old_min;
-	int write_max_first;
-
-	if (!policy || !(policy->governor))
-		return -EINVAL;
-
-	if (policy->max < policy->min)
-		return -EINVAL;
-
-	if (verify_gov(gov, policy->governor))
-		return -EINVAL;
-
-	snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
-	snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
-
-	old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
-	write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
-
-	if (write_max_first) {
-		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-						    max, strlen(max));
-		if (ret)
-			return ret;
-	}
-
-	ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
-					    strlen(min));
-	if (ret)
-		return ret;
-
-	if (!write_max_first) {
-		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-						    max, strlen(max));
-		if (ret)
-			return ret;
-	}
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
-					     gov, strlen(gov));
-}
-
-int sysfs_set_frequency(unsigned int cpu, unsigned long target_frequency)
-{
-	struct cpufreq_policy *pol = sysfs_get_freq_policy(cpu);
-	char userspace_gov[] = "userspace";
-	char freq[SYSFS_PATH_MAX];
-	int ret;
-
-	if (!pol)
-		return -ENODEV;
-
-	if (strncmp(pol->governor, userspace_gov, 9) != 0) {
-		ret = sysfs_modify_freq_policy_governor(cpu, userspace_gov);
-		if (ret) {
-			cpufreq_put_policy(pol);
-			return ret;
-		}
-	}
-
-	cpufreq_put_policy(pol);
-
-	snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
-					     freq, strlen(freq));
-}
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* General sysfs access **************************************************/
-int sysfs_cpu_exists(unsigned int cpu)
-{
-	char file[SYSFS_PATH_MAX];
-	struct stat statbuf;
-
-	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/", cpu);
-
-	if (stat(file, &statbuf) != 0)
-		return -ENOSYS;
-
-	return S_ISDIR(statbuf.st_mode) ? 0 : -ENOSYS;
-}
-
-/* General sysfs access **************************************************/
diff --git a/tools/power/cpupower/lib/sysfs.h b/tools/power/cpupower/lib/sysfs.h
deleted file mode 100644
index c76a5e0..0000000
--- a/tools/power/cpupower/lib/sysfs.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* General */
-extern unsigned int sysfs_cpu_exists(unsigned int cpu);
-
-/* CPUfreq */
-extern unsigned long sysfs_get_freq_kernel(unsigned int cpu);
-extern unsigned long sysfs_get_freq_hardware(unsigned int cpu);
-extern unsigned long sysfs_get_freq_transition_latency(unsigned int cpu);
-extern int sysfs_get_freq_hardware_limits(unsigned int cpu,
-					unsigned long *min, unsigned long *max);
-extern char *sysfs_get_freq_driver(unsigned int cpu);
-extern struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu);
-extern struct cpufreq_available_governors *sysfs_get_freq_available_governors(
-	unsigned int cpu);
-extern struct cpufreq_available_frequencies *sysfs_get_available_frequencies(
-	unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(
-	unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(
-	unsigned int cpu);
-extern struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
-						unsigned long long *total_time);
-extern unsigned long sysfs_get_freq_transitions(unsigned int cpu);
-extern int sysfs_set_freq_policy(unsigned int cpu,
-				struct cpufreq_policy *policy);
-extern int sysfs_modify_freq_policy_min(unsigned int cpu,
-					unsigned long min_freq);
-extern int sysfs_modify_freq_policy_max(unsigned int cpu,
-					unsigned long max_freq);
-extern int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor);
-extern int sysfs_set_frequency(unsigned int cpu,
-			unsigned long target_frequency);
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 0fbd1a2..b4bf769 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -16,8 +16,8 @@
 #include <getopt.h>
 
 #include "cpufreq.h"
+#include "cpuidle.h"
 #include "helpers/helpers.h"
-#include "helpers/sysfs.h"
 
 #define NORM_FREQ_LEN 32
 
@@ -296,7 +296,7 @@ int cmd_freq_set(int argc, char **argv)
 			struct cpufreq_affected_cpus *cpus;
 
 			if (!bitmask_isbitset(cpus_chosen, cpu) ||
-			    cpufreq_cpu_exists(cpu))
+			    cpupower_is_cpu_online(cpu))
 				continue;
 
 			cpus = cpufreq_get_related_cpus(cpu);
@@ -316,10 +316,10 @@ int cmd_freq_set(int argc, char **argv)
 	     cpu <= bitmask_last(cpus_chosen); cpu++) {
 
 		if (!bitmask_isbitset(cpus_chosen, cpu) ||
-		    cpufreq_cpu_exists(cpu))
+		    cpupower_is_cpu_online(cpu))
 			continue;
 
-		if (sysfs_is_cpu_online(cpu) != 1)
+		if (cpupower_is_cpu_online(cpu) != 1)
 			continue;
 
 		printf(_("Setting cpu: %d\n"), cpu);
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 8bf8ab5..b59c85d 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -13,8 +13,10 @@
 #include <string.h>
 #include <getopt.h>
 
-#include "helpers/helpers.h"
+#include <cpuidle.h>
+
 #include "helpers/sysfs.h"
+#include "helpers/helpers.h"
 #include "helpers/bitmask.h"
 
 #define LINE_LEN 10
@@ -24,7 +26,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 	unsigned int idlestates, idlestate;
 	char *tmp;
 
-	idlestates = sysfs_get_idlestate_count(cpu);
+	idlestates = cpuidle_state_count(cpu);
 	if (idlestates == 0) {
 		printf(_("CPU %u: No idle states\n"), cpu);
 		return;
@@ -33,7 +35,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 	printf(_("Number of idle states: %d\n"), idlestates);
 	printf(_("Available idle states:"));
 	for (idlestate = 0; idlestate < idlestates; idlestate++) {
-		tmp = sysfs_get_idlestate_name(cpu, idlestate);
+		tmp = cpuidle_state_name(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf(" %s", tmp);
@@ -45,28 +47,28 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 		return;
 
 	for (idlestate = 0; idlestate < idlestates; idlestate++) {
-		int disabled = sysfs_is_idlestate_disabled(cpu, idlestate);
+		int disabled = cpuidle_is_state_disabled(cpu, idlestate);
 		/* Disabled interface not supported on older kernels */
 		if (disabled < 0)
 			disabled = 0;
-		tmp = sysfs_get_idlestate_name(cpu, idlestate);
+		tmp = cpuidle_state_name(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : "");
 		free(tmp);
 
-		tmp = sysfs_get_idlestate_desc(cpu, idlestate);
+		tmp = cpuidle_state_desc(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf(_("Flags/Description: %s\n"), tmp);
 		free(tmp);
 
 		printf(_("Latency: %lu\n"),
-		       sysfs_get_idlestate_latency(cpu, idlestate));
+		       cpuidle_state_latency(cpu, idlestate));
 		printf(_("Usage: %lu\n"),
-		       sysfs_get_idlestate_usage(cpu, idlestate));
+		       cpuidle_state_usage(cpu, idlestate));
 		printf(_("Duration: %llu\n"),
-		       sysfs_get_idlestate_time(cpu, idlestate));
+		       cpuidle_state_time(cpu, idlestate));
 	}
 }
 
@@ -74,7 +76,7 @@ static void cpuidle_general_output(void)
 {
 	char *tmp;
 
-	tmp = sysfs_get_cpuidle_driver();
+	tmp = cpuidle_get_driver();
 	if (!tmp) {
 		printf(_("Could not determine cpuidle driver\n"));
 		return;
@@ -83,7 +85,7 @@ static void cpuidle_general_output(void)
 	printf(_("CPUidle driver: %s\n"), tmp);
 	free(tmp);
 
-	tmp = sysfs_get_cpuidle_governor();
+	tmp = cpuidle_get_governor();
 	if (!tmp) {
 		printf(_("Could not determine cpuidle governor\n"));
 		return;
@@ -98,7 +100,7 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
 	long max_allowed_cstate = 2000000000;
 	unsigned int cstate, cstates;
 
-	cstates = sysfs_get_idlestate_count(cpu);
+	cstates = cpuidle_state_count(cpu);
 	if (cstates == 0) {
 		printf(_("CPU %u: No C-states info\n"), cpu);
 		return;
@@ -113,11 +115,11 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
 			 "type[C%d] "), cstate, cstate);
 		printf(_("promotion[--] demotion[--] "));
 		printf(_("latency[%03lu] "),
-		       sysfs_get_idlestate_latency(cpu, cstate));
+		       cpuidle_state_latency(cpu, cstate));
 		printf(_("usage[%08lu] "),
-		       sysfs_get_idlestate_usage(cpu, cstate));
+		       cpuidle_state_usage(cpu, cstate));
 		printf(_("duration[%020Lu] \n"),
-		       sysfs_get_idlestate_time(cpu, cstate));
+		       cpuidle_state_time(cpu, cstate));
 	}
 }
 
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index d6b6ae4..691c24d 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -5,12 +5,12 @@
 #include <limits.h>
 #include <string.h>
 #include <ctype.h>
-
 #include <getopt.h>
 
-#include "cpufreq.h"
+#include <cpufreq.h>
+#include <cpuidle.h>
+
 #include "helpers/helpers.h"
-#include "helpers/sysfs.h"
 
 static struct option info_opts[] = {
      {"disable",	required_argument,		NULL, 'd'},
@@ -104,16 +104,16 @@ int cmd_idle_set(int argc, char **argv)
 		if (!bitmask_isbitset(cpus_chosen, cpu))
 			continue;
 
-		if (sysfs_is_cpu_online(cpu) != 1)
+		if (cpupower_is_cpu_online(cpu) != 1)
 			continue;
 
-		idlestates = sysfs_get_idlestate_count(cpu);
+		idlestates = cpuidle_state_count(cpu);
 		if (idlestates <= 0)
 			continue;
 
 		switch (param) {
 		case 'd':
-			ret = sysfs_idlestate_disable(cpu, idlestate, 1);
+			ret = cpuidle_state_disable(cpu, idlestate, 1);
 			if (ret == 0)
 		printf(_("Idlestate %u disabled on CPU %u\n"),  idlestate, cpu);
 			else if (ret == -1)
@@ -126,7 +126,7 @@ int cmd_idle_set(int argc, char **argv)
 		       idlestate, cpu);
 			break;
 		case 'e':
-			ret = sysfs_idlestate_disable(cpu, idlestate, 0);
+			ret = cpuidle_state_disable(cpu, idlestate, 0);
 			if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"),  idlestate, cpu);
 			else if (ret == -1)
@@ -140,13 +140,13 @@ int cmd_idle_set(int argc, char **argv)
 			break;
 		case 'D':
 			for (idlestate = 0; idlestate < idlestates; idlestate++) {
-				disabled = sysfs_is_idlestate_disabled
+				disabled = cpuidle_is_state_disabled
 					(cpu, idlestate);
-				state_latency = sysfs_get_idlestate_latency
+				state_latency = cpuidle_state_latency
 					(cpu, idlestate);
 				if (disabled == 1) {
 					if (latency > state_latency){
-						ret = sysfs_idlestate_disable
+						ret = cpuidle_state_disable
 							(cpu, idlestate, 0);
 						if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"),  idlestate, cpu);
@@ -154,7 +154,7 @@ int cmd_idle_set(int argc, char **argv)
 					continue;
 				}
 				if (latency <= state_latency){
-					ret = sysfs_idlestate_disable
+					ret = cpuidle_state_disable
 						(cpu, idlestate, 1);
 					if (ret == 0)
 		printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
@@ -163,10 +163,10 @@ int cmd_idle_set(int argc, char **argv)
 			break;
 		case 'E':
 			for (idlestate = 0; idlestate < idlestates; idlestate++) {
-				disabled = sysfs_is_idlestate_disabled
+				disabled = cpuidle_is_state_disabled
 					(cpu, idlestate);
 				if (disabled == 1) {
-					ret = sysfs_idlestate_disable
+					ret = cpuidle_state_disable
 						(cpu, idlestate, 0);
 					if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index aa9e954..afb66f8 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -14,6 +14,7 @@
 #include <locale.h>
 
 #include "helpers/bitmask.h"
+#include <cpupower.h>
 
 /* Internationalization ****************************/
 #ifdef NLS
@@ -92,31 +93,6 @@ extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info);
 extern struct cpupower_cpu_info cpupower_cpu_info;
 /* cpuid and cpuinfo helpers  **************************/
 
-struct cpuid_core_info {
-	int pkg;
-	int core;
-	int cpu;
-
-	/* flags */
-	unsigned int is_online:1;
-};
-
-/* CPU topology/hierarchy parsing ******************/
-struct cpupower_topology {
-	/* Amount of CPU cores, packages and threads per core in the system */
-	unsigned int cores;
-	unsigned int pkgs;
-	unsigned int threads; /* per core */
-
-	/* Array gets mallocated with cores entries, holding per core info */
-	struct cpuid_core_info *core_info;
-};
-
-extern int get_cpu_topology(struct cpupower_topology *cpu_top);
-extern void cpu_topology_release(struct cpupower_topology cpu_top);
-
-/* CPU topology/hierarchy parsing ******************/
-
 /* X86 ONLY ****************************************/
 #if defined(__i386__) || defined(__x86_64__)
 
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index 5f9c908..a1a6c60 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -16,110 +16,7 @@
 #include <errno.h>
 #include <fcntl.h>
 
-#include <helpers/helpers.h>
-#include <helpers/sysfs.h>
+#include <cpuidle.h>
 
-/* returns -1 on failure, 0 on success */
-static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
-{
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-	char path[SYSFS_PATH_MAX];
+/* CPU topology/hierarchy parsing ******************/
 
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
-			 cpu, fname);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	*result = strtol(linebuf, &endp, 0);
-	if (endp == linebuf || errno == ERANGE)
-		return -1;
-	return 0;
-}
-
-static int __compare(const void *t1, const void *t2)
-{
-	struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
-	struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
-	if (top1->pkg < top2->pkg)
-		return -1;
-	else if (top1->pkg > top2->pkg)
-		return 1;
-	else if (top1->core < top2->core)
-		return -1;
-	else if (top1->core > top2->core)
-		return 1;
-	else if (top1->cpu < top2->cpu)
-		return -1;
-	else if (top1->cpu > top2->cpu)
-		return 1;
-	else
-		return 0;
-}
-
-/*
- * Returns amount of cpus, negative on error, cpu_top must be
- * passed to cpu_topology_release to free resources
- *
- * Array is sorted after ->pkg, ->core, then ->cpu
- */
-int get_cpu_topology(struct cpupower_topology *cpu_top)
-{
-	int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
-
-	cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
-	if (cpu_top->core_info == NULL)
-		return -ENOMEM;
-	cpu_top->pkgs = cpu_top->cores = 0;
-	for (cpu = 0; cpu < cpus; cpu++) {
-		cpu_top->core_info[cpu].cpu = cpu;
-		cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
-		if(sysfs_topology_read_file(
-			cpu,
-			"physical_package_id",
-			&(cpu_top->core_info[cpu].pkg)) < 0) {
-			cpu_top->core_info[cpu].pkg = -1;
-			cpu_top->core_info[cpu].core = -1;
-			continue;
-		}
-		if(sysfs_topology_read_file(
-			cpu,
-			"core_id",
-			&(cpu_top->core_info[cpu].core)) < 0) {
-			cpu_top->core_info[cpu].pkg = -1;
-			cpu_top->core_info[cpu].core = -1;
-			continue;
-		}
-	}
-
-	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
-	      __compare);
-
-	/* Count the number of distinct pkgs values. This works
-	   because the primary sort of the core_info struct was just
-	   done by pkg value. */
-	last_pkg = cpu_top->core_info[0].pkg;
-	for(cpu = 1; cpu < cpus; cpu++) {
-		if (cpu_top->core_info[cpu].pkg != last_pkg &&
-				cpu_top->core_info[cpu].pkg != -1) {
-
-			last_pkg = cpu_top->core_info[cpu].pkg;
-			cpu_top->pkgs++;
-		}
-	}
-	if (!(cpu_top->core_info[0].pkg == -1))
-		cpu_top->pkgs++;
-
-	/* Intel's cores count is not consecutively numbered, there may
-	 * be a core_id of 3, but none of 2. Assume there always is 0
-	 * Get amount of cores by counting duplicates in a package
-	for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
-		if (cpu_top->core_info[cpu].core == 0)
-	cpu_top->cores++;
-	*/
-	return cpus;
-}
-
-void cpu_topology_release(struct cpupower_topology cpu_top)
-{
-	free(cpu_top.core_info);
-}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index bcd22a1..1b5da00 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -10,8 +10,8 @@
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
+#include <cpuidle.h>
 
-#include "helpers/sysfs.h"
 #include "helpers/helpers.h"
 #include "idle_monitor/cpupower-monitor.h"
 
@@ -51,7 +51,7 @@ static int cpuidle_start(void)
 		for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
 		     state++) {
 			previous_count[cpu][state] =
-				sysfs_get_idlestate_time(cpu, state);
+				cpuidle_state_time(cpu, state);
 			dprint("CPU %d - State: %d - Val: %llu\n",
 			       cpu, state, previous_count[cpu][state]);
 		}
@@ -70,7 +70,7 @@ static int cpuidle_stop(void)
 		for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
 		     state++) {
 			current_count[cpu][state] =
-				sysfs_get_idlestate_time(cpu, state);
+				cpuidle_state_time(cpu, state);
 			dprint("CPU %d - State: %d - Val: %llu\n",
 			       cpu, state, previous_count[cpu][state]);
 		}
@@ -132,13 +132,13 @@ static struct cpuidle_monitor *cpuidle_register(void)
 	char *tmp;
 
 	/* Assume idle state count is the same for all CPUs */
-	cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
+	cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0);
 
 	if (cpuidle_sysfs_monitor.hw_states_num <= 0)
 		return NULL;
 
 	for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
-		tmp = sysfs_get_idlestate_name(0, num);
+		tmp = cpuidle_state_name(0, num);
 		if (tmp == NULL)
 			continue;
 
@@ -146,7 +146,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
 		strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
 		free(tmp);
 
-		tmp = sysfs_get_idlestate_desc(0, num);
+		tmp = cpuidle_state_desc(0, num);
 		if (tmp == NULL)
 			continue;
 		strncpy(cpuidle_cstates[num].desc, tmp,	CSTATE_DESC_LEN - 1);
-- 
cgit v0.10.2


From 0722249ac1f3dcc3af9e9d7ed89792a68f066660 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 3 Nov 2015 19:04:16 +0900
Subject: PM / devfreq: exynos: Add generic exynos bus frequency driver

This patch adds the generic exynos bus frequency driver for AMBA AXI bus
of sub-blocks in exynos SoC with DEVFREQ framework. The Samsung Exynos SoC
have the common architecture for bus between DRAM and sub-blocks in SoC.
This driver can support the generic bus frequency driver for Exynos SoCs.

In devicetree, Each bus block has a bus clock, regulator, operation-point
and devfreq-event devices which measure the utilization of each bus block.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
[m.reichl and linux.amoon: Tested it on exynos4412-odroidu3 board]
Tested-by: Markus Reichl <m.reichl@fivetechno.de>
Tested-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 4de78c5..cedda8f 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -66,6 +66,21 @@ config DEVFREQ_GOV_USERSPACE
 
 comment "DEVFREQ Drivers"
 
+config ARM_EXYNOS_BUS_DEVFREQ
+	bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
+	depends on ARCH_EXYNOS
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DEVFREQ_EVENT_EXYNOS_PPMU
+	select PM_DEVFREQ_EVENT
+	select PM_OPP
+	help
+	  This adds the common DEVFREQ driver for Exynos Memory bus. Exynos
+	  Memory bus has one more group of memory bus (e.g, MIF and INT block).
+	  Each memory bus group could contain many memoby bus block. It reads
+	  PPMU counters of memory controllers by using DEVFREQ-event device
+	  and adjusts the operating frequencies and voltages with OPP support.
+	  This does not yet operate with optimal voltages.
+
 config ARM_EXYNOS4_BUS_DEVFREQ
 	bool "ARM Exynos4210/4212/4412 Memory Bus DEVFREQ Driver"
 	depends on (CPU_EXYNOS4210 || SOC_EXYNOS4212 || SOC_EXYNOS4412) && !ARCH_MULTIPLATFORM
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 5134f9e..8af8aaf 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE)	+= governor_powersave.o
 obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o
 
 # DEVFREQ Drivers
+obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
 obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos/
 obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos/
 obj-$(CONFIG_ARM_TEGRA_DEVFREQ)		+= tegra-devfreq.o
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
new file mode 100644
index 0000000..137de61
--- /dev/null
+++ b/drivers/devfreq/exynos-bus.c
@@ -0,0 +1,443 @@
+/*
+ * Generic Exynos Bus frequency driver with DEVFREQ Framework
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This driver support Exynos Bus frequency feature by using
+ * DEVFREQ framework and is based on drivers/devfreq/exynos/exynos4_bus.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq-event.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#define DEFAULT_SATURATION_RATIO	40
+#define DEFAULT_VOLTAGE_TOLERANCE	2
+
+struct exynos_bus {
+	struct device *dev;
+
+	struct devfreq *devfreq;
+	struct devfreq_event_dev **edev;
+	unsigned int edev_count;
+	struct mutex lock;
+
+	struct dev_pm_opp *curr_opp;
+
+	struct regulator *regulator;
+	struct clk *clk;
+	unsigned int voltage_tolerance;
+	unsigned int ratio;
+};
+
+/*
+ * Control the devfreq-event device to get the current state of bus
+ */
+#define exynos_bus_ops_edev(ops)				\
+static int exynos_bus_##ops(struct exynos_bus *bus)		\
+{								\
+	int i, ret;						\
+								\
+	for (i = 0; i < bus->edev_count; i++) {			\
+		if (!bus->edev[i])				\
+			continue;				\
+		ret = devfreq_event_##ops(bus->edev[i]);	\
+		if (ret < 0)					\
+			return ret;				\
+	}							\
+								\
+	return 0;						\
+}
+exynos_bus_ops_edev(enable_edev);
+exynos_bus_ops_edev(disable_edev);
+exynos_bus_ops_edev(set_event);
+
+static int exynos_bus_get_event(struct exynos_bus *bus,
+				struct devfreq_event_data *edata)
+{
+	struct devfreq_event_data event_data;
+	unsigned long load_count = 0, total_count = 0;
+	int i, ret = 0;
+
+	for (i = 0; i < bus->edev_count; i++) {
+		if (!bus->edev[i])
+			continue;
+
+		ret = devfreq_event_get_event(bus->edev[i], &event_data);
+		if (ret < 0)
+			return ret;
+
+		if (i == 0 || event_data.load_count > load_count) {
+			load_count = event_data.load_count;
+			total_count = event_data.total_count;
+		}
+	}
+
+	edata->load_count = load_count;
+	edata->total_count = total_count;
+
+	return ret;
+}
+
+/*
+ * Must necessary function for devfreq governor
+ */
+static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct dev_pm_opp *new_opp;
+	unsigned long old_freq, new_freq, old_volt, new_volt, tol;
+	int ret = 0;
+
+	/* Get new opp-bus instance according to new bus clock */
+	rcu_read_lock();
+	new_opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(new_opp)) {
+		dev_err(dev, "failed to get recommended opp instance\n");
+		rcu_read_unlock();
+		return PTR_ERR(new_opp);
+	}
+
+	new_freq = dev_pm_opp_get_freq(new_opp);
+	new_volt = dev_pm_opp_get_voltage(new_opp);
+	old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+	old_volt = dev_pm_opp_get_voltage(bus->curr_opp);
+	rcu_read_unlock();
+
+	if (old_freq == new_freq)
+		return 0;
+	tol = new_volt * bus->voltage_tolerance / 100;
+
+	/* Change voltage and frequency according to new OPP level */
+	mutex_lock(&bus->lock);
+
+	if (old_freq < new_freq) {
+		ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+		if (ret < 0) {
+			dev_err(bus->dev, "failed to set voltage\n");
+			goto out;
+		}
+	}
+
+	ret = clk_set_rate(bus->clk, new_freq);
+	if (ret < 0) {
+		dev_err(dev, "failed to change clock of bus\n");
+		clk_set_rate(bus->clk, old_freq);
+		goto out;
+	}
+
+	if (old_freq > new_freq) {
+		ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+		if (ret < 0) {
+			dev_err(bus->dev, "failed to set voltage\n");
+			goto out;
+		}
+	}
+	bus->curr_opp = new_opp;
+
+	dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+			old_freq/1000, new_freq/1000);
+out:
+	mutex_unlock(&bus->lock);
+
+	return ret;
+}
+
+static int exynos_bus_get_dev_status(struct device *dev,
+				     struct devfreq_dev_status *stat)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct devfreq_event_data edata;
+	int ret;
+
+	rcu_read_lock();
+	stat->current_frequency = dev_pm_opp_get_freq(bus->curr_opp);
+	rcu_read_unlock();
+
+	ret = exynos_bus_get_event(bus, &edata);
+	if (ret < 0) {
+		stat->total_time = stat->busy_time = 0;
+		goto err;
+	}
+
+	stat->busy_time = (edata.load_count * 100) / bus->ratio;
+	stat->total_time = edata.total_count;
+
+	dev_dbg(dev, "Usage of devfreq-event : %lu/%lu\n", stat->busy_time,
+							stat->total_time);
+
+err:
+	ret = exynos_bus_set_event(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to set event to devfreq-event devices\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static void exynos_bus_exit(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_disable_edev(bus);
+	if (ret < 0)
+		dev_warn(dev, "failed to disable the devfreq-event devices\n");
+
+	if (bus->regulator)
+		regulator_disable(bus->regulator);
+
+	dev_pm_opp_of_remove_table(dev);
+}
+
+static int exynos_bus_parse_of(struct device_node *np,
+			      struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	unsigned long rate;
+	int i, ret, count, size;
+
+	/* Get the clock to provide each bus with source clock */
+	bus->clk = devm_clk_get(dev, "bus");
+	if (IS_ERR(bus->clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		return PTR_ERR(bus->clk);
+	}
+
+	ret = clk_prepare_enable(bus->clk);
+	if (ret < 0) {
+		dev_err(dev, "failed to get enable clock\n");
+		return ret;
+	}
+
+	/* Get the freq/voltage OPP table to scale the bus frequency */
+	rcu_read_lock();
+	ret = dev_pm_opp_of_add_table(dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to get OPP table\n");
+		rcu_read_unlock();
+		goto err_clk;
+	}
+
+	rate = clk_get_rate(bus->clk);
+	bus->curr_opp = dev_pm_opp_find_freq_ceil(dev, &rate);
+	if (IS_ERR(bus->curr_opp)) {
+		dev_err(dev, "failed to find dev_pm_opp\n");
+		rcu_read_unlock();
+		ret = PTR_ERR(bus->curr_opp);
+		goto err_opp;
+	}
+	rcu_read_unlock();
+
+	/* Get the regulator to provide each bus with the power */
+	bus->regulator = devm_regulator_get(dev, "vdd");
+	if (IS_ERR(bus->regulator)) {
+		dev_err(dev, "failed to get VDD regulator\n");
+		ret = PTR_ERR(bus->regulator);
+		goto err_opp;
+	}
+
+	ret = regulator_enable(bus->regulator);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable VDD regulator\n");
+		goto err_opp;
+	}
+
+	/*
+	 * Get the devfreq-event devices to get the current utilization of
+	 * buses. This raw data will be used in devfreq ondemand governor.
+	 */
+	count = devfreq_event_get_edev_count(dev);
+	if (count < 0) {
+		dev_err(dev, "failed to get the count of devfreq-event dev\n");
+		ret = count;
+		goto err_regulator;
+	}
+	bus->edev_count = count;
+
+	size = sizeof(*bus->edev) * count;
+	bus->edev = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (!bus->edev) {
+		ret = -ENOMEM;
+		goto err_regulator;
+	}
+
+	for (i = 0; i < count; i++) {
+		bus->edev[i] = devfreq_event_get_edev_by_phandle(dev, i);
+		if (IS_ERR(bus->edev[i])) {
+			ret = -EPROBE_DEFER;
+			goto err_regulator;
+		}
+	}
+
+	/*
+	 * Optionally, Get the saturation ratio according to Exynos SoC
+	 * When measuring the utilization of each AXI bus with devfreq-event
+	 * devices, the measured real cycle might be much lower than the
+	 * total cycle of bus during sampling rate. In result, the devfreq
+	 * simple-ondemand governor might not decide to change the current
+	 * frequency due to too utilization (= real cycle/total cycle).
+	 * So, this property is used to adjust the utilization when calculating
+	 * the busy_time in exynos_bus_get_dev_status().
+	 */
+	if (of_property_read_u32(np, "exynos,saturation-ratio", &bus->ratio))
+		bus->ratio = DEFAULT_SATURATION_RATIO;
+
+	if (of_property_read_u32(np, "exynos,voltage-tolerance",
+					&bus->voltage_tolerance))
+		bus->voltage_tolerance = DEFAULT_VOLTAGE_TOLERANCE;
+
+	return 0;
+
+err_regulator:
+	regulator_disable(bus->regulator);
+err_opp:
+	dev_pm_opp_of_remove_table(dev);
+err_clk:
+	clk_disable_unprepare(bus->clk);
+
+	return ret;
+}
+
+static int exynos_bus_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct devfreq_dev_profile *profile;
+	struct devfreq_simple_ondemand_data *ondemand_data;
+	struct exynos_bus *bus;
+	int ret;
+
+	if (!np) {
+		dev_err(dev, "failed to find devicetree node\n");
+		return -EINVAL;
+	}
+
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+	mutex_init(&bus->lock);
+	bus->dev = &pdev->dev;
+	platform_set_drvdata(pdev, bus);
+
+	/* Parse the device-tree to get the resource information */
+	ret = exynos_bus_parse_of(np, bus);
+	if (ret < 0)
+		return ret;
+
+	/* Initalize the struct profile and governor data */
+	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
+	if (!profile)
+		return -ENOMEM;
+	profile->polling_ms = 50;
+	profile->target = exynos_bus_target;
+	profile->get_dev_status = exynos_bus_get_dev_status;
+	profile->exit = exynos_bus_exit;
+
+	ondemand_data = devm_kzalloc(dev, sizeof(*ondemand_data), GFP_KERNEL);
+	if (!ondemand_data)
+		return -ENOMEM;
+	ondemand_data->upthreshold = 40;
+	ondemand_data->downdifferential = 5;
+
+	/* Add devfreq device to monitor and handle the exynos bus */
+	bus->devfreq = devm_devfreq_add_device(dev, profile, "simple_ondemand",
+						ondemand_data);
+	if (IS_ERR(bus->devfreq)) {
+		dev_err(dev, "failed to add devfreq device\n");
+		return  PTR_ERR(bus->devfreq);
+	}
+
+	/* Register opp_notifier to catch the change of OPP  */
+	ret = devm_devfreq_register_opp_notifier(dev, bus->devfreq);
+	if (ret < 0) {
+		dev_err(dev, "failed to register opp notifier\n");
+		return ret;
+	}
+
+	/*
+	 * Enable devfreq-event to get raw data which is used to determine
+	 * current bus load.
+	 */
+	ret = exynos_bus_enable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable devfreq-event devices\n");
+		return ret;
+	}
+
+	ret = exynos_bus_set_event(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to set event to devfreq-event devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int exynos_bus_resume(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_enable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable the devfreq-event devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int exynos_bus_suspend(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_disable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to disable the devfreq-event devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_bus_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(exynos_bus_suspend, exynos_bus_resume)
+};
+
+static const struct of_device_id exynos_bus_of_match[] = {
+	{ .compatible = "samsung,exynos-bus", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, exynos_bus_of_match);
+
+static struct platform_driver exynos_bus_platdrv = {
+	.probe		= exynos_bus_probe,
+	.driver = {
+		.name	= "exynos-bus",
+		.pm	= &exynos_bus_pm,
+		.of_match_table = of_match_ptr(exynos_bus_of_match),
+	},
+};
+module_platform_driver(exynos_bus_platdrv);
+
+MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From 72c160bda1dbe75d532502374a6cb6be28990093 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 4 Nov 2015 10:52:45 +0900
Subject: PM / devfreq: exynos: Add documentation for generic exynos bus
 frequency driver

This patch adds the documentation for generic exynos bus frequency
driver.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
new file mode 100644
index 0000000..7200817
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -0,0 +1,95 @@
+* Generic Exynos Bus frequency device
+
+The Samsung Exynos SoC has many buses for data transfer between DRAM
+and sub-blocks in SoC. Most Exynos SoCs share the common architecture
+for buses. Generally, each bus of Exynos SoC includes a source clock
+and a power line, which are able to change the clock frequency
+of the bus in runtime. To monitor the usage of each bus in runtime,
+the driver uses the PPMU (Platform Performance Monitoring Unit), which
+is able to measure the current load of sub-blocks.
+
+There are a little different composition among Exynos SoC because each Exynos
+SoC has different sub-blocks. Therefore, such difference should be specified
+in devicetree file instead of each device driver. In result, this driver
+is able to support the bus frequency for all Exynos SoCs.
+
+Required properties for bus device:
+- compatible: Should be "samsung,exynos-bus".
+- clock-names : the name of clock used by the bus, "bus".
+- clocks : phandles for clock specified in "clock-names" property.
+- operating-points-v2: the OPP table including frequency/voltage information
+  to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
+- vdd-supply: the regulator to provide the buses with the voltage.
+- devfreq-events: the devfreq-event device to monitor the current utilization
+  of buses.
+
+Optional properties for bus device:
+- exynos,saturation-ratio: the percentage value which is used to calibrate
+			the performance count against total cycle count.
+- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
+			which is used to calculate the max voltage.
+
+Example1:
+	Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to
+	power line (regulator). The MIF (Memory Interface) AXI bus is used to
+	transfer data between DRAM and CPU and uses the VDD_MIF regulator.
+
+	- power line(VDD_MIF) --> bus for DMC (Dynamic Memory Controller) block
+
+	- MIF bus's frequency/voltage table
+	-----------------------
+	|Lv| Freq   | Voltage |
+	-----------------------
+	|L1| 50000  |800000   |
+	|L2| 100000 |800000   |
+	|L3| 134000 |800000   |
+	|L4| 200000 |825000   |
+	|L5| 400000 |875000   |
+	-----------------------
+
+Example2 :
+	The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
+	is listed below:
+
+	bus_dmc: bus_dmc {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu_dmc CLK_DIV_DMC>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_dmc_opp_table>;
+		status = "disabled";
+	};
+
+	bus_dmc_opp_table: opp_table1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@134000000 {
+			opp-hz = /bits/ 64 <134000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+			opp-microvolt = <825000>;
+		};
+		opp@400000000 {
+			opp-hz = /bits/ 64 <400000000>;
+			opp-microvolt = <875000>;
+		};
+	};
+
+	Usage case to handle the frequency and voltage of bus on runtime
+	in exynos3250-rinato.dts is listed below:
+
+	&bus_dmc {
+		devfreq-events = <&ppmu_dmc0_3>, <&ppmu_dmc1_3>;
+		vdd-supply = <&buck1_reg>;	/* VDD_MIF */
+		status = "okay";
+	};
-- 
cgit v0.10.2


From 8f510aeb223b26c4ffbece9fa92e4befea470f57 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 10 Nov 2015 20:31:07 +0900
Subject: PM / devfreq: Add devfreq_get_devfreq_by_phandle()

This patch adds the new devfreq_get_devfreq_by_phandle() OF helper function
which can find the instance of devfreq device by using phandle ("devfreq").

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
[m.reichl and linux.amoon: Tested it on exynos4412-odroidu3 board]
Tested-by: Markus Reichl <m.reichl@fivetechno.de>
Tested-by: Anand Moon <linux.amoon@gmail.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 984c5e9..20a9422 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <linux/of.h>
 #include "governor.h"
 
 static struct class *devfreq_class;
@@ -639,6 +640,49 @@ struct devfreq *devm_devfreq_add_device(struct device *dev,
 }
 EXPORT_SYMBOL(devm_devfreq_add_device);
 
+#ifdef CONFIG_OF
+/*
+ * devfreq_get_devfreq_by_phandle - Get the devfreq device from devicetree
+ * @dev - instance to the given device
+ * @index - index into list of devfreq
+ *
+ * return the instance of devfreq device
+ */
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+	struct device_node *node;
+	struct devfreq *devfreq;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	if (!dev->of_node)
+		return ERR_PTR(-EINVAL);
+
+	node = of_parse_phandle(dev->of_node, "devfreq", index);
+	if (!node)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&devfreq_list_lock);
+	list_for_each_entry(devfreq, &devfreq_list, node) {
+		if (devfreq->dev.parent
+			&& devfreq->dev.parent->of_node == node) {
+			mutex_unlock(&devfreq_list_lock);
+			return devfreq;
+		}
+	}
+	mutex_unlock(&devfreq_list_lock);
+
+	return ERR_PTR(-EPROBE_DEFER);
+}
+#else
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif /* CONFIG_OF */
+EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_phandle);
+
 /**
  * devm_devfreq_remove_device() - Resource-managed devfreq_remove_device()
  * @dev:	the device to add devfreq feature.
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 6fa02a2..aa0b842 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -208,6 +208,9 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev,
 extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 						struct devfreq *devfreq);
 
+extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+						int index);
+
 /**
  * devfreq_update_stats() - update the last_status pointer in struct devfreq
  * @df:		the devfreq instance whose status needs updating
@@ -307,6 +310,12 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev,
 {
 }
 
+static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+							int index)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline int devfreq_update_stats(struct devfreq *df)
 {
 	return -EINVAL;
-- 
cgit v0.10.2


From 0fe3a66410a3ba96679be903f1e287d7a0a264a9 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 26 Jan 2016 13:21:26 +0900
Subject: PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier

This patch adds the new DEVFREQ_TRANSITION_NOTIFIER notifier to send
the notification when the frequency of device is changed.
This notifier has two state as following:
- DEVFREQ_PRECHANGE  : Notify it before chaning the frequency of device
- DEVFREQ_POSTCHANGE : Notify it after changed the frequency of device

And this patch adds the resourced-managed function to release the resource
automatically when error happen.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
[m.reichl and linux.amoon: Tested it on exynos4412-odroidu3 board]
Tested-by: Markus Reichl <m.reichl@fivetechno.de>
Tested-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 20a9422..1d6c803 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -189,6 +189,29 @@ static struct devfreq_governor *find_devfreq_governor(const char *name)
 	return ERR_PTR(-ENODEV);
 }
 
+static int devfreq_notify_transition(struct devfreq *devfreq,
+		struct devfreq_freqs *freqs, unsigned int state)
+{
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (state) {
+	case DEVFREQ_PRECHANGE:
+		srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+				DEVFREQ_PRECHANGE, freqs);
+		break;
+
+	case DEVFREQ_POSTCHANGE:
+		srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+				DEVFREQ_POSTCHANGE, freqs);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* Load monitoring helper functions for governors use */
 
 /**
@@ -200,7 +223,8 @@ static struct devfreq_governor *find_devfreq_governor(const char *name)
  */
 int update_devfreq(struct devfreq *devfreq)
 {
-	unsigned long freq;
+	struct devfreq_freqs freqs;
+	unsigned long freq, cur_freq;
 	int err = 0;
 	u32 flags = 0;
 
@@ -234,10 +258,22 @@ int update_devfreq(struct devfreq *devfreq)
 		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
 	}
 
+	if (devfreq->profile->get_cur_freq)
+		devfreq->profile->get_cur_freq(devfreq->dev.parent, &cur_freq);
+	else
+		cur_freq = devfreq->previous_freq;
+
+	freqs.old = cur_freq;
+	freqs.new = freq;
+	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
+
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
 	if (err)
 		return err;
 
+	freqs.new = freq;
+	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
+
 	if (devfreq->profile->freq_table)
 		if (devfreq_update_status(devfreq, freq))
 			dev_err(&devfreq->dev,
@@ -542,6 +578,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		goto err_out;
 	}
 
+	srcu_init_notifier_head(&devfreq->transition_notifier_list);
+
 	mutex_unlock(&devfreq->lock);
 
 	mutex_lock(&devfreq_list_lock);
@@ -1310,6 +1348,129 @@ void devm_devfreq_unregister_opp_notifier(struct device *dev,
 }
 EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier);
 
+/**
+ * devfreq_register_notifier() - Register a driver with devfreq
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to register.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_register_notifier(struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	int ret = 0;
+
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (list) {
+	case DEVFREQ_TRANSITION_NOTIFIER:
+		ret = srcu_notifier_chain_register(
+				&devfreq->transition_notifier_list, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_register_notifier);
+
+/*
+ * devfreq_unregister_notifier() - Unregister a driver with devfreq
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be unregistered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_unregister_notifier(struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	int ret = 0;
+
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (list) {
+	case DEVFREQ_TRANSITION_NOTIFIER:
+		ret = srcu_notifier_chain_unregister(
+				&devfreq->transition_notifier_list, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_unregister_notifier);
+
+struct devfreq_notifier_devres {
+	struct devfreq *devfreq;
+	struct notifier_block *nb;
+	unsigned int list;
+};
+
+static void devm_devfreq_notifier_release(struct device *dev, void *res)
+{
+	struct devfreq_notifier_devres *this = res;
+
+	devfreq_unregister_notifier(this->devfreq, this->nb, this->list);
+}
+
+/**
+ * devm_devfreq_register_notifier()
+	- Resource-managed devfreq_register_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be unregistered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	struct devfreq_notifier_devres *ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_devfreq_notifier_release, sizeof(*ptr),
+				GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = devfreq_register_notifier(devfreq, nb, list);
+	if (ret) {
+		devres_free(ptr);
+		return ret;
+	}
+
+	ptr->devfreq = devfreq;
+	ptr->nb = nb;
+	ptr->list = list;
+	devres_add(dev, ptr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_devfreq_register_notifier);
+
+/**
+ * devm_devfreq_unregister_notifier()
+	- Resource-managed devfreq_unregister_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be unregistered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	WARN_ON(devres_release(dev, devm_devfreq_notifier_release,
+			       devm_devfreq_dev_match, devfreq));
+}
+EXPORT_SYMBOL(devm_devfreq_unregister_notifier);
+
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
 MODULE_DESCRIPTION("devfreq class support");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index aa0b842..98c6993 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -19,6 +19,13 @@
 
 #define DEVFREQ_NAME_LEN 16
 
+/* DEVFREQ notifier interface */
+#define DEVFREQ_TRANSITION_NOTIFIER	(0)
+
+/* Transition notifiers of DEVFREQ_TRANSITION_NOTIFIER */
+#define	DEVFREQ_PRECHANGE		(0)
+#define DEVFREQ_POSTCHANGE		(1)
+
 struct devfreq;
 
 /**
@@ -143,6 +150,7 @@ struct devfreq_governor {
  * @trans_table:	Statistics of devfreq transitions
  * @time_in_state:	Statistics of devfreq states
  * @last_stat_updated:	The last time stat updated
+ * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -177,6 +185,13 @@ struct devfreq {
 	unsigned int *trans_table;
 	unsigned long *time_in_state;
 	unsigned long last_stat_updated;
+
+	struct srcu_notifier_head transition_notifier_list;
+};
+
+struct devfreq_freqs {
+	unsigned long old;
+	unsigned long new;
 };
 
 #if defined(CONFIG_PM_DEVFREQ)
@@ -207,7 +222,20 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev,
 					      struct devfreq *devfreq);
 extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 						struct devfreq *devfreq);
-
+extern int devfreq_register_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list);
+extern int devfreq_unregister_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list);
+extern int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list);
+extern void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list);
 extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
 						int index);
 
@@ -310,6 +338,35 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev,
 {
 }
 
+static inline int devfreq_register_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list)
+{
+	return 0;
+}
+
+static inline int devfreq_unregister_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list)
+{
+	return 0;
+}
+
+static inline int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	return 0;
+}
+
+static inline void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+}
+
 static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
 							int index)
 {
-- 
cgit v0.10.2


From 996133119f57334c38b020dbfaaac5b5eb127e29 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 22 Mar 2016 13:44:03 +0900
Subject: PM / devfreq: Add new passive governor

This patch adds the new passive governor for DEVFREQ framework. The following
governors are already present and used for DVFS (Dynamic Voltage and Frequency
Scaling) drivers. The following governors are independently used for one device
driver which don't give the influence to other device drviers and also don't
receive the effect from other device drivers.
- ondemand / performance / powersave / userspace

The passive governor depends on operation of parent driver with specific
governos extremely and is not able to decide the new frequency by oneself.
According to the decided new frequency of parent driver with governor,
the passive governor uses it to decide the appropriate frequency for own
device driver. The passive governor must need the following information
from device tree:
- the source clock and OPP tables
- the instance of parent device

For exameple,
there are one more devfreq device drivers which need to change their source
clock according to their utilization on runtime. But, they share the same
power line (e.g., regulator). So, specific device driver is operated as parent
with ondemand governor and then the rest device driver with passive governor
is influenced by parent device.

Suggested-by: Myungjoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
[tjakobi: Reported RCU locking issue and cw00.choi fix it]
Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
[linux.amoon: Reported possible recursive locking and cw00.choi fix it]
Reported-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index cedda8f..d260cd0 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -64,6 +64,14 @@ config DEVFREQ_GOV_USERSPACE
 	  Otherwise, the governor does not change the frequency
 	  given at the initialization.
 
+config DEVFREQ_GOV_PASSIVE
+	tristate "Passive"
+	help
+	  Sets the frequency based on the frequency of its parent devfreq
+	  device. This governor does not change the frequency by itself
+	  through sysfs entries. The passive governor recommends that
+	  devfreq device uses the OPP table to get the frequency/voltage.
+
 comment "DEVFREQ Drivers"
 
 config ARM_EXYNOS_BUS_DEVFREQ
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 8af8aaf..2633087 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)	+= governor_simpleondemand.o
 obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE)	+= governor_performance.o
 obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE)	+= governor_powersave.o
 obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o
+obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)	+= governor_passive.o
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
new file mode 100644
index 0000000..a4b0b02
--- /dev/null
+++ b/drivers/devfreq/governor_passive.c
@@ -0,0 +1,207 @@
+/*
+ * linux/drivers/devfreq/governor_passive.c
+ *
+ * Copyright (C) 2016 Samsung Electronics
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/devfreq.h>
+#include "governor.h"
+
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+					unsigned long *freq)
+{
+	struct devfreq_passive_data *p_data
+			= (struct devfreq_passive_data *)devfreq->data;
+	struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+	unsigned long child_freq = ULONG_MAX;
+	struct dev_pm_opp *opp;
+	int i, count, ret = 0;
+
+	/*
+	 * If the devfreq device with passive governor has the specific method
+	 * to determine the next frequency, should use the get_target_freq()
+	 * of struct devfreq_passive_data.
+	 */
+	if (p_data->get_target_freq) {
+		ret = p_data->get_target_freq(devfreq, freq);
+		goto out;
+	}
+
+	/*
+	 * If the parent and passive devfreq device uses the OPP table,
+	 * get the next frequency by using the OPP table.
+	 */
+
+	/*
+	 * - parent devfreq device uses the governors except for passive.
+	 * - passive devfreq device uses the passive governor.
+	 *
+	 * Each devfreq has the OPP table. After deciding the new frequency
+	 * from the governor of parent devfreq device, the passive governor
+	 * need to get the index of new frequency on OPP table of parent
+	 * device. And then the index is used for getting the suitable
+	 * new frequency for passive devfreq device.
+	 */
+	if (!devfreq->profile || !devfreq->profile->freq_table
+		|| devfreq->profile->max_state <= 0)
+		return -EINVAL;
+
+	/*
+	 * The passive governor have to get the correct frequency from OPP
+	 * list of parent device. Because in this case, *freq is temporary
+	 * value which is decided by ondemand governor.
+	 */
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(parent_devfreq->dev.parent, freq, 0);
+	rcu_read_unlock();
+	if (IS_ERR(opp)) {
+		ret = PTR_ERR(opp);
+		goto out;
+	}
+
+	/*
+	 * Get the OPP table's index of decided freqeuncy by governor
+	 * of parent device.
+	 */
+	for (i = 0; i < parent_devfreq->profile->max_state; i++)
+		if (parent_devfreq->profile->freq_table[i] == *freq)
+			break;
+
+	if (i == parent_devfreq->profile->max_state) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Get the suitable frequency by using index of parent device. */
+	if (i < devfreq->profile->max_state) {
+		child_freq = devfreq->profile->freq_table[i];
+	} else {
+		count = devfreq->profile->max_state;
+		child_freq = devfreq->profile->freq_table[count - 1];
+	}
+
+	/* Return the suitable frequency for passive device. */
+	*freq = child_freq;
+
+out:
+	return ret;
+}
+
+static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq)
+{
+	int ret;
+
+	if (!devfreq->governor)
+		return -EINVAL;
+
+	mutex_lock_nested(&devfreq->lock, SINGLE_DEPTH_NESTING);
+
+	ret = devfreq->governor->get_target_freq(devfreq, &freq);
+	if (ret < 0)
+		goto out;
+
+	ret = devfreq->profile->target(devfreq->dev.parent, &freq, 0);
+	if (ret < 0)
+		goto out;
+
+	devfreq->previous_freq = freq;
+
+out:
+	mutex_unlock(&devfreq->lock);
+
+	return 0;
+}
+
+static int devfreq_passive_notifier_call(struct notifier_block *nb,
+				unsigned long event, void *ptr)
+{
+	struct devfreq_passive_data *data
+			= container_of(nb, struct devfreq_passive_data, nb);
+	struct devfreq *devfreq = (struct devfreq *)data->this;
+	struct devfreq *parent = (struct devfreq *)data->parent;
+	struct devfreq_freqs *freqs = (struct devfreq_freqs *)ptr;
+	unsigned long freq = freqs->new;
+
+	switch (event) {
+	case DEVFREQ_PRECHANGE:
+		if (parent->previous_freq > freq)
+			update_devfreq_passive(devfreq, freq);
+		break;
+	case DEVFREQ_POSTCHANGE:
+		if (parent->previous_freq < freq)
+			update_devfreq_passive(devfreq, freq);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+				unsigned int event, void *data)
+{
+	struct device *dev = devfreq->dev.parent;
+	struct devfreq_passive_data *p_data
+			= (struct devfreq_passive_data *)devfreq->data;
+	struct devfreq *parent = (struct devfreq *)p_data->parent;
+	struct notifier_block *nb = &p_data->nb;
+	int ret = 0;
+
+	if (!parent)
+		return -EPROBE_DEFER;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		if (!p_data->this)
+			p_data->this = devfreq;
+
+		nb->notifier_call = devfreq_passive_notifier_call;
+		ret = devm_devfreq_register_notifier(dev, parent, nb,
+					DEVFREQ_TRANSITION_NOTIFIER);
+		break;
+	case DEVFREQ_GOV_STOP:
+		devm_devfreq_unregister_notifier(dev, parent, nb,
+					DEVFREQ_TRANSITION_NOTIFIER);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static struct devfreq_governor devfreq_passive = {
+	.name = "passive",
+	.get_target_freq = devfreq_passive_get_target_freq,
+	.event_handler = devfreq_passive_event_handler,
+};
+
+static int __init devfreq_passive_init(void)
+{
+	return devfreq_add_governor(&devfreq_passive);
+}
+subsys_initcall(devfreq_passive_init);
+
+static void __exit devfreq_passive_exit(void)
+{
+	int ret;
+
+	ret = devfreq_remove_governor(&devfreq_passive);
+	if (ret)
+		pr_err("%s: failed remove governor %d\n", __func__, ret);
+
+	return;
+}
+module_exit(devfreq_passive_exit);
+
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("DEVFREQ Passive governor");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 98c6993..2de4e2e 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -272,6 +272,39 @@ struct devfreq_simple_ondemand_data {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+/**
+ * struct devfreq_passive_data - void *data fed to struct devfreq
+ *	and devfreq_add_device
+ * @parent:	the devfreq instance of parent device.
+ * @get_target_freq:	Optional callback, Returns desired operating frequency
+ *			for the device using passive governor. That is called
+ *			when passive governor should decide the next frequency
+ *			by using the new frequency of parent devfreq device
+ *			using governors except for passive governor.
+ *			If the devfreq device has the specific method to decide
+ *			the next frequency, should use this callback.
+ * @this:	the devfreq instance of own device.
+ * @nb:		the notifier block for DEVFREQ_TRANSITION_NOTIFIER list
+ *
+ * The devfreq_passive_data have to set the devfreq instance of parent
+ * device with governors except for the passive governor. But, don't need to
+ * initialize the 'this' and 'nb' field because the devfreq core will handle
+ * them.
+ */
+struct devfreq_passive_data {
+	/* Should set the devfreq instance of parent device */
+	struct devfreq *parent;
+
+	/* Optional callback to decide the next frequency of passvice device */
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+
+	/* For passive governor's internal use. Don't need to set them */
+	struct devfreq *this;
+	struct notifier_block nb;
+};
+#endif
+
 #else /* !CONFIG_PM_DEVFREQ */
 static inline struct devfreq *devfreq_add_device(struct device *dev,
 					  struct devfreq_dev_profile *profile,
-- 
cgit v0.10.2


From 403e0689d2a9eb33d9281db4079e964eb65ba9ef Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 5 Nov 2015 18:29:27 +0900
Subject: PM / devfreq: exynos: Add support of bus frequency of sub-blocks
 using passive governor

This patch adds the support of bus frequency feature for sub-blocks which share
the one power line. If each bus depends on the power line, each bus is not able
to change the voltage by oneself. To optimize the power-consumption on runtime,
some buses using the same power line should change the source clock and
regulator at the same time. So, this patch uses the passive governor to support
the bus frequency for all buses which sharing the one power line.

For example,

Exynos3250 include the two power line for AXI buses as following:
: VDD_MIF : MIF (Memory Interface) provide the DMC (Dynamic Memory Controller)
  with the power (regulator).
: VDD_INT : INT (Internal) provide the various sub-blocks with the power
  (regulator).

Each bus is included in as follwoing block. In the case of VDD_MIF, only DMC bus
use the power line. So, there is no any depencency between buese. But, in the
case of VDD_INT, various buses share the one power line of VDD_INT. We need to
make the depenency between buses. When using passive governor, there is no
problem to support the bus frequency as DVFS for all buses. One bus should be
operated as the parent bus device which gathering the current load of INT block
and then decides the new frequency with some governors except of passive
governor. After deciding the new frequency by the parent bus device, the rest
bus devices will change the each source clock according to new frequency of the
parent bus device.

- MIF (Memory Interface) block
: VDD_MIF |--- DMC

- INT (Internal) block
: VDD_INT |--- LEFTBUS (parent)
          |--- PERIL
          |--- MFC
          |--- G3D
          |--- RIGHTBUS
          |--- FSYS
          |--- LCD0
          |--- PERIR
          |--- ISP
          |--- CAM

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
[tjakobi: Reported debugfs error during booting and cw00.choi fix it.]
Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index d260cd0..30e1daf 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -78,6 +78,7 @@ config ARM_EXYNOS_BUS_DEVFREQ
 	bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
 	depends on ARCH_EXYNOS
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DEVFREQ_GOV_PASSIVE
 	select DEVFREQ_EVENT_EXYNOS_PPMU
 	select PM_DEVFREQ_EVENT
 	select PM_OPP
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 137de61..50fe1a1 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -1,7 +1,7 @@
 /*
  * Generic Exynos Bus frequency driver with DEVFREQ Framework
  *
- * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  * Author : Chanwoo Choi <cw00.choi@samsung.com>
  *
  * This driver support Exynos Bus frequency feature by using
@@ -93,7 +93,7 @@ static int exynos_bus_get_event(struct exynos_bus *bus,
 }
 
 /*
- * Must necessary function for devfreq governor
+ * Must necessary function for devfreq simple-ondemand governor
  */
 static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
 {
@@ -202,59 +202,81 @@ static void exynos_bus_exit(struct device *dev)
 		regulator_disable(bus->regulator);
 
 	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
 }
 
-static int exynos_bus_parse_of(struct device_node *np,
-			      struct exynos_bus *bus)
+/*
+ * Must necessary function for devfreq passive governor
+ */
+static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
+					u32 flags)
 {
-	struct device *dev = bus->dev;
-	unsigned long rate;
-	int i, ret, count, size;
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct dev_pm_opp *new_opp;
+	unsigned long old_freq, new_freq;
+	int ret = 0;
 
-	/* Get the clock to provide each bus with source clock */
-	bus->clk = devm_clk_get(dev, "bus");
-	if (IS_ERR(bus->clk)) {
-		dev_err(dev, "failed to get bus clock\n");
-		return PTR_ERR(bus->clk);
+	/* Get new opp-bus instance according to new bus clock */
+	rcu_read_lock();
+	new_opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(new_opp)) {
+		dev_err(dev, "failed to get recommended opp instance\n");
+		rcu_read_unlock();
+		return PTR_ERR(new_opp);
 	}
 
-	ret = clk_prepare_enable(bus->clk);
-	if (ret < 0) {
-		dev_err(dev, "failed to get enable clock\n");
-		return ret;
-	}
+	new_freq = dev_pm_opp_get_freq(new_opp);
+	old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+	rcu_read_unlock();
 
-	/* Get the freq/voltage OPP table to scale the bus frequency */
-	rcu_read_lock();
-	ret = dev_pm_opp_of_add_table(dev);
+	if (old_freq == new_freq)
+		return 0;
+
+	/* Change the frequency according to new OPP level */
+	mutex_lock(&bus->lock);
+
+	ret = clk_set_rate(bus->clk, new_freq);
 	if (ret < 0) {
-		dev_err(dev, "failed to get OPP table\n");
-		rcu_read_unlock();
-		goto err_clk;
+		dev_err(dev, "failed to set the clock of bus\n");
+		goto out;
 	}
 
-	rate = clk_get_rate(bus->clk);
-	bus->curr_opp = dev_pm_opp_find_freq_ceil(dev, &rate);
-	if (IS_ERR(bus->curr_opp)) {
-		dev_err(dev, "failed to find dev_pm_opp\n");
-		rcu_read_unlock();
-		ret = PTR_ERR(bus->curr_opp);
-		goto err_opp;
-	}
-	rcu_read_unlock();
+	*freq = new_freq;
+	bus->curr_opp = new_opp;
+
+	dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+			old_freq/1000, new_freq/1000);
+out:
+	mutex_unlock(&bus->lock);
+
+	return ret;
+}
+
+static void exynos_bus_passive_exit(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+
+	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
+}
+
+static int exynos_bus_parent_parse_of(struct device_node *np,
+					struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	int i, ret, count, size;
 
 	/* Get the regulator to provide each bus with the power */
 	bus->regulator = devm_regulator_get(dev, "vdd");
 	if (IS_ERR(bus->regulator)) {
 		dev_err(dev, "failed to get VDD regulator\n");
-		ret = PTR_ERR(bus->regulator);
-		goto err_opp;
+		return PTR_ERR(bus->regulator);
 	}
 
 	ret = regulator_enable(bus->regulator);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable VDD regulator\n");
-		goto err_opp;
+		return ret;
 	}
 
 	/*
@@ -305,6 +327,51 @@ static int exynos_bus_parse_of(struct device_node *np,
 
 err_regulator:
 	regulator_disable(bus->regulator);
+
+	return ret;
+}
+
+static int exynos_bus_parse_of(struct device_node *np,
+			      struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	unsigned long rate;
+	int ret;
+
+	/* Get the clock to provide each bus with source clock */
+	bus->clk = devm_clk_get(dev, "bus");
+	if (IS_ERR(bus->clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		return PTR_ERR(bus->clk);
+	}
+
+	ret = clk_prepare_enable(bus->clk);
+	if (ret < 0) {
+		dev_err(dev, "failed to get enable clock\n");
+		return ret;
+	}
+
+	/* Get the freq and voltage from OPP table to scale the bus freq */
+	rcu_read_lock();
+	ret = dev_pm_opp_of_add_table(dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to get OPP table\n");
+		rcu_read_unlock();
+		goto err_clk;
+	}
+
+	rate = clk_get_rate(bus->clk);
+	bus->curr_opp = devfreq_recommended_opp(dev, &rate, 0);
+	if (IS_ERR(bus->curr_opp)) {
+		dev_err(dev, "failed to find dev_pm_opp\n");
+		rcu_read_unlock();
+		ret = PTR_ERR(bus->curr_opp);
+		goto err_opp;
+	}
+	rcu_read_unlock();
+
+	return 0;
+
 err_opp:
 	dev_pm_opp_of_remove_table(dev);
 err_clk:
@@ -319,8 +386,11 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	struct devfreq_dev_profile *profile;
 	struct devfreq_simple_ondemand_data *ondemand_data;
+	struct devfreq_passive_data *passive_data;
+	struct devfreq *parent_devfreq;
 	struct exynos_bus *bus;
-	int ret;
+	int ret, max_state;
+	unsigned long min_freq, max_freq;
 
 	if (!np) {
 		dev_err(dev, "failed to find devicetree node\n");
@@ -337,20 +407,33 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	/* Parse the device-tree to get the resource information */
 	ret = exynos_bus_parse_of(np, bus);
 	if (ret < 0)
-		return ret;
+		goto err;
 
-	/* Initalize the struct profile and governor data */
 	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
-	if (!profile)
-		return -ENOMEM;
+	if (!profile) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	if (of_parse_phandle(dev->of_node, "devfreq", 0))
+		goto passive;
+	else
+		ret = exynos_bus_parent_parse_of(np, bus);
+
+	if (ret < 0)
+		goto err;
+
+	/* Initalize the struct profile and governor data for parent device */
 	profile->polling_ms = 50;
 	profile->target = exynos_bus_target;
 	profile->get_dev_status = exynos_bus_get_dev_status;
 	profile->exit = exynos_bus_exit;
 
 	ondemand_data = devm_kzalloc(dev, sizeof(*ondemand_data), GFP_KERNEL);
-	if (!ondemand_data)
-		return -ENOMEM;
+	if (!ondemand_data) {
+		ret = -ENOMEM;
+		goto err;
+	}
 	ondemand_data->upthreshold = 40;
 	ondemand_data->downdifferential = 5;
 
@@ -359,14 +442,15 @@ static int exynos_bus_probe(struct platform_device *pdev)
 						ondemand_data);
 	if (IS_ERR(bus->devfreq)) {
 		dev_err(dev, "failed to add devfreq device\n");
-		return  PTR_ERR(bus->devfreq);
+		ret = PTR_ERR(bus->devfreq);
+		goto err;
 	}
 
 	/* Register opp_notifier to catch the change of OPP  */
 	ret = devm_devfreq_register_opp_notifier(dev, bus->devfreq);
 	if (ret < 0) {
 		dev_err(dev, "failed to register opp notifier\n");
-		return ret;
+		goto err;
 	}
 
 	/*
@@ -376,16 +460,59 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	ret = exynos_bus_enable_edev(bus);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable devfreq-event devices\n");
-		return ret;
+		goto err;
 	}
 
 	ret = exynos_bus_set_event(bus);
 	if (ret < 0) {
 		dev_err(dev, "failed to set event to devfreq-event devices\n");
-		return ret;
+		goto err;
 	}
 
+	goto out;
+passive:
+	/* Initalize the struct profile and governor data for passive device */
+	profile->target = exynos_bus_passive_target;
+	profile->exit = exynos_bus_passive_exit;
+
+	/* Get the instance of parent devfreq device */
+	parent_devfreq = devfreq_get_devfreq_by_phandle(dev, 0);
+	if (IS_ERR(parent_devfreq)) {
+		ret = -EPROBE_DEFER;
+		goto err;
+	}
+
+	passive_data = devm_kzalloc(dev, sizeof(*passive_data), GFP_KERNEL);
+	if (!passive_data) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	passive_data->parent = parent_devfreq;
+
+	/* Add devfreq device for exynos bus with passive governor */
+	bus->devfreq = devm_devfreq_add_device(dev, profile, "passive",
+						passive_data);
+	if (IS_ERR(bus->devfreq)) {
+		dev_err(dev,
+			"failed to add devfreq dev with passive governor\n");
+		ret = -EPROBE_DEFER;
+		goto err;
+	}
+
+out:
+	max_state = bus->devfreq->profile->max_state;
+	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
+	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+	pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
+			dev_name(dev), min_freq, max_freq);
+
 	return 0;
+
+err:
+	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
+
+	return ret;
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
cgit v0.10.2


From a296cb5c35ce5429415043a22a385c24673e4aeb Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 23 Nov 2015 23:18:52 +0900
Subject: PM / devfreq: exynos: Update documentation for bus devices using
 passive governor

This patch updates the documentation for passive bus devices and adds the
detailed example of Exynos3250.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
index 7200817..2fa15dc 100644
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -8,22 +8,46 @@ of the bus in runtime. To monitor the usage of each bus in runtime,
 the driver uses the PPMU (Platform Performance Monitoring Unit), which
 is able to measure the current load of sub-blocks.
 
+The Exynos SoC includes the various sub-blocks which have the each AXI bus.
+The each AXI bus has the owned source clock but, has not the only owned
+power line. The power line might be shared among one more sub-blocks.
+So, we can divide into two type of device as the role of each sub-block.
+There are two type of bus devices as following:
+- parent bus device
+- passive bus device
+
+Basically, parent and passive bus device share the same power line.
+The parent bus device can only change the voltage of shared power line
+and the rest bus devices (passive bus device) depend on the decision of
+the parent bus device. If there are three blocks which share the VDD_xxx
+power line, Only one block should be parent device and then the rest blocks
+should depend on the parent device as passive device.
+
+	VDD_xxx |--- A block (parent)
+		|--- B block (passive)
+		|--- C block (passive)
+
 There are a little different composition among Exynos SoC because each Exynos
 SoC has different sub-blocks. Therefore, such difference should be specified
 in devicetree file instead of each device driver. In result, this driver
 is able to support the bus frequency for all Exynos SoCs.
 
-Required properties for bus device:
+Required properties for all bus devices:
 - compatible: Should be "samsung,exynos-bus".
 - clock-names : the name of clock used by the bus, "bus".
 - clocks : phandles for clock specified in "clock-names" property.
 - operating-points-v2: the OPP table including frequency/voltage information
   to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
+
+Required properties only for parent bus device:
 - vdd-supply: the regulator to provide the buses with the voltage.
 - devfreq-events: the devfreq-event device to monitor the current utilization
   of buses.
 
-Optional properties for bus device:
+Required properties only for passive bus device:
+- devfreq: the parent bus device.
+
+Optional properties only for parent bus device:
 - exynos,saturation-ratio: the percentage value which is used to calibrate
 			the performance count against total cycle count.
 - exynos,voltage-tolerance: the percentage value for bus voltage tolerance
@@ -34,7 +58,20 @@ Example1:
 	power line (regulator). The MIF (Memory Interface) AXI bus is used to
 	transfer data between DRAM and CPU and uses the VDD_MIF regulator.
 
-	- power line(VDD_MIF) --> bus for DMC (Dynamic Memory Controller) block
+	- MIF (Memory Interface) block
+	: VDD_MIF |--- DMC (Dynamic Memory Controller)
+
+	- INT (Internal) block
+	: VDD_INT |--- LEFTBUS (parent device)
+		  |--- PERIL
+		  |--- MFC
+		  |--- G3D
+		  |--- RIGHTBUS
+		  |--- FSYS
+		  |--- LCD0
+		  |--- PERIR
+		  |--- ISP
+		  |--- CAM
 
 	- MIF bus's frequency/voltage table
 	-----------------------
@@ -47,6 +84,24 @@ Example1:
 	|L5| 400000 |875000   |
 	-----------------------
 
+	- INT bus's frequency/voltage table
+	----------------------------------------------------------
+	|Block|LEFTBUS|RIGHTBUS|MCUISP |ISP    |PERIL  ||VDD_INT |
+	| name|       |LCD0    |       |       |       ||        |
+	|     |       |FSYS    |       |       |       ||        |
+	|     |       |MFC     |       |       |       ||        |
+	----------------------------------------------------------
+	|Mode |*parent|passive |passive|passive|passive||        |
+	----------------------------------------------------------
+	|Lv   |Frequency                               ||Voltage |
+	----------------------------------------------------------
+	|L1   |50000  |50000   |50000  |50000  |50000  ||900000  |
+	|L2   |80000  |80000   |80000  |80000  |80000  ||900000  |
+	|L3   |100000 |100000  |100000 |100000 |100000 ||1000000 |
+	|L4   |134000 |134000  |200000 |200000 |       ||1000000 |
+	|L5   |200000 |200000  |400000 |300000 |       ||1000000 |
+	----------------------------------------------------------
+
 Example2 :
 	The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
 	is listed below:
@@ -85,6 +140,154 @@ Example2 :
 		};
 	};
 
+	bus_leftbus: bus_leftbus {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_GDL>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_rightbus: bus_rightbus {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_GDR>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_lcd0: bus_lcd0 {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_160>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_fsys: bus_fsys {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_200>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_mcuisp: bus_mcuisp {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_400_MCUISP>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_mcuisp_opp_table>;
+		status = "disabled";
+	};
+
+	bus_isp: bus_isp {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_266>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_isp_opp_table>;
+		status = "disabled";
+	};
+
+	bus_peril: bus_peril {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_100>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_peril_opp_table>;
+		status = "disabled";
+	};
+
+	bus_mfc: bus_mfc {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_SCLK_MFC>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_leftbus_opp_table: opp_table1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+			opp-microvolt = <900000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+			opp-microvolt = <900000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+			opp-microvolt = <1000000>;
+		};
+		opp@134000000 {
+			opp-hz = /bits/ 64 <134000000>;
+			opp-microvolt = <1000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+			opp-microvolt = <1000000>;
+		};
+	};
+
+	bus_mcuisp_opp_table: opp_table2 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+		};
+		opp@400000000 {
+			opp-hz = /bits/ 64 <400000000>;
+		};
+	};
+
+	bus_isp_opp_table: opp_table3 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+		};
+		opp@300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+		};
+	};
+
+	bus_peril_opp_table: opp_table4 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+	};
+
+
 	Usage case to handle the frequency and voltage of bus on runtime
 	in exynos3250-rinato.dts is listed below:
 
@@ -93,3 +296,44 @@ Example2 :
 		vdd-supply = <&buck1_reg>;	/* VDD_MIF */
 		status = "okay";
 	};
+
+	&bus_leftbus {
+		devfreq-events = <&ppmu_leftbus_3>, <&ppmu_rightbus_3>;
+		vdd-supply = <&buck3_reg>;
+		status = "okay";
+	};
+
+	&bus_rightbus {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_lcd0 {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_fsys {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_mcuisp {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_isp {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_peril {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_mfc {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
-- 
cgit v0.10.2


From 38279cac36397477d14edf84eff77c8454bc259a Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 27 Nov 2015 12:55:23 +0900
Subject: PM / devfreq: exynos: Add the detailed correlation between sub-blocks
 and power line

This patch adds the detailed correlation between sub-blocks and power line
for Exynos3250, Exynos4210 and Exynos4x12.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
index 2fa15dc..7dbd4ab 100644
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -53,6 +53,57 @@ Optional properties only for parent bus device:
 - exynos,voltage-tolerance: the percentage value for bus voltage tolerance
 			which is used to calculate the max voltage.
 
+Detailed correlation between sub-blocks and power line according to Exynos SoC:
+- In case of Exynos3250, there are two power line as following:
+	VDD_MIF |--- DMC
+
+	VDD_INT |--- LEFTBUS (parent device)
+		|--- PERIL
+		|--- MFC
+		|--- G3D
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- FSYS
+		|--- LCD0
+		|--- PERIR
+		|--- ISP
+		|--- CAM
+
+- In case of Exynos4210, there is one power line as following:
+	VDD_INT |--- DMC (parent device)
+		|--- LEFTBUS
+		|--- PERIL
+		|--- MFC(L)
+		|--- G3D
+		|--- TV
+		|--- LCD0
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- MFC(R)
+		|--- CAM
+		|--- FSYS
+		|--- GPS
+		|--- LCD0
+		|--- LCD1
+
+- In case of Exynos4x12, there are two power line as following:
+	VDD_MIF |--- DMC
+
+	VDD_INT |--- LEFTBUS (parent device)
+		|--- PERIL
+		|--- MFC(L)
+		|--- G3D
+		|--- TV
+		|--- IMAGE
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- MFC(R)
+		|--- CAM
+		|--- FSYS
+		|--- GPS
+		|--- LCD0
+		|--- ISP
+
 Example1:
 	Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to
 	power line (regulator). The MIF (Memory Interface) AXI bus is used to
-- 
cgit v0.10.2


From bfcd6204871cf63e6833b87cdcfe47d7d2da8a29 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 23 Nov 2015 23:58:17 +0900
Subject: PM / devfreq: exynos: Remove unused exynos4/5 busfreq driver

This patch removes the unused exynos4/5 busfreq driver. Instead,
generic exynos-bus frequency driver support the all Exynos SoCs.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 30e1daf..78dac0e 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -90,28 +90,6 @@ config ARM_EXYNOS_BUS_DEVFREQ
 	  and adjusts the operating frequencies and voltages with OPP support.
 	  This does not yet operate with optimal voltages.
 
-config ARM_EXYNOS4_BUS_DEVFREQ
-	bool "ARM Exynos4210/4212/4412 Memory Bus DEVFREQ Driver"
-	depends on (CPU_EXYNOS4210 || SOC_EXYNOS4212 || SOC_EXYNOS4412) && !ARCH_MULTIPLATFORM
-	select DEVFREQ_GOV_SIMPLE_ONDEMAND
-	select PM_OPP
-	help
-	  This adds the DEVFREQ driver for Exynos4210 memory bus (vdd_int)
-	  and Exynos4212/4412 memory interface and bus (vdd_mif + vdd_int).
-	  It reads PPMU counters of memory controllers and adjusts
-	  the operating frequencies and voltages with OPP support.
-	  This does not yet operate with optimal voltages.
-
-config ARM_EXYNOS5_BUS_DEVFREQ
-	tristate "ARM Exynos5250 Bus DEVFREQ Driver"
-	depends on SOC_EXYNOS5250
-	select DEVFREQ_GOV_SIMPLE_ONDEMAND
-	select PM_OPP
-	help
-	  This adds the DEVFREQ driver for Exynos5250 bus interface (vdd_int).
-	  It reads PPMU counters of memory controllers and adjusts the
-	  operating frequencies and voltages with OPP support.
-
 config ARM_TEGRA_DEVFREQ
        tristate "Tegra DEVFREQ Driver"
        depends on ARCH_TEGRA_124_SOC
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 2633087..09f11d9 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -8,8 +8,6 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)	+= governor_passive.o
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos/
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos/
 obj-$(CONFIG_ARM_TEGRA_DEVFREQ)		+= tegra-devfreq.o
 
 # DEVFREQ Event Drivers
diff --git a/drivers/devfreq/exynos/Makefile b/drivers/devfreq/exynos/Makefile
deleted file mode 100644
index 49bc917..0000000
--- a/drivers/devfreq/exynos/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Exynos DEVFREQ Drivers
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos_ppmu.o exynos4_bus.o
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos_ppmu.o exynos5_bus.o
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
deleted file mode 100644
index da95092..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-/* drivers/devfreq/exynos4210_memorybus.c
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *	MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * EXYNOS4 - Memory/Bus clock frequency scaling support in DEVFREQ framework
- *	This version supports EXYNOS4210 only. This changes bus frequencies
- *	and vddint voltages. Exynos4412/4212 should be able to be supported
- *	with minor modifications.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/suspend.h>
-#include <linux/pm_opp.h>
-#include <linux/devfreq.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/module.h>
-
-#include <mach/map.h>
-
-#include "exynos_ppmu.h"
-#include "exynos4_bus.h"
-
-#define MAX_SAFEVOLT	1200000 /* 1.2V */
-
-enum exynos4_busf_type {
-	TYPE_BUSF_EXYNOS4210,
-	TYPE_BUSF_EXYNOS4x12,
-};
-
-/* Assume that the bus is saturated if the utilization is 40% */
-#define BUS_SATURATION_RATIO	40
-
-enum busclk_level_idx {
-	LV_0 = 0,
-	LV_1,
-	LV_2,
-	LV_3,
-	LV_4,
-	_LV_END
-};
-
-enum exynos_ppmu_idx {
-	PPMU_DMC0,
-	PPMU_DMC1,
-	PPMU_END,
-};
-
-#define EX4210_LV_MAX	LV_2
-#define EX4x12_LV_MAX	LV_4
-#define EX4210_LV_NUM	(LV_2 + 1)
-#define EX4x12_LV_NUM	(LV_4 + 1)
-
-/**
- * struct busfreq_opp_info - opp information for bus
- * @rate:	Frequency in hertz
- * @volt:	Voltage in microvolts corresponding to this OPP
- */
-struct busfreq_opp_info {
-	unsigned long rate;
-	unsigned long volt;
-};
-
-struct busfreq_data {
-	enum exynos4_busf_type type;
-	struct device *dev;
-	struct devfreq *devfreq;
-	bool disabled;
-	struct regulator *vdd_int;
-	struct regulator *vdd_mif; /* Exynos4412/4212 only */
-	struct busfreq_opp_info curr_oppinfo;
-	struct busfreq_ppmu_data ppmu_data;
-
-	struct notifier_block pm_notifier;
-	struct mutex lock;
-
-	/* Dividers calculated at boot/probe-time */
-	unsigned int dmc_divtable[_LV_END]; /* DMC0 */
-	unsigned int top_divtable[_LV_END];
-};
-
-/* 4210 controls clock of mif and voltage of int */
-static struct bus_opp_table exynos4210_busclk_table[] = {
-	{LV_0, 400000, 1150000},
-	{LV_1, 267000, 1050000},
-	{LV_2, 133000, 1025000},
-	{0, 0, 0},
-};
-
-/*
- * MIF is the main control knob clock for Exynos4x12 MIF/INT
- * clock and voltage of both mif/int are controlled.
- */
-static struct bus_opp_table exynos4x12_mifclk_table[] = {
-	{LV_0, 400000, 1100000},
-	{LV_1, 267000, 1000000},
-	{LV_2, 160000, 950000},
-	{LV_3, 133000, 950000},
-	{LV_4, 100000, 950000},
-	{0, 0, 0},
-};
-
-/*
- * INT is not the control knob of 4x12. LV_x is not meant to represent
- * the current performance. (MIF does)
- */
-static struct bus_opp_table exynos4x12_intclk_table[] = {
-	{LV_0, 200000, 1000000},
-	{LV_1, 160000, 950000},
-	{LV_2, 133000, 925000},
-	{LV_3, 100000, 900000},
-	{0, 0, 0},
-};
-
-/* TODO: asv volt definitions are "__initdata"? */
-/* Some chips have different operating voltages */
-static unsigned int exynos4210_asv_volt[][EX4210_LV_NUM] = {
-	{1150000, 1050000, 1050000},
-	{1125000, 1025000, 1025000},
-	{1100000, 1000000, 1000000},
-	{1075000, 975000, 975000},
-	{1050000, 950000, 950000},
-};
-
-static unsigned int exynos4x12_mif_step_50[][EX4x12_LV_NUM] = {
-	/* 400      267     160     133     100 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV0 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV1 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV2 */
-	{1050000, 900000, 900000, 900000, 900000}, /* ASV3 */
-	{1050000, 900000, 900000, 900000, 850000}, /* ASV4 */
-	{1050000, 900000, 900000, 850000, 850000}, /* ASV5 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV6 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV7 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV8 */
-};
-
-static unsigned int exynos4x12_int_volt[][EX4x12_LV_NUM] = {
-	/* 200    160      133     100 */
-	{1000000, 950000, 925000, 900000}, /* ASV0 */
-	{975000,  925000, 925000, 900000}, /* ASV1 */
-	{950000,  925000, 900000, 875000}, /* ASV2 */
-	{950000,  900000, 900000, 875000}, /* ASV3 */
-	{925000,  875000, 875000, 875000}, /* ASV4 */
-	{900000,  850000, 850000, 850000}, /* ASV5 */
-	{900000,  850000, 850000, 850000}, /* ASV6 */
-	{900000,  850000, 850000, 850000}, /* ASV7 */
-	{900000,  850000, 850000, 850000}, /* ASV8 */
-};
-
-/*** Clock Divider Data for Exynos4210 ***/
-static unsigned int exynos4210_clkdiv_dmc0[][8] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
-	 *		DIVDMCP, DIVCOPY2, DIVCORE_TIMERS }
-	 */
-
-	/* DMC L0: 400MHz */
-	{ 3, 1, 1, 1, 1, 1, 3, 1 },
-	/* DMC L1: 266.7MHz */
-	{ 4, 1, 1, 2, 1, 1, 3, 1 },
-	/* DMC L2: 133MHz */
-	{ 5, 1, 1, 5, 1, 1, 3, 1 },
-};
-static unsigned int exynos4210_clkdiv_top[][5] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACLK200, DIVACLK100, DIVACLK160, DIVACLK133, DIVONENAND }
-	 */
-	/* ACLK200 L0: 200MHz */
-	{ 3, 7, 4, 5, 1 },
-	/* ACLK200 L1: 160MHz */
-	{ 4, 7, 5, 6, 1 },
-	/* ACLK200 L2: 133MHz */
-	{ 5, 7, 7, 7, 1 },
-};
-static unsigned int exynos4210_clkdiv_lr_bus[][2] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVGDL/R, DIVGPL/R }
-	 */
-	/* ACLK_GDL/R L1: 200MHz */
-	{ 3, 1 },
-	/* ACLK_GDL/R L2: 160MHz */
-	{ 4, 1 },
-	/* ACLK_GDL/R L3: 133MHz */
-	{ 5, 1 },
-};
-
-/*** Clock Divider Data for Exynos4212/4412 ***/
-static unsigned int exynos4x12_clkdiv_dmc0[][6] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
-	 *              DIVDMCP}
-	 */
-
-	/* DMC L0: 400MHz */
-	{3, 1, 1, 1, 1, 1},
-	/* DMC L1: 266.7MHz */
-	{4, 1, 1, 2, 1, 1},
-	/* DMC L2: 160MHz */
-	{5, 1, 1, 4, 1, 1},
-	/* DMC L3: 133MHz */
-	{5, 1, 1, 5, 1, 1},
-	/* DMC L4: 100MHz */
-	{7, 1, 1, 7, 1, 1},
-};
-static unsigned int exynos4x12_clkdiv_dmc1[][6] = {
-	/*
-	 * Clock divider value for following
-	 * { G2DACP, DIVC2C, DIVC2C_ACLK }
-	 */
-
-	/* DMC L0: 400MHz */
-	{3, 1, 1},
-	/* DMC L1: 266.7MHz */
-	{4, 2, 1},
-	/* DMC L2: 160MHz */
-	{5, 4, 1},
-	/* DMC L3: 133MHz */
-	{5, 5, 1},
-	/* DMC L4: 100MHz */
-	{7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_top[][5] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACLK266_GPS, DIVACLK100, DIVACLK160,
-		DIVACLK133, DIVONENAND }
-	 */
-
-	/* ACLK_GDL/R L0: 200MHz */
-	{2, 7, 4, 5, 1},
-	/* ACLK_GDL/R L1: 200MHz */
-	{2, 7, 4, 5, 1},
-	/* ACLK_GDL/R L2: 160MHz */
-	{4, 7, 5, 7, 1},
-	/* ACLK_GDL/R L3: 133MHz */
-	{4, 7, 5, 7, 1},
-	/* ACLK_GDL/R L4: 100MHz */
-	{7, 7, 7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_lr_bus[][2] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVGDL/R, DIVGPL/R }
-	 */
-
-	/* ACLK_GDL/R L0: 200MHz */
-	{3, 1},
-	/* ACLK_GDL/R L1: 200MHz */
-	{3, 1},
-	/* ACLK_GDL/R L2: 160MHz */
-	{4, 1},
-	/* ACLK_GDL/R L3: 133MHz */
-	{5, 1},
-	/* ACLK_GDL/R L4: 100MHz */
-	{7, 1},
-};
-static unsigned int exynos4x12_clkdiv_sclkip[][3] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVMFC, DIVJPEG, DIVFIMC0~3}
-	 */
-
-	/* SCLK_MFC: 200MHz */
-	{3, 3, 4},
-	/* SCLK_MFC: 200MHz */
-	{3, 3, 4},
-	/* SCLK_MFC: 160MHz */
-	{4, 4, 5},
-	/* SCLK_MFC: 133MHz */
-	{5, 5, 5},
-	/* SCLK_MFC: 100MHz */
-	{7, 7, 7},
-};
-
-
-static int exynos4210_set_busclk(struct busfreq_data *data,
-				 struct busfreq_opp_info *oppi)
-{
-	unsigned int index;
-	unsigned int tmp;
-
-	for (index = LV_0; index < EX4210_LV_NUM; index++)
-		if (oppi->rate == exynos4210_busclk_table[index].clk)
-			break;
-
-	if (index == EX4210_LV_NUM)
-		return -EINVAL;
-
-	/* Change Divider - DMC0 */
-	tmp = data->dmc_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
-	} while (tmp & 0x11111111);
-
-	/* Change Divider - TOP */
-	tmp = data->top_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
-	} while (tmp & 0x11111);
-
-	/* Change Divider - LEFTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4210_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - RIGHTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4210_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
-	} while (tmp & 0x11);
-
-	return 0;
-}
-
-static int exynos4x12_set_busclk(struct busfreq_data *data,
-				 struct busfreq_opp_info *oppi)
-{
-	unsigned int index;
-	unsigned int tmp;
-
-	for (index = LV_0; index < EX4x12_LV_NUM; index++)
-		if (oppi->rate == exynos4x12_mifclk_table[index].clk)
-			break;
-
-	if (index == EX4x12_LV_NUM)
-		return -EINVAL;
-
-	/* Change Divider - DMC0 */
-	tmp = data->dmc_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
-	} while (tmp & 0x11111111);
-
-	/* Change Divider - DMC1 */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC1);
-
-	tmp &= ~(EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK |
-		EXYNOS4_CLKDIV_DMC1_C2C_MASK |
-		EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_dmc1[index][0] <<
-				EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT) |
-		(exynos4x12_clkdiv_dmc1[index][1] <<
-				EXYNOS4_CLKDIV_DMC1_C2C_SHIFT) |
-		(exynos4x12_clkdiv_dmc1[index][2] <<
-				EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC1);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC1);
-	} while (tmp & 0x111111);
-
-	/* Change Divider - TOP */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
-
-	tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
-		EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_top[index][0] <<
-				EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT) |
-		(exynos4x12_clkdiv_top[index][1] <<
-				EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
-		(exynos4x12_clkdiv_top[index][2] <<
-				EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
-		(exynos4x12_clkdiv_top[index][3] <<
-				EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
-		(exynos4x12_clkdiv_top[index][4] <<
-				EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
-	} while (tmp & 0x11111);
-
-	/* Change Divider - LEFTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4x12_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - RIGHTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4x12_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - MFC */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_MFC);
-
-	tmp &= ~(EXYNOS4_CLKDIV_MFC_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][0] <<
-				EXYNOS4_CLKDIV_MFC_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_MFC);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_MFC);
-	} while (tmp & 0x1);
-
-	/* Change Divider - JPEG */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_CAM1);
-
-	tmp &= ~(EXYNOS4_CLKDIV_CAM1_JPEG_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][1] <<
-				EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_CAM1);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
-	} while (tmp & 0x1);
-
-	/* Change Divider - FIMC0~3 */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_CAM);
-
-	tmp &= ~(EXYNOS4_CLKDIV_CAM_FIMC0_MASK | EXYNOS4_CLKDIV_CAM_FIMC1_MASK |
-		EXYNOS4_CLKDIV_CAM_FIMC2_MASK | EXYNOS4_CLKDIV_CAM_FIMC3_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_CAM);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
-	} while (tmp & 0x1111);
-
-	return 0;
-}
-
-static int exynos4x12_get_intspec(unsigned long mifclk)
-{
-	int i = 0;
-
-	while (exynos4x12_intclk_table[i].clk) {
-		if (exynos4x12_intclk_table[i].clk <= mifclk)
-			return i;
-		i++;
-	}
-
-	return -EINVAL;
-}
-
-static int exynos4_bus_setvolt(struct busfreq_data *data,
-			       struct busfreq_opp_info *oppi,
-			       struct busfreq_opp_info *oldoppi)
-{
-	int err = 0, tmp;
-	unsigned long volt = oppi->volt;
-
-	switch (data->type) {
-	case TYPE_BUSF_EXYNOS4210:
-		/* OPP represents DMC clock + INT voltage */
-		err = regulator_set_voltage(data->vdd_int, volt,
-					    MAX_SAFEVOLT);
-		break;
-	case TYPE_BUSF_EXYNOS4x12:
-		/* OPP represents MIF clock + MIF voltage */
-		err = regulator_set_voltage(data->vdd_mif, volt,
-					    MAX_SAFEVOLT);
-		if (err)
-			break;
-
-		tmp = exynos4x12_get_intspec(oppi->rate);
-		if (tmp < 0) {
-			err = tmp;
-			regulator_set_voltage(data->vdd_mif,
-					      oldoppi->volt,
-					      MAX_SAFEVOLT);
-			break;
-		}
-		err = regulator_set_voltage(data->vdd_int,
-					    exynos4x12_intclk_table[tmp].volt,
-					    MAX_SAFEVOLT);
-		/*  Try to recover */
-		if (err)
-			regulator_set_voltage(data->vdd_mif,
-					      oldoppi->volt,
-					      MAX_SAFEVOLT);
-		break;
-	default:
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
-static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
-			      u32 flags)
-{
-	int err = 0;
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data *data = platform_get_drvdata(pdev);
-	struct dev_pm_opp *opp;
-	unsigned long freq;
-	unsigned long old_freq = data->curr_oppinfo.rate;
-	struct busfreq_opp_info	new_oppinfo;
-
-	rcu_read_lock();
-	opp = devfreq_recommended_opp(dev, _freq, flags);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		return PTR_ERR(opp);
-	}
-	new_oppinfo.rate = dev_pm_opp_get_freq(opp);
-	new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-	freq = new_oppinfo.rate;
-
-	if (old_freq == freq)
-		return 0;
-
-	dev_dbg(dev, "targeting %lukHz %luuV\n", freq, new_oppinfo.volt);
-
-	mutex_lock(&data->lock);
-
-	if (data->disabled)
-		goto out;
-
-	if (old_freq < freq)
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-	if (err)
-		goto out;
-
-	if (old_freq != freq) {
-		switch (data->type) {
-		case TYPE_BUSF_EXYNOS4210:
-			err = exynos4210_set_busclk(data, &new_oppinfo);
-			break;
-		case TYPE_BUSF_EXYNOS4x12:
-			err = exynos4x12_set_busclk(data, &new_oppinfo);
-			break;
-		default:
-			err = -EINVAL;
-		}
-	}
-	if (err)
-		goto out;
-
-	if (old_freq > freq)
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-	if (err)
-		goto out;
-
-	data->curr_oppinfo = new_oppinfo;
-out:
-	mutex_unlock(&data->lock);
-	return err;
-}
-
-static int exynos4_bus_get_dev_status(struct device *dev,
-				      struct devfreq_dev_status *stat)
-{
-	struct busfreq_data *data = dev_get_drvdata(dev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-	int busier;
-
-	exynos_read_ppmu(ppmu_data);
-	busier = exynos_get_busier_ppmu(ppmu_data);
-	stat->current_frequency = data->curr_oppinfo.rate;
-
-	/* Number of cycles spent on memory access */
-	stat->busy_time = ppmu_data->ppmu[busier].count[PPMU_PMNCNT3];
-	stat->busy_time *= 100 / BUS_SATURATION_RATIO;
-	stat->total_time = ppmu_data->ppmu[busier].ccnt;
-
-	/* If the counters have overflown, retry */
-	if (ppmu_data->ppmu[busier].ccnt_overflow ||
-	    ppmu_data->ppmu[busier].count_overflow[0])
-		return -EAGAIN;
-
-	return 0;
-}
-
-static struct devfreq_dev_profile exynos4_devfreq_profile = {
-	.initial_freq	= 400000,
-	.polling_ms	= 50,
-	.target		= exynos4_bus_target,
-	.get_dev_status	= exynos4_bus_get_dev_status,
-};
-
-static int exynos4210_init_tables(struct busfreq_data *data)
-{
-	u32 tmp;
-	int mgrp;
-	int i, err = 0;
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
-	for (i = LV_0; i < EX4210_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
-			EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
-			EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMC_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCP_MASK |
-			EXYNOS4_CLKDIV_DMC0_COPY2_MASK |
-			EXYNOS4_CLKDIV_DMC0_CORETI_MASK);
-
-		tmp |= ((exynos4210_clkdiv_dmc0[i][0] <<
-					EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][1] <<
-					EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][2] <<
-					EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][3] <<
-					EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][4] <<
-					EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][5] <<
-					EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][6] <<
-					EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][7] <<
-					EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT));
-
-		data->dmc_divtable[i] = tmp;
-	}
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
-	for (i = LV_0; i <  EX4210_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK200_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
-			EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
-		tmp |= ((exynos4210_clkdiv_top[i][0] <<
-					EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT) |
-			(exynos4210_clkdiv_top[i][1] <<
-					EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
-			(exynos4210_clkdiv_top[i][2] <<
-					EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
-			(exynos4210_clkdiv_top[i][3] <<
-					EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
-			(exynos4210_clkdiv_top[i][4] <<
-					EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
-		data->top_divtable[i] = tmp;
-	}
-
-	/*
-	 * TODO: init tmp based on busfreq_data
-	 * (device-tree or platform-data)
-	 */
-	tmp = 0; /* Max voltages for the reliability of the unknown */
-
-	pr_debug("ASV Group of Exynos4 is %d\n", tmp);
-	/* Use merged grouping for voltage */
-	switch (tmp) {
-	case 0:
-		mgrp = 0;
-		break;
-	case 1:
-	case 2:
-		mgrp = 1;
-		break;
-	case 3:
-	case 4:
-		mgrp = 2;
-		break;
-	case 5:
-	case 6:
-		mgrp = 3;
-		break;
-	case 7:
-		mgrp = 4;
-		break;
-	default:
-		pr_warn("Unknown ASV Group. Use max voltage.\n");
-		mgrp = 0;
-	}
-
-	for (i = LV_0; i < EX4210_LV_NUM; i++)
-		exynos4210_busclk_table[i].volt = exynos4210_asv_volt[mgrp][i];
-
-	for (i = LV_0; i < EX4210_LV_NUM; i++) {
-		err = dev_pm_opp_add(data->dev, exynos4210_busclk_table[i].clk,
-			      exynos4210_busclk_table[i].volt);
-		if (err) {
-			dev_err(data->dev, "Cannot add opp entries.\n");
-			return err;
-		}
-	}
-
-
-	return 0;
-}
-
-static int exynos4x12_init_tables(struct busfreq_data *data)
-{
-	unsigned int i;
-	unsigned int tmp;
-	int ret;
-
-	/* Enable pause function for DREX2 DVFS */
-	tmp = __raw_readl(EXYNOS4_DMC_PAUSE_CTRL);
-	tmp |= EXYNOS4_DMC_PAUSE_ENABLE;
-	__raw_writel(tmp, EXYNOS4_DMC_PAUSE_CTRL);
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
-
-	for (i = 0; i <  EX4x12_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
-			EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
-			EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMC_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCP_MASK);
-
-		tmp |= ((exynos4x12_clkdiv_dmc0[i][0] <<
-					EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][1] <<
-					EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][2] <<
-					EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][3] <<
-					EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][4] <<
-					EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][5] <<
-					EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT));
-
-		data->dmc_divtable[i] = tmp;
-	}
-
-	tmp = 0; /* Max voltages for the reliability of the unknown */
-
-	if (tmp > 8)
-		tmp = 0;
-	pr_debug("ASV Group of Exynos4x12 is %d\n", tmp);
-
-	for (i = 0; i < EX4x12_LV_NUM; i++) {
-		exynos4x12_mifclk_table[i].volt =
-			exynos4x12_mif_step_50[tmp][i];
-		exynos4x12_intclk_table[i].volt =
-			exynos4x12_int_volt[tmp][i];
-	}
-
-	for (i = 0; i < EX4x12_LV_NUM; i++) {
-		ret = dev_pm_opp_add(data->dev, exynos4x12_mifclk_table[i].clk,
-			      exynos4x12_mifclk_table[i].volt);
-		if (ret) {
-			dev_err(data->dev, "Fail to add opp entries.\n");
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
-		unsigned long event, void *ptr)
-{
-	struct busfreq_data *data = container_of(this, struct busfreq_data,
-						 pm_notifier);
-	struct dev_pm_opp *opp;
-	struct busfreq_opp_info	new_oppinfo;
-	unsigned long maxfreq = ULONG_MAX;
-	int err = 0;
-
-	switch (event) {
-	case PM_SUSPEND_PREPARE:
-		/* Set Fastest and Deactivate DVFS */
-		mutex_lock(&data->lock);
-
-		data->disabled = true;
-
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			dev_err(data->dev, "%s: unable to find a min freq\n",
-				__func__);
-			mutex_unlock(&data->lock);
-			return PTR_ERR(opp);
-		}
-		new_oppinfo.rate = dev_pm_opp_get_freq(opp);
-		new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-		rcu_read_unlock();
-
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-		if (err)
-			goto unlock;
-
-		switch (data->type) {
-		case TYPE_BUSF_EXYNOS4210:
-			err = exynos4210_set_busclk(data, &new_oppinfo);
-			break;
-		case TYPE_BUSF_EXYNOS4x12:
-			err = exynos4x12_set_busclk(data, &new_oppinfo);
-			break;
-		default:
-			err = -EINVAL;
-		}
-		if (err)
-			goto unlock;
-
-		data->curr_oppinfo = new_oppinfo;
-unlock:
-		mutex_unlock(&data->lock);
-		if (err)
-			return err;
-		return NOTIFY_OK;
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		/* Reactivate */
-		mutex_lock(&data->lock);
-		data->disabled = false;
-		mutex_unlock(&data->lock);
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int exynos4_busfreq_probe(struct platform_device *pdev)
-{
-	struct busfreq_data *data;
-	struct busfreq_ppmu_data *ppmu_data;
-	struct dev_pm_opp *opp;
-	struct device *dev = &pdev->dev;
-	int err = 0;
-
-	data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data), GFP_KERNEL);
-	if (data == NULL) {
-		dev_err(dev, "Cannot allocate memory.\n");
-		return -ENOMEM;
-	}
-
-	ppmu_data = &data->ppmu_data;
-	ppmu_data->ppmu_end = PPMU_END;
-	ppmu_data->ppmu = devm_kzalloc(dev,
-				       sizeof(struct exynos_ppmu) * PPMU_END,
-				       GFP_KERNEL);
-	if (!ppmu_data->ppmu) {
-		dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
-		return -ENOMEM;
-	}
-
-	data->type = pdev->id_entry->driver_data;
-	ppmu_data->ppmu[PPMU_DMC0].hw_base = S5P_VA_DMC0;
-	ppmu_data->ppmu[PPMU_DMC1].hw_base = S5P_VA_DMC1;
-	data->pm_notifier.notifier_call = exynos4_busfreq_pm_notifier_event;
-	data->dev = dev;
-	mutex_init(&data->lock);
-
-	switch (data->type) {
-	case TYPE_BUSF_EXYNOS4210:
-		err = exynos4210_init_tables(data);
-		break;
-	case TYPE_BUSF_EXYNOS4x12:
-		err = exynos4x12_init_tables(data);
-		break;
-	default:
-		dev_err(dev, "Cannot determine the device id %d\n", data->type);
-		err = -EINVAL;
-	}
-	if (err) {
-		dev_err(dev, "Cannot initialize busfreq table %d\n",
-			     data->type);
-		return err;
-	}
-
-	data->vdd_int = devm_regulator_get(dev, "vdd_int");
-	if (IS_ERR(data->vdd_int)) {
-		dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
-		return PTR_ERR(data->vdd_int);
-	}
-	if (data->type == TYPE_BUSF_EXYNOS4x12) {
-		data->vdd_mif = devm_regulator_get(dev, "vdd_mif");
-		if (IS_ERR(data->vdd_mif)) {
-			dev_err(dev, "Cannot get the regulator \"vdd_mif\"\n");
-			return PTR_ERR(data->vdd_mif);
-		}
-	}
-
-	rcu_read_lock();
-	opp = dev_pm_opp_find_freq_floor(dev,
-					 &exynos4_devfreq_profile.initial_freq);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
-			exynos4_devfreq_profile.initial_freq);
-		return PTR_ERR(opp);
-	}
-	data->curr_oppinfo.rate = dev_pm_opp_get_freq(opp);
-	data->curr_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-
-	platform_set_drvdata(pdev, data);
-
-	data->devfreq = devm_devfreq_add_device(dev, &exynos4_devfreq_profile,
-					   "simple_ondemand", NULL);
-	if (IS_ERR(data->devfreq))
-		return PTR_ERR(data->devfreq);
-
-	/*
-	 * Start PPMU (Performance Profiling Monitoring Unit) to check
-	 * utilization of each IP in the Exynos4 SoC.
-	 */
-	busfreq_mon_reset(ppmu_data);
-
-	/* Register opp_notifier for Exynos4 busfreq */
-	err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
-	if (err < 0) {
-		dev_err(dev, "Failed to register opp notifier\n");
-		return err;
-	}
-
-	/* Register pm_notifier for Exynos4 busfreq */
-	err = register_pm_notifier(&data->pm_notifier);
-	if (err) {
-		dev_err(dev, "Failed to setup pm notifier\n");
-		return err;
-	}
-
-	return 0;
-}
-
-static int exynos4_busfreq_remove(struct platform_device *pdev)
-{
-	struct busfreq_data *data = platform_get_drvdata(pdev);
-
-	/* Unregister all of notifier chain */
-	unregister_pm_notifier(&data->pm_notifier);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos4_busfreq_resume(struct device *dev)
-{
-	struct busfreq_data *data = dev_get_drvdata(dev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
-	busfreq_mon_reset(ppmu_data);
-	return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(exynos4_busfreq_pm_ops, NULL, exynos4_busfreq_resume);
-
-static const struct platform_device_id exynos4_busfreq_id[] = {
-	{ "exynos4210-busfreq", TYPE_BUSF_EXYNOS4210 },
-	{ "exynos4412-busfreq", TYPE_BUSF_EXYNOS4x12 },
-	{ "exynos4212-busfreq", TYPE_BUSF_EXYNOS4x12 },
-	{ },
-};
-
-static struct platform_driver exynos4_busfreq_driver = {
-	.probe	= exynos4_busfreq_probe,
-	.remove	= exynos4_busfreq_remove,
-	.id_table = exynos4_busfreq_id,
-	.driver = {
-		.name	= "exynos4-busfreq",
-		.pm	= &exynos4_busfreq_pm_ops,
-	},
-};
-
-static int __init exynos4_busfreq_init(void)
-{
-	return platform_driver_register(&exynos4_busfreq_driver);
-}
-late_initcall(exynos4_busfreq_init);
-
-static void __exit exynos4_busfreq_exit(void)
-{
-	platform_driver_unregister(&exynos4_busfreq_driver);
-}
-module_exit(exynos4_busfreq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS4 busfreq driver with devfreq framework");
-MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
diff --git a/drivers/devfreq/exynos/exynos4_bus.h b/drivers/devfreq/exynos/exynos4_bus.h
deleted file mode 100644
index 94c73c1..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS4 BUS header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS4_BUS_H
-#define __DEVFREQ_EXYNOS4_BUS_H __FILE__
-
-#include <mach/map.h>
-
-#define EXYNOS4_CLKDIV_LEFTBUS			(S5P_VA_CMU + 0x04500)
-#define EXYNOS4_CLKDIV_STAT_LEFTBUS		(S5P_VA_CMU + 0x04600)
-
-#define EXYNOS4_CLKDIV_RIGHTBUS			(S5P_VA_CMU + 0x08500)
-#define EXYNOS4_CLKDIV_STAT_RIGHTBUS		(S5P_VA_CMU + 0x08600)
-
-#define EXYNOS4_CLKDIV_TOP			(S5P_VA_CMU + 0x0C510)
-#define EXYNOS4_CLKDIV_CAM			(S5P_VA_CMU + 0x0C520)
-#define EXYNOS4_CLKDIV_MFC			(S5P_VA_CMU + 0x0C528)
-
-#define EXYNOS4_CLKDIV_STAT_TOP			(S5P_VA_CMU + 0x0C610)
-#define EXYNOS4_CLKDIV_STAT_MFC			(S5P_VA_CMU + 0x0C628)
-
-#define EXYNOS4210_CLKGATE_IP_IMAGE		(S5P_VA_CMU + 0x0C930)
-#define EXYNOS4212_CLKGATE_IP_IMAGE		(S5P_VA_CMU + 0x04930)
-
-#define EXYNOS4_CLKDIV_DMC0			(S5P_VA_CMU + 0x10500)
-#define EXYNOS4_CLKDIV_DMC1			(S5P_VA_CMU + 0x10504)
-#define EXYNOS4_CLKDIV_STAT_DMC0		(S5P_VA_CMU + 0x10600)
-#define EXYNOS4_CLKDIV_STAT_DMC1		(S5P_VA_CMU + 0x10604)
-
-#define EXYNOS4_DMC_PAUSE_CTRL			(S5P_VA_CMU + 0x11094)
-#define EXYNOS4_DMC_PAUSE_ENABLE		(1 << 0)
-
-#define EXYNOS4_CLKDIV_DMC0_ACP_SHIFT		(0)
-#define EXYNOS4_CLKDIV_DMC0_ACP_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT	(4)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK	(0x7 << EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT		(8)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMC_SHIFT		(12)
-#define EXYNOS4_CLKDIV_DMC0_DMC_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMC_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT		(16)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT		(20)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT		(24)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT	(28)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT)
-
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT	(0)
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK	(0xf << EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2C_SHIFT		(4)
-#define EXYNOS4_CLKDIV_DMC1_C2C_MASK		(0x7 << EXYNOS4_CLKDIV_DMC1_C2C_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_PWI_SHIFT		(8)
-#define EXYNOS4_CLKDIV_DMC1_PWI_MASK		(0xf << EXYNOS4_CLKDIV_DMC1_PWI_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT	(12)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK	(0x7 << EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT		(16)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_MASK		(0x7f << EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DPM_SHIFT		(24)
-#define EXYNOS4_CLKDIV_DMC1_DPM_MASK		(0x7f << EXYNOS4_CLKDIV_DMC1_DPM_SHIFT)
-
-#define EXYNOS4_CLKDIV_MFC_SHIFT		(0)
-#define EXYNOS4_CLKDIV_MFC_MASK			(0x7 << EXYNOS4_CLKDIV_MFC_SHIFT)
-
-#define EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT	(0)
-#define EXYNOS4_CLKDIV_TOP_ACLK200_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT	(4)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_MASK		(0xF << EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT	(8)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT	(12)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT	(16)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT	(20)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK	(0x7 << EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT	(24)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_MASK	(0x7 << EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT)
-
-#define EXYNOS4_CLKDIV_BUS_GDLR_SHIFT		(0)
-#define EXYNOS4_CLKDIV_BUS_GDLR_MASK		(0x7 << EXYNOS4_CLKDIV_BUS_GDLR_SHIFT)
-#define EXYNOS4_CLKDIV_BUS_GPLR_SHIFT		(4)
-#define EXYNOS4_CLKDIV_BUS_GPLR_MASK		(0x7 << EXYNOS4_CLKDIV_BUS_GPLR_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT		(0)
-#define EXYNOS4_CLKDIV_CAM_FIMC0_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT		(4)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT		(8)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT		(12)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM1			(S5P_VA_CMU + 0x0C568)
-
-#define EXYNOS4_CLKDIV_STAT_CAM1		(S5P_VA_CMU + 0x0C668)
-
-#define EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT		(0)
-#define EXYNOS4_CLKDIV_CAM1_JPEG_MASK		(0xf << EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT)
-
-#endif /* __DEVFREQ_EXYNOS4_BUS_H */
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
deleted file mode 100644
index 297ea30..0000000
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS5 INT clock frequency scaling support using DEVFREQ framework
- * Based on work done by Jonghwan Choi <jhbird.choi@samsung.com>
- * Support for only EXYNOS5250 is present.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/devfreq.h>
-#include <linux/io.h>
-#include <linux/pm_opp.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/pm_qos.h>
-#include <linux/regulator/consumer.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-
-#include "exynos_ppmu.h"
-
-#define MAX_SAFEVOLT			1100000 /* 1.10V */
-/* Assume that the bus is saturated if the utilization is 25% */
-#define INT_BUS_SATURATION_RATIO	25
-
-enum int_level_idx {
-	LV_0,
-	LV_1,
-	LV_2,
-	LV_3,
-	LV_4,
-	_LV_END
-};
-
-enum exynos_ppmu_list {
-	PPMU_RIGHT,
-	PPMU_END,
-};
-
-struct busfreq_data_int {
-	struct device *dev;
-	struct devfreq *devfreq;
-	struct regulator *vdd_int;
-	struct busfreq_ppmu_data ppmu_data;
-	unsigned long curr_freq;
-	bool disabled;
-
-	struct notifier_block pm_notifier;
-	struct mutex lock;
-	struct pm_qos_request int_req;
-	struct clk *int_clk;
-};
-
-struct int_bus_opp_table {
-	unsigned int idx;
-	unsigned long clk;
-	unsigned long volt;
-};
-
-static struct int_bus_opp_table exynos5_int_opp_table[] = {
-	{LV_0, 266000, 1025000},
-	{LV_1, 200000, 1025000},
-	{LV_2, 160000, 1025000},
-	{LV_3, 133000, 1025000},
-	{LV_4, 100000, 1025000},
-	{0, 0, 0},
-};
-
-static int exynos5_int_setvolt(struct busfreq_data_int *data,
-				unsigned long volt)
-{
-	return regulator_set_voltage(data->vdd_int, volt, MAX_SAFEVOLT);
-}
-
-static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
-			      u32 flags)
-{
-	int err = 0;
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct dev_pm_opp *opp;
-	unsigned long old_freq, freq;
-	unsigned long volt;
-
-	rcu_read_lock();
-	opp = devfreq_recommended_opp(dev, _freq, flags);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "%s: Invalid OPP.\n", __func__);
-		return PTR_ERR(opp);
-	}
-
-	freq = dev_pm_opp_get_freq(opp);
-	volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-
-	old_freq = data->curr_freq;
-
-	if (old_freq == freq)
-		return 0;
-
-	dev_dbg(dev, "targeting %lukHz %luuV\n", freq, volt);
-
-	mutex_lock(&data->lock);
-
-	if (data->disabled)
-		goto out;
-
-	if (freq > exynos5_int_opp_table[0].clk)
-		pm_qos_update_request(&data->int_req, freq * 16 / 1000);
-	else
-		pm_qos_update_request(&data->int_req, -1);
-
-	if (old_freq < freq)
-		err = exynos5_int_setvolt(data, volt);
-	if (err)
-		goto out;
-
-	err = clk_set_rate(data->int_clk, freq * 1000);
-
-	if (err)
-		goto out;
-
-	if (old_freq > freq)
-		err = exynos5_int_setvolt(data, volt);
-	if (err)
-		goto out;
-
-	data->curr_freq = freq;
-out:
-	mutex_unlock(&data->lock);
-	return err;
-}
-
-static int exynos5_int_get_dev_status(struct device *dev,
-				      struct devfreq_dev_status *stat)
-{
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-	int busier_dmc;
-
-	exynos_read_ppmu(ppmu_data);
-	busier_dmc = exynos_get_busier_ppmu(ppmu_data);
-
-	stat->current_frequency = data->curr_freq;
-
-	/* Number of cycles spent on memory access */
-	stat->busy_time = ppmu_data->ppmu[busier_dmc].count[PPMU_PMNCNT3];
-	stat->busy_time *= 100 / INT_BUS_SATURATION_RATIO;
-	stat->total_time = ppmu_data->ppmu[busier_dmc].ccnt;
-
-	return 0;
-}
-
-static struct devfreq_dev_profile exynos5_devfreq_int_profile = {
-	.initial_freq		= 160000,
-	.polling_ms		= 100,
-	.target			= exynos5_busfreq_int_target,
-	.get_dev_status		= exynos5_int_get_dev_status,
-};
-
-static int exynos5250_init_int_tables(struct busfreq_data_int *data)
-{
-	int i, err = 0;
-
-	for (i = LV_0; i < _LV_END; i++) {
-		err = dev_pm_opp_add(data->dev, exynos5_int_opp_table[i].clk,
-				exynos5_int_opp_table[i].volt);
-		if (err) {
-			dev_err(data->dev, "Cannot add opp entries.\n");
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
-		unsigned long event, void *ptr)
-{
-	struct busfreq_data_int *data = container_of(this,
-					struct busfreq_data_int, pm_notifier);
-	struct dev_pm_opp *opp;
-	unsigned long maxfreq = ULONG_MAX;
-	unsigned long freq;
-	unsigned long volt;
-	int err = 0;
-
-	switch (event) {
-	case PM_SUSPEND_PREPARE:
-		/* Set Fastest and Deactivate DVFS */
-		mutex_lock(&data->lock);
-
-		data->disabled = true;
-
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			err = PTR_ERR(opp);
-			goto unlock;
-		}
-		freq = dev_pm_opp_get_freq(opp);
-		volt = dev_pm_opp_get_voltage(opp);
-		rcu_read_unlock();
-
-		err = exynos5_int_setvolt(data, volt);
-		if (err)
-			goto unlock;
-
-		err = clk_set_rate(data->int_clk, freq * 1000);
-
-		if (err)
-			goto unlock;
-
-		data->curr_freq = freq;
-unlock:
-		mutex_unlock(&data->lock);
-		if (err)
-			return NOTIFY_BAD;
-		return NOTIFY_OK;
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		/* Reactivate */
-		mutex_lock(&data->lock);
-		data->disabled = false;
-		mutex_unlock(&data->lock);
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int exynos5_busfreq_int_probe(struct platform_device *pdev)
-{
-	struct busfreq_data_int *data;
-	struct busfreq_ppmu_data *ppmu_data;
-	struct dev_pm_opp *opp;
-	struct device *dev = &pdev->dev;
-	struct device_node *np;
-	unsigned long initial_freq;
-	unsigned long initial_volt;
-	int err = 0;
-	int i;
-
-	data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data_int),
-				GFP_KERNEL);
-	if (data == NULL) {
-		dev_err(dev, "Cannot allocate memory.\n");
-		return -ENOMEM;
-	}
-
-	ppmu_data = &data->ppmu_data;
-	ppmu_data->ppmu_end = PPMU_END;
-	ppmu_data->ppmu = devm_kzalloc(dev,
-				       sizeof(struct exynos_ppmu) * PPMU_END,
-				       GFP_KERNEL);
-	if (!ppmu_data->ppmu) {
-		dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
-		return -ENOMEM;
-	}
-
-	np = of_find_compatible_node(NULL, NULL, "samsung,exynos5250-ppmu");
-	if (np == NULL) {
-		pr_err("Unable to find PPMU node\n");
-		return -ENOENT;
-	}
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		/* map PPMU memory region */
-		ppmu_data->ppmu[i].hw_base = of_iomap(np, i);
-		if (ppmu_data->ppmu[i].hw_base == NULL) {
-			dev_err(&pdev->dev, "failed to map memory region\n");
-			return -ENOMEM;
-		}
-	}
-	data->pm_notifier.notifier_call = exynos5_busfreq_int_pm_notifier_event;
-	data->dev = dev;
-	mutex_init(&data->lock);
-
-	err = exynos5250_init_int_tables(data);
-	if (err)
-		return err;
-
-	data->vdd_int = devm_regulator_get(dev, "vdd_int");
-	if (IS_ERR(data->vdd_int)) {
-		dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
-		return PTR_ERR(data->vdd_int);
-	}
-
-	data->int_clk = devm_clk_get(dev, "int_clk");
-	if (IS_ERR(data->int_clk)) {
-		dev_err(dev, "Cannot get clock \"int_clk\"\n");
-		return PTR_ERR(data->int_clk);
-	}
-
-	rcu_read_lock();
-	opp = dev_pm_opp_find_freq_floor(dev,
-			&exynos5_devfreq_int_profile.initial_freq);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
-		       exynos5_devfreq_int_profile.initial_freq);
-		return PTR_ERR(opp);
-	}
-	initial_freq = dev_pm_opp_get_freq(opp);
-	initial_volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-	data->curr_freq = initial_freq;
-
-	err = clk_set_rate(data->int_clk, initial_freq * 1000);
-	if (err) {
-		dev_err(dev, "Failed to set initial frequency\n");
-		return err;
-	}
-
-	err = exynos5_int_setvolt(data, initial_volt);
-	if (err)
-		return err;
-
-	platform_set_drvdata(pdev, data);
-
-	busfreq_mon_reset(ppmu_data);
-
-	data->devfreq = devm_devfreq_add_device(dev, &exynos5_devfreq_int_profile,
-					   "simple_ondemand", NULL);
-	if (IS_ERR(data->devfreq))
-		return PTR_ERR(data->devfreq);
-
-	err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
-	if (err < 0) {
-		dev_err(dev, "Failed to register opp notifier\n");
-		return err;
-	}
-
-	err = register_pm_notifier(&data->pm_notifier);
-	if (err) {
-		dev_err(dev, "Failed to setup pm notifier\n");
-		return err;
-	}
-
-	/* TODO: Add a new QOS class for int/mif bus */
-	pm_qos_add_request(&data->int_req, PM_QOS_NETWORK_THROUGHPUT, -1);
-
-	return 0;
-}
-
-static int exynos5_busfreq_int_remove(struct platform_device *pdev)
-{
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-
-	pm_qos_remove_request(&data->int_req);
-	unregister_pm_notifier(&data->pm_notifier);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos5_busfreq_int_resume(struct device *dev)
-{
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
-	busfreq_mon_reset(ppmu_data);
-	return 0;
-}
-static const struct dev_pm_ops exynos5_busfreq_int_pm = {
-	.resume	= exynos5_busfreq_int_resume,
-};
-#endif
-static SIMPLE_DEV_PM_OPS(exynos5_busfreq_int_pm_ops, NULL,
-			 exynos5_busfreq_int_resume);
-
-/* platform device pointer for exynos5 devfreq device. */
-static struct platform_device *exynos5_devfreq_pdev;
-
-static struct platform_driver exynos5_busfreq_int_driver = {
-	.probe		= exynos5_busfreq_int_probe,
-	.remove		= exynos5_busfreq_int_remove,
-	.driver		= {
-		.name		= "exynos5-bus-int",
-		.pm		= &exynos5_busfreq_int_pm_ops,
-	},
-};
-
-static int __init exynos5_busfreq_int_init(void)
-{
-	int ret;
-
-	ret = platform_driver_register(&exynos5_busfreq_int_driver);
-	if (ret < 0)
-		goto out;
-
-	exynos5_devfreq_pdev =
-		platform_device_register_simple("exynos5-bus-int", -1, NULL, 0);
-	if (IS_ERR(exynos5_devfreq_pdev)) {
-		ret = PTR_ERR(exynos5_devfreq_pdev);
-		goto out1;
-	}
-
-	return 0;
-out1:
-	platform_driver_unregister(&exynos5_busfreq_int_driver);
-out:
-	return ret;
-}
-late_initcall(exynos5_busfreq_int_init);
-
-static void __exit exynos5_busfreq_int_exit(void)
-{
-	platform_device_unregister(exynos5_devfreq_pdev);
-	platform_driver_unregister(&exynos5_busfreq_int_driver);
-}
-module_exit(exynos5_busfreq_int_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS5 busfreq driver with devfreq framework");
diff --git a/drivers/devfreq/exynos/exynos_ppmu.c b/drivers/devfreq/exynos/exynos_ppmu.c
deleted file mode 100644
index 97b75e5..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS - PPMU support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/io.h>
-
-#include "exynos_ppmu.h"
-
-void exynos_ppmu_reset(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_CYCLE_RESET | PPMU_COUNTER_RESET, ppmu_base);
-	__raw_writel(PPMU_ENABLE_CYCLE  |
-		     PPMU_ENABLE_COUNT0 |
-		     PPMU_ENABLE_COUNT1 |
-		     PPMU_ENABLE_COUNT2 |
-		     PPMU_ENABLE_COUNT3,
-		     ppmu_base + PPMU_CNTENS);
-}
-
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
-			unsigned int evt)
-{
-	__raw_writel(evt, ppmu_base + PPMU_BEVTSEL(ch));
-}
-
-void exynos_ppmu_start(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_ENABLE, ppmu_base);
-}
-
-void exynos_ppmu_stop(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_DISABLE, ppmu_base);
-}
-
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch)
-{
-	unsigned int total;
-
-	if (ch == PPMU_PMNCNT3)
-		total = ((__raw_readl(ppmu_base + PMCNT_OFFSET(ch)) << 8) |
-			  __raw_readl(ppmu_base + PMCNT_OFFSET(ch + 1)));
-	else
-		total = __raw_readl(ppmu_base + PMCNT_OFFSET(ch));
-
-	return total;
-}
-
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data)
-{
-	unsigned int i;
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
-		/* Reset the performance and cycle counters */
-		exynos_ppmu_reset(ppmu_base);
-
-		/* Setup count registers to monitor read/write transactions */
-		ppmu_data->ppmu[i].event[PPMU_PMNCNT3] = RDWR_DATA_COUNT;
-		exynos_ppmu_setevent(ppmu_base, PPMU_PMNCNT3,
-					ppmu_data->ppmu[i].event[PPMU_PMNCNT3]);
-
-		exynos_ppmu_start(ppmu_base);
-	}
-}
-EXPORT_SYMBOL(busfreq_mon_reset);
-
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
-	int i, j;
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
-		exynos_ppmu_stop(ppmu_base);
-
-		/* Update local data from PPMU */
-		ppmu_data->ppmu[i].ccnt = __raw_readl(ppmu_base + PPMU_CCNT);
-
-		for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
-			if (ppmu_data->ppmu[i].event[j] == 0)
-				ppmu_data->ppmu[i].count[j] = 0;
-			else
-				ppmu_data->ppmu[i].count[j] =
-					exynos_ppmu_read(ppmu_base, j);
-		}
-	}
-
-	busfreq_mon_reset(ppmu_data);
-}
-EXPORT_SYMBOL(exynos_read_ppmu);
-
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
-	unsigned int count = 0;
-	int i, j, busy = 0;
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
-			if (ppmu_data->ppmu[i].count[j] > count) {
-				count = ppmu_data->ppmu[i].count[j];
-				busy = i;
-			}
-		}
-	}
-
-	return busy;
-}
-EXPORT_SYMBOL(exynos_get_busier_ppmu);
diff --git a/drivers/devfreq/exynos/exynos_ppmu.h b/drivers/devfreq/exynos/exynos_ppmu.h
deleted file mode 100644
index 71f17ba..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS PPMU header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS_PPMU_H
-#define __DEVFREQ_EXYNOS_PPMU_H __FILE__
-
-#include <linux/ktime.h>
-
-/* For PPMU Control */
-#define PPMU_ENABLE             BIT(0)
-#define PPMU_DISABLE            0x0
-#define PPMU_CYCLE_RESET        BIT(1)
-#define PPMU_COUNTER_RESET      BIT(2)
-
-#define PPMU_ENABLE_COUNT0      BIT(0)
-#define PPMU_ENABLE_COUNT1      BIT(1)
-#define PPMU_ENABLE_COUNT2      BIT(2)
-#define PPMU_ENABLE_COUNT3      BIT(3)
-#define PPMU_ENABLE_CYCLE       BIT(31)
-
-#define PPMU_CNTENS		0x10
-#define PPMU_FLAG		0x50
-#define PPMU_CCNT_OVERFLOW	BIT(31)
-#define PPMU_CCNT		0x100
-
-#define PPMU_PMCNT0		0x110
-#define PPMU_PMCNT_OFFSET	0x10
-#define PMCNT_OFFSET(x)		(PPMU_PMCNT0 + (PPMU_PMCNT_OFFSET * x))
-
-#define PPMU_BEVT0SEL		0x1000
-#define PPMU_BEVTSEL_OFFSET	0x100
-#define PPMU_BEVTSEL(x)		(PPMU_BEVT0SEL + (ch * PPMU_BEVTSEL_OFFSET))
-
-/* For Event Selection */
-#define RD_DATA_COUNT		0x5
-#define WR_DATA_COUNT		0x6
-#define RDWR_DATA_COUNT		0x7
-
-enum ppmu_counter {
-	PPMU_PMNCNT0,
-	PPMU_PMCCNT1,
-	PPMU_PMNCNT2,
-	PPMU_PMNCNT3,
-	PPMU_PMNCNT_MAX,
-};
-
-struct bus_opp_table {
-	unsigned int idx;
-	unsigned long clk;
-	unsigned long volt;
-};
-
-struct exynos_ppmu {
-	void __iomem *hw_base;
-	unsigned int ccnt;
-	unsigned int event[PPMU_PMNCNT_MAX];
-	unsigned int count[PPMU_PMNCNT_MAX];
-	unsigned long long ns;
-	ktime_t reset_time;
-	bool ccnt_overflow;
-	bool count_overflow[PPMU_PMNCNT_MAX];
-};
-
-struct busfreq_ppmu_data {
-	struct exynos_ppmu *ppmu;
-	int ppmu_end;
-};
-
-void exynos_ppmu_reset(void __iomem *ppmu_base);
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
-			unsigned int evt);
-void exynos_ppmu_start(void __iomem *ppmu_base);
-void exynos_ppmu_stop(void __iomem *ppmu_base);
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch);
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data);
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data);
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data);
-#endif /* __DEVFREQ_EXYNOS_PPMU_H */
-- 
cgit v0.10.2


From 490b864ba0724ac464234d6a326b0aa40b75afc7 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 10 Dec 2015 11:18:32 +0900
Subject: MAINTAINERS: Add samsung bus frequency driver entry

This patch adds the 'BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS' entry to review the
patches as maintainer. I can access the all datasheet of Exynos SoC and test it
on some Exynos-based board. Patches will be picked up by DEVFREQ maintainer
on devfreq git repository.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 42e65d1..7e9da3a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3539,6 +3539,15 @@ F:	drivers/devfreq/devfreq-event.c
 F:	include/linux/devfreq-event.h
 F:	Documentation/devicetree/bindings/devfreq/event/
 
+BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
+M:	Chanwoo Choi <cw00.choi@samsung.com>
+L:	linux-pm@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+S:	Maintained
+F:	drivers/devfreq/exynos-bus.c
+F:	Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+
 DEVICE NUMBER REGISTRY
 M:	Torben Mathiasen <device@lanana.org>
 W:	http://lanana.org/docs/device-list/index.html
-- 
cgit v0.10.2


From 0179a913875a8b39eaf5b848c876439a3a4650af Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 7 Apr 2016 11:29:11 +0900
Subject: PM / devfreq: event: Add new Exynos NoC probe driver

This patch adds NoC (Network on Chip) Probe driver which provides
the primitive values to get the performance data. The packets that the Network
on Chip (NoC) probes detects are transported over the network infrastructure.
Exynos542x bus has multiple NoC probes to provide bandwidth information about
behavior of the SoC that you can use while analyzing system performance.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Tested-by: Markus Reichl <m.reichl@fivetechno.de>
Tested-by: Anand Moon <linux.amoon@gmail.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
new file mode 100644
index 0000000..fd459f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
@@ -0,0 +1,26 @@
+
+* Samsung Exynos NoC (Network on Chip) Probe device
+
+The Samsung Exynos542x SoC has NoC (Network on Chip) Probe for NoC bus.
+NoC provides the primitive values to get the performance data. The packets
+that the Network on Chip (NoC) probes detects are transported over
+the network infrastructure to observer units. You can configure probes to
+capture packets with header or data on the data request response network,
+or as traffic debug or statistic collectors. Exynos542x bus has multiple
+NoC probes to provide bandwidth information about behavior of the SoC
+that you can use while analyzing system performance.
+
+Required properties:
+- compatible: Should be "samsung,exynos5420-nocp"
+- reg: physical base address of each NoC Probe and length of memory mapped region.
+
+Optional properties:
+- clock-names : the name of clock used by the NoC Probe, "nocp"
+- clocks : phandles for clock specified in "clock-names" property
+
+Example : NoC Probe nodes in Device Tree are listed below.
+
+	nocp_mem0_0: nocp@10CA1000 {
+		compatible = "samsung,exynos5420-nocp";
+		reg = <0x10CA1000 0x200>;
+	};
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index a11720a..1e8b4f4 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -13,6 +13,14 @@ menuconfig PM_DEVFREQ_EVENT
 
 if PM_DEVFREQ_EVENT
 
+config DEVFREQ_EVENT_EXYNOS_NOCP
+	bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
+	depends on ARCH_EXYNOS
+	select PM_OPP
+	help
+	  This add the devfreq-event driver for Exynos SoC. It provides NoC
+	  (Network on Chip) Probe counters to measure the bandwidth of AXI bus.
+
 config DEVFREQ_EVENT_EXYNOS_PPMU
 	bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
 	depends on ARCH_EXYNOS
diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile
index be146ea..3d6afd3 100644
--- a/drivers/devfreq/event/Makefile
+++ b/drivers/devfreq/event/Makefile
@@ -1,2 +1,4 @@
 # Exynos DEVFREQ Event Drivers
+
+obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o
 obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
new file mode 100644
index 0000000..6b6a5f3
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -0,0 +1,304 @@
+/*
+ * exynos-nocp.c - EXYNOS NoC (Network On Chip) Probe support
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/devfreq-event.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "exynos-nocp.h"
+
+struct exynos_nocp {
+	struct devfreq_event_dev *edev;
+	struct devfreq_event_desc desc;
+
+	struct device *dev;
+
+	struct regmap *regmap;
+	struct clk *clk;
+};
+
+/*
+ * The devfreq-event ops structure for nocp probe.
+ */
+static int exynos_nocp_set_event(struct devfreq_event_dev *edev)
+{
+	struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+	int ret;
+
+	/* Disable NoC probe */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK, 0);
+	if (ret < 0) {
+		dev_err(nocp->dev, "failed to disable the NoC probe device\n");
+		return ret;
+	}
+
+	/* Set a statistics dump period to 0 */
+	ret = regmap_write(nocp->regmap, NOCP_STAT_PERIOD, 0x0);
+	if (ret < 0)
+		goto out;
+
+	/* Set the IntEvent fields of *_SRC */
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_BYTE_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CYCLE_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+	if (ret < 0)
+		goto out;
+
+
+	/* Set an alarm with a max/min value of 0 to generate StatALARM */
+	ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MIN, 0x0);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MAX, 0x0);
+	if (ret < 0)
+		goto out;
+
+	/* Set AlarmMode */
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Enable the measurements by setting AlarmEn and StatEn */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+			NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK,
+			NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Set GlobalEN */
+	ret = regmap_update_bits(nocp->regmap, NOCP_CFG_CTL,
+				NOCP_CFG_CTL_GLOBALEN_MASK,
+				NOCP_CFG_CTL_GLOBALEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Enable NoC probe */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK,
+				NOCP_MAIN_CTL_STATEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	return 0;
+
+out:
+	/* Reset NoC probe */
+	if (regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK, 0)) {
+		dev_err(nocp->dev, "Failed to reset NoC probe device\n");
+	}
+
+	return ret;
+}
+
+static int exynos_nocp_get_event(struct devfreq_event_dev *edev,
+				struct devfreq_event_data *edata)
+{
+	struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+	unsigned int counter[4];
+	int ret;
+
+	/* Read cycle count */
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_0_VAL, &counter[0]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_1_VAL, &counter[1]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_2_VAL, &counter[2]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_3_VAL, &counter[3]);
+	if (ret < 0)
+		goto out;
+
+	edata->load_count = ((counter[1] << 16) | counter[0]);
+	edata->total_count = ((counter[3] << 16) | counter[2]);
+
+	dev_dbg(&edev->dev, "%s (event: %ld/%ld)\n", edev->desc->name,
+					edata->load_count, edata->total_count);
+
+	return 0;
+
+out:
+	edata->load_count = 0;
+	edata->total_count = 0;
+
+	dev_err(nocp->dev, "Failed to read the counter of NoC probe device\n");
+
+	return ret;
+}
+
+static const struct devfreq_event_ops exynos_nocp_ops = {
+	.set_event = exynos_nocp_set_event,
+	.get_event = exynos_nocp_get_event,
+};
+
+static const struct of_device_id exynos_nocp_id_match[] = {
+	{ .compatible = "samsung,exynos5420-nocp", },
+	{ /* sentinel */ },
+};
+
+static struct regmap_config exynos_nocp_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = NOCP_COUNTERS_3_VAL,
+};
+
+static int exynos_nocp_parse_dt(struct platform_device *pdev,
+				struct exynos_nocp *nocp)
+{
+	struct device *dev = nocp->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	void __iomem *base;
+
+	if (!np) {
+		dev_err(dev, "failed to find devicetree node\n");
+		return -EINVAL;
+	}
+
+	nocp->clk = devm_clk_get(dev, "nocp");
+	if (IS_ERR(nocp->clk))
+		nocp->clk = NULL;
+
+	/* Maps the memory mapped IO to control nocp register */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	exynos_nocp_regmap_config.max_register = resource_size(res) - 4;
+
+	nocp->regmap = devm_regmap_init_mmio(dev, base,
+					&exynos_nocp_regmap_config);
+	if (IS_ERR(nocp->regmap)) {
+		dev_err(dev, "failed to initialize regmap\n");
+		return PTR_ERR(nocp->regmap);
+	}
+
+	return 0;
+}
+
+static int exynos_nocp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct exynos_nocp *nocp;
+	int ret;
+
+	nocp = devm_kzalloc(&pdev->dev, sizeof(*nocp), GFP_KERNEL);
+	if (!nocp)
+		return -ENOMEM;
+
+	nocp->dev = &pdev->dev;
+
+	/* Parse dt data to get resource */
+	ret = exynos_nocp_parse_dt(pdev, nocp);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"failed to parse devicetree for resource\n");
+		return ret;
+	}
+
+	/* Add devfreq-event device to measure the bandwidth of NoC */
+	nocp->desc.ops = &exynos_nocp_ops;
+	nocp->desc.driver_data = nocp;
+	nocp->desc.name = np->full_name;
+	nocp->edev = devm_devfreq_event_add_edev(&pdev->dev, &nocp->desc);
+	if (IS_ERR(nocp->edev)) {
+		dev_err(&pdev->dev,
+			"failed to add devfreq-event device\n");
+		return PTR_ERR(nocp->edev);
+	}
+	platform_set_drvdata(pdev, nocp);
+
+	clk_prepare_enable(nocp->clk);
+
+	pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
+			dev_name(dev));
+
+	return 0;
+}
+
+static int exynos_nocp_remove(struct platform_device *pdev)
+{
+	struct exynos_nocp *nocp = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(nocp->clk);
+
+	return 0;
+}
+
+static struct platform_driver exynos_nocp_driver = {
+	.probe	= exynos_nocp_probe,
+	.remove	= exynos_nocp_remove,
+	.driver = {
+		.name	= "exynos-nocp",
+		.of_match_table = exynos_nocp_id_match,
+	},
+};
+module_platform_driver(exynos_nocp_driver);
+
+MODULE_DESCRIPTION("Exynos NoC (Network on Chip) Probe driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/event/exynos-nocp.h b/drivers/devfreq/event/exynos-nocp.h
new file mode 100644
index 0000000..28564db
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.h
@@ -0,0 +1,78 @@
+/*
+ * exynos-nocp.h - EXYNOS NoC (Network on Chip) Probe header file
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __EXYNOS_NOCP_H__
+#define __EXYNOS_NOCP_H__
+
+enum nocp_reg {
+	NOCP_ID_REVISION_ID		= 0x04,
+	NOCP_MAIN_CTL			= 0x08,
+	NOCP_CFG_CTL			= 0x0C,
+
+	NOCP_STAT_PERIOD		= 0x24,
+	NOCP_STAT_GO			= 0x28,
+	NOCP_STAT_ALARM_MIN		= 0x2C,
+	NOCP_STAT_ALARM_MAX		= 0x30,
+	NOCP_STAT_ALARM_STATUS		= 0x34,
+	NOCP_STAT_ALARM_CLR		= 0x38,
+
+	NOCP_COUNTERS_0_SRC		= 0x138,
+	NOCP_COUNTERS_0_ALARM_MODE	= 0x13C,
+	NOCP_COUNTERS_0_VAL		= 0x140,
+
+	NOCP_COUNTERS_1_SRC		= 0x14C,
+	NOCP_COUNTERS_1_ALARM_MODE	= 0x150,
+	NOCP_COUNTERS_1_VAL		= 0x154,
+
+	NOCP_COUNTERS_2_SRC		= 0x160,
+	NOCP_COUNTERS_2_ALARM_MODE	= 0x164,
+	NOCP_COUNTERS_2_VAL		= 0x168,
+
+	NOCP_COUNTERS_3_SRC		= 0x174,
+	NOCP_COUNTERS_3_ALARM_MODE	= 0x178,
+	NOCP_COUNTERS_3_VAL		= 0x17C,
+};
+
+/* NOCP_MAIN_CTL register */
+#define NOCP_MAIN_CTL_ERREN_MASK		BIT(0)
+#define NOCP_MAIN_CTL_TRACEEN_MASK		BIT(1)
+#define NOCP_MAIN_CTL_PAYLOADEN_MASK		BIT(2)
+#define NOCP_MAIN_CTL_STATEN_MASK		BIT(3)
+#define NOCP_MAIN_CTL_ALARMEN_MASK		BIT(4)
+#define NOCP_MAIN_CTL_STATCONDDUMP_MASK	BIT(5)
+#define NOCP_MAIN_CTL_INTRUSIVEMODE_MASK	BIT(6)
+
+/* NOCP_CFG_CTL register */
+#define NOCP_CFG_CTL_GLOBALEN_MASK		BIT(0)
+#define NOCP_CFG_CTL_ACTIVE_MASK		BIT(1)
+
+/* NOCP_COUNTERS_x_SRC register */
+#define NOCP_CNT_SRC_INTEVENT_SHIFT		0
+#define NOCP_CNT_SRC_INTEVENT_MASK		(0x1F << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_OFF_MASK		(0x0 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CYCLE_MASK	(0x1 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_IDLE_MASK		(0x2 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_XFER_MASK		(0x3 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BUSY_MASK		(0x4 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_WAIT_MASK		(0x5 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_PKT_MASK		(0x6 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BYTE_MASK		(0x8 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CHAIN_MASK	(0x10 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+
+/* NOCP_COUNTERS_x_ALARM_MODE register */
+#define NOCP_CNT_ALARM_MODE_SHIFT		0
+#define NOCP_CNT_ALARM_MODE_MASK		(0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_OFF_MASK		(0x0 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MASK		(0x1 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MAX_MASK		(0x2 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MAX_MASK	(0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+
+#endif /* __EXYNOS_NOCP_H__ */
-- 
cgit v0.10.2


From 19cf91d0f9ddc494217a0abaed91dfbddea7c958 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 14 Apr 2016 14:37:12 +0900
Subject: PM / devfreq: event: Find the instance of devfreq-event device by
 using phandle

This patch use the phandle to find the instance of devfreq-event device in
Device Tree when calling the devfreq_event_get_edev_by_phandle() because there
is two type devfreq-event devices as following:

First case, exynos-ppmu.c driver provides the maximum four event of each PPMU.
So, when getting the instance of devfreq-event device, using the unique name of
struct devfreq_event_desc.

Second case, exynos-nocp.c driver provide the only one event of each NoC Probe
device. So, when getting the instance of devfreq-event device, using the
phandle of each NoC probe device.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index 38bf144..39b048e 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -235,6 +235,11 @@ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev,
 
 	mutex_lock(&devfreq_event_list_lock);
 	list_for_each_entry(edev, &devfreq_event_list, node) {
+		if (edev->dev.parent && edev->dev.parent->of_node == node)
+			goto out;
+	}
+
+	list_for_each_entry(edev, &devfreq_event_list, node) {
 		if (!strcmp(edev->desc->name, node->name))
 			goto out;
 	}
-- 
cgit v0.10.2


From a9d1f4e0cb80b917f68e5b007436d50797c4eeab Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 27 Nov 2015 13:03:59 +0900
Subject: PM / devfreq: exynos: Add the detailed correlation for Exynos5422 bus

This patch adds the detailed corrleation between sub-blocks and power line
for Exynos5422.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
index 7dbd4ab..d3ec8e6 100644
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -104,6 +104,25 @@ Detailed correlation between sub-blocks and power line according to Exynos SoC:
 		|--- LCD0
 		|--- ISP
 
+- In case of Exynos5422, there are two power line as following:
+	VDD_MIF |--- DREX 0 (parent device, DRAM EXpress controller)
+	        |--- DREX 1
+
+	VDD_INT |--- NoC_Core (parent device)
+		|--- G2D
+		|--- G3D
+		|--- DISP1
+		|--- NoC_WCORE
+		|--- GSCL
+		|--- MSCL
+		|--- ISP
+		|--- MFC
+		|--- GEN
+		|--- PERIS
+		|--- PERIC
+		|--- FSYS
+		|--- FSYS2
+
 Example1:
 	Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to
 	power line (regulator). The MIF (Memory Interface) AXI bus is used to
-- 
cgit v0.10.2


From 83cb0e4d837af4348cc218638e9d6daddd21d260 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Fri, 29 Apr 2016 16:13:03 +0900
Subject: PM / devfreq: style/typo fixes

- Typo in comments fixed
- Unnecessary return statement removed

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 50fe1a1..2363d0a 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -423,7 +423,7 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err;
 
-	/* Initalize the struct profile and governor data for parent device */
+	/* Initialize the struct profile and governor data for parent device */
 	profile->polling_ms = 50;
 	profile->target = exynos_bus_target;
 	profile->get_dev_status = exynos_bus_get_dev_status;
@@ -471,7 +471,7 @@ static int exynos_bus_probe(struct platform_device *pdev)
 
 	goto out;
 passive:
-	/* Initalize the struct profile and governor data for passive device */
+	/* Initialize the struct profile and governor data for passive device */
 	profile->target = exynos_bus_passive_target;
 	profile->exit = exynos_bus_passive_exit;
 
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index a4b0b02..9ef46e2 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -196,8 +196,6 @@ static void __exit devfreq_passive_exit(void)
 	ret = devfreq_remove_governor(&devfreq_passive);
 	if (ret)
 		pr_err("%s: failed remove governor %d\n", __func__, ret);
-
-	return;
 }
 module_exit(devfreq_passive_exit);
 
-- 
cgit v0.10.2


From 0b81561c1d30bb44e65d3725042e27b548bb1a86 Mon Sep 17 00:00:00 2001
From: Arjun Sreedharan <arjun024@gmail.com>
Date: Fri, 29 Apr 2016 09:00:14 +0200
Subject: cpupower: fix potential memory leak

Signed-off-by: Thomas Renninger <trenn@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 2d09c92..9b65f05 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -65,7 +65,7 @@ FILE *prepare_output(const char *dirname)
 {
 	FILE *output = NULL;
 	int len;
-	char *filename;
+	char *filename, *filename_tmp;
 	struct utsname sysdata;
 	DIR *dir;
 
@@ -88,13 +88,15 @@ FILE *prepare_output(const char *dirname)
 
 	if (uname(&sysdata) == 0) {
 		len += strlen(sysdata.nodename) + strlen(sysdata.release);
-		filename = realloc(filename, sizeof(char) * len);
+		filename_tmp = realloc(filename, sizeof(*filename) * len);
 
-		if (!filename) {
+		if (filename_tmp == NULL) {
+			free(filename);
 			perror("realloc");
 			goto out_dir;
 		}
 
+		filename = filename_tmp;
 		snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log",
 			dirname, sysdata.nodename, sysdata.release, time(NULL));
 	} else {
-- 
cgit v0.10.2


From e43e94c1eda76dabd686ddf6f7825f54d747b310 Mon Sep 17 00:00:00 2001
From: Sai Gurrappadi <sgurrappadi@nvidia.com>
Date: Fri, 29 Apr 2016 14:44:37 -0700
Subject: cpufreq: Fix GOV_LIMITS handling for the userspace governor

Currently, the userspace governor only updates frequency on GOV_LIMITS
if policy->cur falls outside policy->{min/max}. However, it is also
necessary to update current frequency on GOV_LIMITS to match the user
requested value if it can be achieved within the new policy->{max/min}.

This was previously the behaviour in the governor until commit d1922f0
("cpufreq: Simplify userspace governor") which incorrectly assumed that
policy->cur == user requested frequency via scaling_setspeed. This won't
be true if the user requested frequency falls outside policy->{min/max}.
Ex: a temporary thermal cap throttled the user requested frequency.

Fix this by storing the user requested frequency in a seperate variable.
The governor will then try to achieve this request on every GOV_LIMITS
change.

Fixes: d1922f02562f (cpufreq: Simplify userspace governor)
Signed-off-by: Sai Gurrappadi <sgurrappadi@nvidia.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 4d16f45..9f3dec9 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 
 static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
 static DEFINE_MUTEX(userspace_mutex);
@@ -31,6 +32,7 @@ static DEFINE_MUTEX(userspace_mutex);
 static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 {
 	int ret = -EINVAL;
+	unsigned int *setspeed = policy->governor_data;
 
 	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
 
@@ -38,6 +40,8 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 	if (!per_cpu(cpu_is_managed, policy->cpu))
 		goto err;
 
+	*setspeed = freq;
+
 	ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
  err:
 	mutex_unlock(&userspace_mutex);
@@ -49,19 +53,45 @@ static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
 	return sprintf(buf, "%u\n", policy->cur);
 }
 
+static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy)
+{
+	unsigned int *setspeed;
+
+	setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL);
+	if (!setspeed)
+		return -ENOMEM;
+
+	policy->governor_data = setspeed;
+	return 0;
+}
+
 static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 				   unsigned int event)
 {
+	unsigned int *setspeed = policy->governor_data;
 	unsigned int cpu = policy->cpu;
 	int rc = 0;
 
+	if (event == CPUFREQ_GOV_POLICY_INIT)
+		return cpufreq_userspace_policy_init(policy);
+
+	if (!setspeed)
+		return -EINVAL;
+
 	switch (event) {
+	case CPUFREQ_GOV_POLICY_EXIT:
+		mutex_lock(&userspace_mutex);
+		policy->governor_data = NULL;
+		kfree(setspeed);
+		mutex_unlock(&userspace_mutex);
+		break;
 	case CPUFREQ_GOV_START:
 		BUG_ON(!policy->cur);
 		pr_debug("started managing cpu %u\n", cpu);
 
 		mutex_lock(&userspace_mutex);
 		per_cpu(cpu_is_managed, cpu) = 1;
+		*setspeed = policy->cur;
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_STOP:
@@ -69,20 +99,23 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 
 		mutex_lock(&userspace_mutex);
 		per_cpu(cpu_is_managed, cpu) = 0;
+		*setspeed = 0;
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_LIMITS:
 		mutex_lock(&userspace_mutex);
-		pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
-			cpu, policy->min, policy->max,
-			policy->cur);
+		pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n",
+			cpu, policy->min, policy->max, policy->cur, *setspeed);
 
-		if (policy->max < policy->cur)
+		if (policy->max < *setspeed)
 			__cpufreq_driver_target(policy, policy->max,
 						CPUFREQ_RELATION_H);
-		else if (policy->min > policy->cur)
+		else if (policy->min > *setspeed)
 			__cpufreq_driver_target(policy, policy->min,
 						CPUFREQ_RELATION_L);
+		else
+			__cpufreq_driver_target(policy, *setspeed,
+						CPUFREQ_RELATION_L);
 		mutex_unlock(&userspace_mutex);
 		break;
 	}
-- 
cgit v0.10.2


From ddbb74bc70c0dbaab85d1aa2564b0b3217267454 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 30 Apr 2016 13:33:29 +0200
Subject: PM / OPP: pass cpumask by reference

The new use of dev_pm_opp_set_sharing_cpus resulted in a harmless compiler
warning with CONFIG_CPUMASK_OFFSTACK=y:

drivers/cpufreq/mvebu-cpufreq.c: In function 'armada_xp_pmsu_cpufreq_init':
include/linux/cpumask.h:550:25: error: passing argument 2 of 'dev_pm_opp_set_sharing_cpus' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

The problem here is that cpumask_var_t gets passed by reference, but
by declaring a 'const cpumask_var_t' argument, only the pointer is
constant, not the actual mask. This is harmless because the function
does not actually modify the mask.

This patch changes the function prototypes for all of the related functions
to pass a 'struct cpumask *' instead of 'cpumask_var_t', matching what
most other such functions do in the kernel. This lets us mark all the
other similar functions as taking a 'const' mask where possible,
and it avoids the warning without any change in object code.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 947bd567f7a5 (mvebu: Use dev_pm_opp_set_sharing_cpus() to mark OPP tables as shared)
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 3428380..8e0b634 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 {
 	struct device *cpu_dev;
 	int cpu;
@@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 {
 	struct device *cpu_dev;
 	int cpu, ret = 0;
@@ -217,7 +217,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
 	struct device_node *np, *tmp_np;
 	struct device *tcpu_dev;
@@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
  * mutex cannot be locked.
  */
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
-				const cpumask_var_t cpumask)
+				const struct cpumask *cpumask)
 {
 	struct opp_device *opp_dev;
 	struct opp_table *opp_table;
@@ -346,7 +346,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
 	struct opp_device *opp_dev;
 	struct opp_table *opp_table;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 15f5544..5221d25 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -65,8 +65,8 @@ void dev_pm_opp_put_prop_name(struct device *dev);
 int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask);
-int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -180,12 +180,12 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f
 	return -ENOTSUPP;
 }
 
-static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask)
+static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask)
 {
 	return -ENOTSUPP;
 }
 
-static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
 	return -EINVAL;
 }
@@ -195,9 +195,9 @@ static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_va
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int dev_pm_opp_of_add_table(struct device *dev);
 void dev_pm_opp_of_remove_table(struct device *dev);
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask);
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask);
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -208,16 +208,16 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev)
 {
 }
 
-static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 {
 	return -ENOTSUPP;
 }
 
-static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
+static inline void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 {
 }
 
-static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
 	return -ENOTSUPP;
 }
-- 
cgit v0.10.2


From d9c99acb630de767a295c37cd704d08f8237e3f8 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Mon, 2 May 2016 15:39:25 +0200
Subject: cpufreq: tango: Use generic platdev driver

Add tango4 compatible string to the list.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index ac4a0ba..3646b14 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -66,6 +66,8 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "rockchip,rk3368", },
 	{ .compatible = "rockchip,rk3399", },
 
+	{ .compatible = "sigma,tango4" },
+
 	{ .compatible = "ti,omap2", },
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
-- 
cgit v0.10.2


From 411466c5081d2f649b3583cae0f6c9ad5edec636 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Tue, 3 May 2016 15:05:04 +0100
Subject: PM / OPP: add non-OF versions of dev_pm_opp_{cpumask_, }remove_table

Functions dev_pm_opp_of_{cpumask_,}remove_table removes/frees all the
static OPP entries associated with the device and/or all cpus(in case
of cpumask) that are created from DT.

However the OPP entries are populated reading from the firmware or some
different method using dev_pm_opp_add are marked dynamic and can't be
removed using above functions.

This patch adds non DT/OF versions of dev_pm_opp_{cpumask_,}remove_table
to support the above mentioned usecase.

This is in preparation to make use of the same in scpi-cpufreq.c

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 433b600..9f8bf04 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1845,21 +1845,11 @@ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
 
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
- *				  entries
- * @dev:	device pointer used to lookup OPP table.
- *
- * Free OPPs created using static entries present in DT.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function indirectly uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
+/*
+ * Free OPPs either created using static entries present in DT or even the
+ * dynamically added entries based on remove_all param.
  */
-void dev_pm_opp_of_remove_table(struct device *dev)
+static void _dev_pm_opp_remove_table(struct device *dev, bool remove_all)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *opp, *tmp;
@@ -1884,7 +1874,7 @@ void dev_pm_opp_of_remove_table(struct device *dev)
 	if (list_is_singular(&opp_table->dev_list)) {
 		/* Free static OPPs */
 		list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
-			if (!opp->dynamic)
+			if (remove_all || !opp->dynamic)
 				_opp_remove(opp_table, opp, true);
 		}
 	} else {
@@ -1894,6 +1884,44 @@ void dev_pm_opp_of_remove_table(struct device *dev)
 unlock:
 	mutex_unlock(&opp_table_lock);
 }
+
+/**
+ * dev_pm_opp_remove_table() - Free all OPPs associated with the device
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Free both OPPs created using static entries present in DT and the
+ * dynamically added entries.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_remove_table(struct device *dev)
+{
+	_dev_pm_opp_remove_table(dev, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
+
+#ifdef CONFIG_OF
+/**
+ * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
+ *				  entries
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Free OPPs created using static entries present in DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_remove_table(struct device *dev)
+{
+	_dev_pm_opp_remove_table(dev, false);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
 
 /* Returns opp descriptor node for a device, caller must do of_node_put() */
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 8e0b634..357781e 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -119,20 +119,8 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
- * @cpumask:	cpumask for which OPP table needs to be removed
- *
- * This removes the OPP tables for CPUs present in the @cpumask.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
+static void
+_dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
 {
 	struct device *cpu_dev;
 	int cpu;
@@ -147,9 +135,51 @@ void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 			continue;
 		}
 
-		dev_pm_opp_of_remove_table(cpu_dev);
+		if (of)
+			dev_pm_opp_of_remove_table(cpu_dev);
+		else
+			dev_pm_opp_remove_table(cpu_dev);
 	}
 }
+
+/**
+ * dev_pm_opp_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used to remove all the OPPs entries associated with
+ * the cpus in @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+	_dev_pm_opp_cpumask_remove_table(cpumask, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
+
+#ifdef CONFIG_OF
+/**
+ * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used only to remove static entries created from DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
+{
+	_dev_pm_opp_cpumask_remove_table(cpumask, true);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
 
 /**
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5221d25..bca2615 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -67,6 +67,8 @@ void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
+void dev_pm_opp_remove_table(struct device *dev);
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -190,6 +192,14 @@ static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpu
 	return -EINVAL;
 }
 
+static inline void dev_pm_opp_remove_table(struct device *dev)
+{
+}
+
+static inline void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
-- 
cgit v0.10.2


From d9975b0b079c2708d39c01dad882c91a7a5a348f Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Tue, 3 May 2016 15:05:05 +0100
Subject: cpufreq: arm_big_little: use generic OPP functions for {init,
 free}_opp_table

Currently when performing random CPU hot-plugs and suspend-to-ram(S2R)
on systems using arm_big_little cpufreq driver, we get warnings similar
to something like below:

cpu cpu1: _opp_add: duplicate OPPs detected. Existing: freq: 600000000,
	volt: 800000, enabled: 1. New: freq: 600000000, volt: 800000, enabled: 1

This is mainly because the OPPs for the shared cpus are not set. We can
just use dev_pm_opp_of_cpumask_add_table in case the OPPs are obtained
from DT(arm_big_little_dt.c) or use dev_pm_opp_set_sharing_cpus if the
OPPs are obtained by other means like firmware(e.g. scpi-cpufreq.c)

Also now that the generic dev_pm_opp{,_of}_cpumask_remove_table can
handle removal of opp table and entries for all associated CPUs, we can
re-use dev_pm_opp{,_of}_cpumask_remove_table as free_opp_table in
cpufreq_arm_bL_ops.

This patch makes necessary changes to reuse the generic OPP functions for
{init,free}_opp_table and thereby eliminating the warnings.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index c251247..4180422 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -298,7 +298,8 @@ static int merge_cluster_tables(void)
 	return 0;
 }
 
-static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					    const struct cpumask *cpumask)
 {
 	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
 
@@ -308,11 +309,12 @@ static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
 	clk_put(clk[cluster]);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
 	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpu_dev);
+		arm_bL_ops->free_opp_table(cpumask);
 	dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
 }
 
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
 	int i;
@@ -321,7 +323,7 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
 		return;
 
 	if (cluster < MAX_CLUSTERS)
-		return _put_cluster_clk_and_freq_table(cpu_dev);
+		return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
 
 	for_each_present_cpu(i) {
 		struct device *cdev = get_cpu_device(i);
@@ -330,14 +332,15 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
 			return;
 		}
 
-		_put_cluster_clk_and_freq_table(cdev);
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
 	}
 
 	/* free virtual table */
 	kfree(freq_table[cluster]);
 }
 
-static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
 {
 	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
 	int ret;
@@ -345,7 +348,7 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
 	if (freq_table[cluster])
 		return 0;
 
-	ret = arm_bL_ops->init_opp_table(cpu_dev);
+	ret = arm_bL_ops->init_opp_table(cpumask);
 	if (ret) {
 		dev_err(cpu_dev, "%s: init_opp_table failed, cpu: %d, err: %d\n",
 				__func__, cpu_dev->id, ret);
@@ -374,14 +377,15 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
 
 free_opp_table:
 	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpu_dev);
+		arm_bL_ops->free_opp_table(cpumask);
 out:
 	dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
 			cluster);
 	return ret;
 }
 
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					  const struct cpumask *cpumask)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
 	int i, ret;
@@ -390,7 +394,7 @@ static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
 		return 0;
 
 	if (cluster < MAX_CLUSTERS) {
-		ret = _get_cluster_clk_and_freq_table(cpu_dev);
+		ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
 		if (ret)
 			atomic_dec(&cluster_usage[cluster]);
 		return ret;
@@ -407,7 +411,7 @@ static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
 			return -ENODEV;
 		}
 
-		ret = _get_cluster_clk_and_freq_table(cdev);
+		ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
 		if (ret)
 			goto put_clusters;
 	}
@@ -433,7 +437,7 @@ put_clusters:
 			return -ENODEV;
 		}
 
-		_put_cluster_clk_and_freq_table(cdev);
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
 	}
 
 	atomic_dec(&cluster_usage[cluster]);
@@ -455,18 +459,6 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 	}
 
-	ret = get_cluster_clk_and_freq_table(cpu_dev);
-	if (ret)
-		return ret;
-
-	ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
-	if (ret) {
-		dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
-				policy->cpu, cur_cluster);
-		put_cluster_clk_and_freq_table(cpu_dev);
-		return ret;
-	}
-
 	if (cur_cluster < MAX_CLUSTERS) {
 		int cpu;
 
@@ -479,6 +471,18 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
 	}
 
+	ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+	if (ret)
+		return ret;
+
+	ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
+	if (ret) {
+		dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
+			policy->cpu, cur_cluster);
+		put_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+		return ret;
+	}
+
 	if (arm_bL_ops->get_transition_latency)
 		policy->cpuinfo.transition_latency =
 			arm_bL_ops->get_transition_latency(cpu_dev);
@@ -509,7 +513,7 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy)
 		return -ENODEV;
 	}
 
-	put_cluster_clk_and_freq_table(cpu_dev);
+	put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
 	dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
 
 	return 0;
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index b88889d..184d7c3 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -30,11 +30,11 @@ struct cpufreq_arm_bL_ops {
 	 * This must set opp table for cpu_dev in a similar way as done by
 	 * dev_pm_opp_of_add_table().
 	 */
-	int (*init_opp_table)(struct device *cpu_dev);
+	int (*init_opp_table)(const struct cpumask *cpumask);
 
 	/* Optional */
 	int (*get_transition_latency)(struct device *cpu_dev);
-	void (*free_opp_table)(struct device *cpu_dev);
+	void (*free_opp_table)(const struct cpumask *cpumask);
 };
 
 int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
index 16ddeef..39b3f51 100644
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ b/drivers/cpufreq/arm_big_little_dt.c
@@ -43,23 +43,6 @@ static struct device_node *get_cpu_node_with_valid_op(int cpu)
 	return np;
 }
 
-static int dt_init_opp_table(struct device *cpu_dev)
-{
-	struct device_node *np;
-	int ret;
-
-	np = of_node_get(cpu_dev->of_node);
-	if (!np) {
-		pr_err("failed to find cpu%d node\n", cpu_dev->id);
-		return -ENOENT;
-	}
-
-	ret = dev_pm_opp_of_add_table(cpu_dev);
-	of_node_put(np);
-
-	return ret;
-}
-
 static int dt_get_transition_latency(struct device *cpu_dev)
 {
 	struct device_node *np;
@@ -81,8 +64,8 @@ static int dt_get_transition_latency(struct device *cpu_dev)
 static struct cpufreq_arm_bL_ops dt_bL_ops = {
 	.name	= "dt-bl",
 	.get_transition_latency = dt_get_transition_latency,
-	.init_opp_table = dt_init_opp_table,
-	.free_opp_table = dev_pm_opp_of_remove_table,
+	.init_opp_table = dev_pm_opp_of_cpumask_add_table,
+	.free_opp_table = dev_pm_opp_of_cpumask_remove_table,
 };
 
 static int generic_bL_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index de5e89b..e8a7bf5 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -18,6 +18,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -38,10 +39,20 @@ static struct scpi_dvfs_info *scpi_get_dvfs_info(struct device *cpu_dev)
 	return scpi_ops->dvfs_get_info(domain);
 }
 
-static int scpi_opp_table_ops(struct device *cpu_dev, bool remove)
+static int scpi_get_transition_latency(struct device *cpu_dev)
 {
-	int idx, ret = 0;
+	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
+
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+	return info->latency;
+}
+
+static int scpi_init_opp_table(const struct cpumask *cpumask)
+{
+	int idx, ret;
 	struct scpi_opp *opp;
+	struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
 	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
 
 	if (IS_ERR(info))
@@ -51,11 +62,7 @@ static int scpi_opp_table_ops(struct device *cpu_dev, bool remove)
 		return -EIO;
 
 	for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
-		if (remove)
-			dev_pm_opp_remove(cpu_dev, opp->freq);
-		else
-			ret = dev_pm_opp_add(cpu_dev, opp->freq,
-					     opp->m_volt * 1000);
+		ret = dev_pm_opp_add(cpu_dev, opp->freq, opp->m_volt * 1000);
 		if (ret) {
 			dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
 				 opp->freq, opp->m_volt);
@@ -64,33 +71,19 @@ static int scpi_opp_table_ops(struct device *cpu_dev, bool remove)
 			return ret;
 		}
 	}
-	return ret;
-}
 
-static int scpi_get_transition_latency(struct device *cpu_dev)
-{
-	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
-
-	if (IS_ERR(info))
-		return PTR_ERR(info);
-	return info->latency;
-}
-
-static int scpi_init_opp_table(struct device *cpu_dev)
-{
-	return scpi_opp_table_ops(cpu_dev, false);
-}
-
-static void scpi_free_opp_table(struct device *cpu_dev)
-{
-	scpi_opp_table_ops(cpu_dev, true);
+	ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask);
+	if (ret)
+		dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+			__func__, ret);
+	return ret;
 }
 
 static struct cpufreq_arm_bL_ops scpi_cpufreq_ops = {
 	.name	= "scpi",
 	.get_transition_latency = scpi_get_transition_latency,
 	.init_opp_table = scpi_init_opp_table,
-	.free_opp_table = scpi_free_opp_table,
+	.free_opp_table = dev_pm_opp_cpumask_remove_table,
 };
 
 static int scpi_cpufreq_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 433e93f..87e5bdc 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -18,6 +18,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -26,8 +27,9 @@
 
 #include "arm_big_little.h"
 
-static int ve_spc_init_opp_table(struct device *cpu_dev)
+static int ve_spc_init_opp_table(const struct cpumask *cpumask)
 {
+	struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
 	/*
 	 * platform specific SPC code must initialise the opp table
 	 * so just check if the OPP count is non-zero
-- 
cgit v0.10.2


From e59a8f7ff4573fd54f1acec0e29280a6556fdde9 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 4 May 2016 15:07:34 -0700
Subject: cpufreq: intel_pstate: Ignore _PPC processing under HWP

When HWP (hardware P states) feature is active, the ACPI _PSS and _PPC
is not used. So ignore processing for _PPC limits.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a0823e8..74453fe 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -380,6 +380,9 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	int ret;
 	int i;
 
+	if (hwp_active)
+		return;
+
 	if (!intel_pstate_get_ppc_enable_status())
 		return;
 
-- 
cgit v0.10.2


From f47b72a15a9679dd4dc1af681d4d2f1ca2815552 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 5 May 2016 16:20:33 +0530
Subject: PM / OPP: Move CONFIG_OF dependent code in a separate file

Recently, a few issues were noticed in the code where CONFIG_OF wasn't
consistently used for many routines. The core file is big enough now and
ifdef hackery makes it less readable.

Move OF-specific code to another file and compile that only if CONFIG_OF
is enabled.

Compile-tested:
- For ARM (exynos) with CONFIG_OF enabled
- For X86 with CONFIG_OF disabled (have to enable CONFIG_PM_OPP separately)

No functional changes.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile
index 19837ef..e70ceb4 100644
--- a/drivers/base/power/opp/Makefile
+++ b/drivers/base/power/opp/Makefile
@@ -1,3 +1,4 @@
 ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
 obj-y				+= core.o cpu.o
+obj-$(CONFIG_OF)		+= of.o
 obj-$(CONFIG_DEBUG_FS)		+= debugfs.o
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 9f8bf04..f98b01a 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -18,7 +18,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/device.h>
-#include <linux/of.h>
 #include <linux/export.h>
 #include <linux/regulator/consumer.h>
 
@@ -29,7 +28,7 @@
  * from here, with each opp_table containing the list of opps it supports in
  * various states of availability.
  */
-static LIST_HEAD(opp_tables);
+LIST_HEAD(opp_tables);
 /* Lock to allow exclusive modification to the device and opp lists */
 DEFINE_MUTEX(opp_table_lock);
 
@@ -53,26 +52,6 @@ static struct opp_device *_find_opp_dev(const struct device *dev,
 	return NULL;
 }
 
-static struct opp_table *_managed_opp(const struct device_node *np)
-{
-	struct opp_table *opp_table;
-
-	list_for_each_entry_rcu(opp_table, &opp_tables, node) {
-		if (opp_table->np == np) {
-			/*
-			 * Multiple devices can point to the same OPP table and
-			 * so will have same node-pointer, np.
-			 *
-			 * But the OPPs will be considered as shared only if the
-			 * OPP table contains a "opp-shared" property.
-			 */
-			return opp_table->shared_opp ? opp_table : NULL;
-		}
-	}
-
-	return NULL;
-}
-
 /**
  * _find_opp_table() - find opp_table struct using device pointer
  * @dev:	device pointer used to lookup OPP table
@@ -760,7 +739,6 @@ static struct opp_table *_add_opp_table(struct device *dev)
 {
 	struct opp_table *opp_table;
 	struct opp_device *opp_dev;
-	struct device_node *np;
 	int ret;
 
 	/* Check for existing table for 'dev' first */
@@ -784,20 +762,7 @@ static struct opp_table *_add_opp_table(struct device *dev)
 		return NULL;
 	}
 
-	/*
-	 * Only required for backward compatibility with v1 bindings, but isn't
-	 * harmful for other cases. And so we do it unconditionally.
-	 */
-	np = of_node_get(dev->of_node);
-	if (np) {
-		u32 val;
-
-		if (!of_property_read_u32(np, "clock-latency", &val))
-			opp_table->clock_latency_ns_max = val;
-		of_property_read_u32(np, "voltage-tolerance",
-				     &opp_table->voltage_tolerance_v1);
-		of_node_put(np);
-	}
+	_of_init_opp_table(opp_table, dev);
 
 	/* Set regulator to a non-NULL error value */
 	opp_table->regulator = ERR_PTR(-ENXIO);
@@ -893,8 +858,8 @@ static void _kfree_opp_rcu(struct rcu_head *head)
  * It is assumed that the caller holds required mutex for an RCU updater
  * strategy.
  */
-static void _opp_remove(struct opp_table *opp_table,
-			struct dev_pm_opp *opp, bool notify)
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp,
+		 bool notify)
 {
 	/*
 	 * Notify the changes in the availability of the operable
@@ -955,8 +920,8 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
 
-static struct dev_pm_opp *_allocate_opp(struct device *dev,
-					struct opp_table **opp_table)
+struct dev_pm_opp *_allocate_opp(struct device *dev,
+				 struct opp_table **opp_table)
 {
 	struct dev_pm_opp *opp;
 
@@ -992,8 +957,8 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 	return true;
 }
 
-static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
-		    struct opp_table *opp_table)
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
+	     struct opp_table *opp_table)
 {
 	struct dev_pm_opp *opp;
 	struct list_head *head = &opp_table->opp_list;
@@ -1069,8 +1034,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
  *		Duplicate OPPs (both freq and volt are same) and !opp->available
  * -ENOMEM	Memory allocation failure
  */
-static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
-		       bool dynamic)
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
+		bool dynamic)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *new_opp;
@@ -1115,83 +1080,6 @@ unlock:
 	return ret;
 }
 
-/* TODO: Support multiple regulators */
-static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
-			      struct opp_table *opp_table)
-{
-	u32 microvolt[3] = {0};
-	u32 val;
-	int count, ret;
-	struct property *prop = NULL;
-	char name[NAME_MAX];
-
-	/* Search for "opp-microvolt-<name>" */
-	if (opp_table->prop_name) {
-		snprintf(name, sizeof(name), "opp-microvolt-%s",
-			 opp_table->prop_name);
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (!prop) {
-		/* Search for "opp-microvolt" */
-		sprintf(name, "opp-microvolt");
-		prop = of_find_property(opp->np, name, NULL);
-
-		/* Missing property isn't a problem, but an invalid entry is */
-		if (!prop)
-			return 0;
-	}
-
-	count = of_property_count_u32_elems(opp->np, name);
-	if (count < 0) {
-		dev_err(dev, "%s: Invalid %s property (%d)\n",
-			__func__, name, count);
-		return count;
-	}
-
-	/* There can be one or three elements here */
-	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
-			__func__, name, count);
-		return -EINVAL;
-	}
-
-	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
-	if (ret) {
-		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
-		return -EINVAL;
-	}
-
-	opp->u_volt = microvolt[0];
-
-	if (count == 1) {
-		opp->u_volt_min = opp->u_volt;
-		opp->u_volt_max = opp->u_volt;
-	} else {
-		opp->u_volt_min = microvolt[1];
-		opp->u_volt_max = microvolt[2];
-	}
-
-	/* Search for "opp-microamp-<name>" */
-	prop = NULL;
-	if (opp_table->prop_name) {
-		snprintf(name, sizeof(name), "opp-microamp-%s",
-			 opp_table->prop_name);
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (!prop) {
-		/* Search for "opp-microamp" */
-		sprintf(name, "opp-microamp");
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (prop && !of_property_read_u32(opp->np, name, &val))
-		opp->u_amp = val;
-
-	return 0;
-}
-
 /**
  * dev_pm_opp_set_supported_hw() - Set supported platforms
  * @dev: Device for which supported-hw has to be set.
@@ -1520,144 +1408,6 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
 
-static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
-			      struct device_node *np)
-{
-	unsigned int count = opp_table->supported_hw_count;
-	u32 version;
-	int ret;
-
-	if (!opp_table->supported_hw)
-		return true;
-
-	while (count--) {
-		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
-						 &version);
-		if (ret) {
-			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
-				 __func__, count, ret);
-			return false;
-		}
-
-		/* Both of these are bitwise masks of the versions */
-		if (!(version & opp_table->supported_hw[count]))
-			return false;
-	}
-
-	return true;
-}
-
-/**
- * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
- * @dev:	device for which we do this operation
- * @np:		device node
- *
- * This function adds an opp definition to the opp table and returns status. The
- * opp can be controlled using dev_pm_opp_enable/disable functions and may be
- * removed by dev_pm_opp_remove.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM	Memory allocation failure
- * -EINVAL	Failed parsing the OPP node
- */
-static int _opp_add_static_v2(struct device *dev, struct device_node *np)
-{
-	struct opp_table *opp_table;
-	struct dev_pm_opp *new_opp;
-	u64 rate;
-	u32 val;
-	int ret;
-
-	/* Hold our table modification lock here */
-	mutex_lock(&opp_table_lock);
-
-	new_opp = _allocate_opp(dev, &opp_table);
-	if (!new_opp) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	ret = of_property_read_u64(np, "opp-hz", &rate);
-	if (ret < 0) {
-		dev_err(dev, "%s: opp-hz not found\n", __func__);
-		goto free_opp;
-	}
-
-	/* Check if the OPP supports hardware's hierarchy of versions or not */
-	if (!_opp_is_supported(dev, opp_table, np)) {
-		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
-		goto free_opp;
-	}
-
-	/*
-	 * Rate is defined as an unsigned long in clk API, and so casting
-	 * explicitly to its type. Must be fixed once rate is 64 bit
-	 * guaranteed in clk API.
-	 */
-	new_opp->rate = (unsigned long)rate;
-	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
-
-	new_opp->np = np;
-	new_opp->dynamic = false;
-	new_opp->available = true;
-
-	if (!of_property_read_u32(np, "clock-latency-ns", &val))
-		new_opp->clock_latency_ns = val;
-
-	ret = opp_parse_supplies(new_opp, dev, opp_table);
-	if (ret)
-		goto free_opp;
-
-	ret = _opp_add(dev, new_opp, opp_table);
-	if (ret)
-		goto free_opp;
-
-	/* OPP to select on device suspend */
-	if (of_property_read_bool(np, "opp-suspend")) {
-		if (opp_table->suspend_opp) {
-			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
-				 __func__, opp_table->suspend_opp->rate,
-				 new_opp->rate);
-		} else {
-			new_opp->suspend = true;
-			opp_table->suspend_opp = new_opp;
-		}
-	}
-
-	if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
-		opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
-
-	mutex_unlock(&opp_table_lock);
-
-	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
-		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
-		 new_opp->u_volt_min, new_opp->u_volt_max,
-		 new_opp->clock_latency_ns);
-
-	/*
-	 * Notify the changes in the availability of the operable
-	 * frequency/voltage list.
-	 */
-	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
-	return 0;
-
-free_opp:
-	_opp_remove(opp_table, new_opp, false);
-unlock:
-	mutex_unlock(&opp_table_lock);
-	return ret;
-}
-
 /**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
@@ -1849,7 +1599,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
  * Free OPPs either created using static entries present in DT or even the
  * dynamically added entries based on remove_all param.
  */
-static void _dev_pm_opp_remove_table(struct device *dev, bool remove_all)
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *opp, *tmp;
@@ -1903,179 +1653,3 @@ void dev_pm_opp_remove_table(struct device *dev)
 	_dev_pm_opp_remove_table(dev, true);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
-
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
- *				  entries
- * @dev:	device pointer used to lookup OPP table.
- *
- * Free OPPs created using static entries present in DT.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function indirectly uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-void dev_pm_opp_of_remove_table(struct device *dev)
-{
-	_dev_pm_opp_remove_table(dev, false);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
-
-/* Returns opp descriptor node for a device, caller must do of_node_put() */
-struct device_node *_of_get_opp_desc_node(struct device *dev)
-{
-	/*
-	 * TODO: Support for multiple OPP tables.
-	 *
-	 * There should be only ONE phandle present in "operating-points-v2"
-	 * property.
-	 */
-
-	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
-}
-
-/* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
-{
-	struct device_node *np;
-	struct opp_table *opp_table;
-	int ret = 0, count = 0;
-
-	mutex_lock(&opp_table_lock);
-
-	opp_table = _managed_opp(opp_np);
-	if (opp_table) {
-		/* OPPs are already managed */
-		if (!_add_opp_dev(dev, opp_table))
-			ret = -ENOMEM;
-		mutex_unlock(&opp_table_lock);
-		return ret;
-	}
-	mutex_unlock(&opp_table_lock);
-
-	/* We have opp-table node now, iterate over it and add OPPs */
-	for_each_available_child_of_node(opp_np, np) {
-		count++;
-
-		ret = _opp_add_static_v2(dev, np);
-		if (ret) {
-			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
-				ret);
-			goto free_table;
-		}
-	}
-
-	/* There should be one of more OPP defined */
-	if (WARN_ON(!count))
-		return -ENOENT;
-
-	mutex_lock(&opp_table_lock);
-
-	opp_table = _find_opp_table(dev);
-	if (WARN_ON(IS_ERR(opp_table))) {
-		ret = PTR_ERR(opp_table);
-		mutex_unlock(&opp_table_lock);
-		goto free_table;
-	}
-
-	opp_table->np = opp_np;
-	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
-
-	mutex_unlock(&opp_table_lock);
-
-	return 0;
-
-free_table:
-	dev_pm_opp_of_remove_table(dev);
-
-	return ret;
-}
-
-/* Initializes OPP tables based on old-deprecated bindings */
-static int _of_add_opp_table_v1(struct device *dev)
-{
-	const struct property *prop;
-	const __be32 *val;
-	int nr;
-
-	prop = of_find_property(dev->of_node, "operating-points", NULL);
-	if (!prop)
-		return -ENODEV;
-	if (!prop->value)
-		return -ENODATA;
-
-	/*
-	 * Each OPP is a set of tuples consisting of frequency and
-	 * voltage like <freq-kHz vol-uV>.
-	 */
-	nr = prop->length / sizeof(u32);
-	if (nr % 2) {
-		dev_err(dev, "%s: Invalid OPP table\n", __func__);
-		return -EINVAL;
-	}
-
-	val = prop->value;
-	while (nr) {
-		unsigned long freq = be32_to_cpup(val++) * 1000;
-		unsigned long volt = be32_to_cpup(val++);
-
-		if (_opp_add_v1(dev, freq, volt, false))
-			dev_warn(dev, "%s: Failed to add OPP %ld\n",
-				 __func__, freq);
-		nr -= 2;
-	}
-
-	return 0;
-}
-
-/**
- * dev_pm_opp_of_add_table() - Initialize opp table from device tree
- * @dev:	device pointer used to lookup OPP table.
- *
- * Register the initial OPP table with the OPP library for given device.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function indirectly uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM	Memory allocation failure
- * -ENODEV	when 'operating-points' property is not found or is invalid data
- *		in device node.
- * -ENODATA	when empty 'operating-points' property is found
- * -EINVAL	when invalid entries are found in opp-v2 table
- */
-int dev_pm_opp_of_add_table(struct device *dev)
-{
-	struct device_node *opp_np;
-	int ret;
-
-	/*
-	 * OPPs have two version of bindings now. The older one is deprecated,
-	 * try for the new binding first.
-	 */
-	opp_np = _of_get_opp_desc_node(dev);
-	if (!opp_np) {
-		/*
-		 * Try old-deprecated bindings for backward compatibility with
-		 * older dtbs.
-		 */
-		return _of_add_opp_table_v1(dev);
-	}
-
-	ret = _of_add_opp_table_v2(dev, opp_np);
-	of_node_put(opp_np);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
-#endif
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 357781e..83d6e7b 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -18,7 +18,6 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/of.h>
 #include <linux/slab.h>
 
 #include "opp.h"
@@ -119,8 +118,7 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-static void
-_dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
 {
 	struct device *cpu_dev;
 	int cpu;
@@ -162,145 +160,6 @@ void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
 
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
- * @cpumask:	cpumask for which OPP table needs to be removed
- *
- * This removes the OPP tables for CPUs present in the @cpumask.
- * This should be used only to remove static entries created from DT.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
-{
-	_dev_pm_opp_cpumask_remove_table(cpumask, true);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
-
-/**
- * dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
- * @cpumask:	cpumask for which OPP table needs to be added.
- *
- * This adds the OPP tables for CPUs present in the @cpumask.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
-{
-	struct device *cpu_dev;
-	int cpu, ret = 0;
-
-	WARN_ON(cpumask_empty(cpumask));
-
-	for_each_cpu(cpu, cpumask) {
-		cpu_dev = get_cpu_device(cpu);
-		if (!cpu_dev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__,
-			       cpu);
-			continue;
-		}
-
-		ret = dev_pm_opp_of_add_table(cpu_dev);
-		if (ret) {
-			pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
-			       __func__, cpu, ret);
-
-			/* Free all other OPPs */
-			dev_pm_opp_of_cpumask_remove_table(cpumask);
-			break;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
-
-/*
- * Works only for OPP v2 bindings.
- *
- * Returns -ENOENT if operating-points-v2 bindings aren't supported.
- */
-/**
- * dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
- *				      @cpu_dev using operating-points-v2
- *				      bindings.
- *
- * @cpu_dev:	CPU device for which we do this operation
- * @cpumask:	cpumask to update with information of sharing CPUs
- *
- * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
- *
- * Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- */
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
-{
-	struct device_node *np, *tmp_np;
-	struct device *tcpu_dev;
-	int cpu, ret = 0;
-
-	/* Get OPP descriptor node */
-	np = _of_get_opp_desc_node(cpu_dev);
-	if (!np) {
-		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
-		return -ENOENT;
-	}
-
-	cpumask_set_cpu(cpu_dev->id, cpumask);
-
-	/* OPPs are shared ? */
-	if (!of_property_read_bool(np, "opp-shared"))
-		goto put_cpu_node;
-
-	for_each_possible_cpu(cpu) {
-		if (cpu == cpu_dev->id)
-			continue;
-
-		tcpu_dev = get_cpu_device(cpu);
-		if (!tcpu_dev) {
-			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
-				__func__, cpu);
-			ret = -ENODEV;
-			goto put_cpu_node;
-		}
-
-		/* Get OPP descriptor node */
-		tmp_np = _of_get_opp_desc_node(tcpu_dev);
-		if (!tmp_np) {
-			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
-				__func__);
-			ret = -ENOENT;
-			goto put_cpu_node;
-		}
-
-		/* CPUs are sharing opp node */
-		if (np == tmp_np)
-			cpumask_set_cpu(cpu, cpumask);
-
-		of_node_put(tmp_np);
-	}
-
-put_cpu_node:
-	of_node_put(np);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
-#endif
-
 /**
  * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by few CPUs
  * @cpu_dev:	CPU device for which we do this operation
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
new file mode 100644
index 0000000..94d2010
--- /dev/null
+++ b/drivers/base/power/opp/of.c
@@ -0,0 +1,591 @@
+/*
+ * Generic OPP OF helpers
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/export.h>
+
+#include "opp.h"
+
+static struct opp_table *_managed_opp(const struct device_node *np)
+{
+	struct opp_table *opp_table;
+
+	list_for_each_entry_rcu(opp_table, &opp_tables, node) {
+		if (opp_table->np == np) {
+			/*
+			 * Multiple devices can point to the same OPP table and
+			 * so will have same node-pointer, np.
+			 *
+			 * But the OPPs will be considered as shared only if the
+			 * OPP table contains a "opp-shared" property.
+			 */
+			return opp_table->shared_opp ? opp_table : NULL;
+		}
+	}
+
+	return NULL;
+}
+
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev)
+{
+	struct device_node *np;
+
+	/*
+	 * Only required for backward compatibility with v1 bindings, but isn't
+	 * harmful for other cases. And so we do it unconditionally.
+	 */
+	np = of_node_get(dev->of_node);
+	if (np) {
+		u32 val;
+
+		if (!of_property_read_u32(np, "clock-latency", &val))
+			opp_table->clock_latency_ns_max = val;
+		of_property_read_u32(np, "voltage-tolerance",
+				     &opp_table->voltage_tolerance_v1);
+		of_node_put(np);
+	}
+}
+
+static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
+			      struct device_node *np)
+{
+	unsigned int count = opp_table->supported_hw_count;
+	u32 version;
+	int ret;
+
+	if (!opp_table->supported_hw)
+		return true;
+
+	while (count--) {
+		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+						 &version);
+		if (ret) {
+			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+				 __func__, count, ret);
+			return false;
+		}
+
+		/* Both of these are bitwise masks of the versions */
+		if (!(version & opp_table->supported_hw[count]))
+			return false;
+	}
+
+	return true;
+}
+
+/* TODO: Support multiple regulators */
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+			      struct opp_table *opp_table)
+{
+	u32 microvolt[3] = {0};
+	u32 val;
+	int count, ret;
+	struct property *prop = NULL;
+	char name[NAME_MAX];
+
+	/* Search for "opp-microvolt-<name>" */
+	if (opp_table->prop_name) {
+		snprintf(name, sizeof(name), "opp-microvolt-%s",
+			 opp_table->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microvolt" */
+		sprintf(name, "opp-microvolt");
+		prop = of_find_property(opp->np, name, NULL);
+
+		/* Missing property isn't a problem, but an invalid entry is */
+		if (!prop)
+			return 0;
+	}
+
+	count = of_property_count_u32_elems(opp->np, name);
+	if (count < 0) {
+		dev_err(dev, "%s: Invalid %s property (%d)\n",
+			__func__, name, count);
+		return count;
+	}
+
+	/* There can be one or three elements here */
+	if (count != 1 && count != 3) {
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+			__func__, name, count);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+	if (ret) {
+		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
+		return -EINVAL;
+	}
+
+	opp->u_volt = microvolt[0];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
+
+	/* Search for "opp-microamp-<name>" */
+	prop = NULL;
+	if (opp_table->prop_name) {
+		snprintf(name, sizeof(name), "opp-microamp-%s",
+			 opp_table->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microamp" */
+		sprintf(name, "opp-microamp");
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (prop && !of_property_read_u32(opp->np, name, &val))
+		opp->u_amp = val;
+
+	return 0;
+}
+
+/**
+ * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
+ *				  entries
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Free OPPs created using static entries present in DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_remove_table(struct device *dev)
+{
+	_dev_pm_opp_remove_table(dev, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
+
+/* Returns opp descriptor node for a device, caller must do of_node_put() */
+struct device_node *_of_get_opp_desc_node(struct device *dev)
+{
+	/*
+	 * TODO: Support for multiple OPP tables.
+	 *
+	 * There should be only ONE phandle present in "operating-points-v2"
+	 * property.
+	 */
+
+	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
+}
+
+/**
+ * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
+ * @dev:	device for which we do this operation
+ * @np:		device node
+ *
+ * This function adds an opp definition to the opp table and returns status. The
+ * opp can be controlled using dev_pm_opp_enable/disable functions and may be
+ * removed by dev_pm_opp_remove.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -EINVAL	Failed parsing the OPP node
+ */
+static int _opp_add_static_v2(struct device *dev, struct device_node *np)
+{
+	struct opp_table *opp_table;
+	struct dev_pm_opp *new_opp;
+	u64 rate;
+	u32 val;
+	int ret;
+
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
+
+	new_opp = _allocate_opp(dev, &opp_table);
+	if (!new_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	ret = of_property_read_u64(np, "opp-hz", &rate);
+	if (ret < 0) {
+		dev_err(dev, "%s: opp-hz not found\n", __func__);
+		goto free_opp;
+	}
+
+	/* Check if the OPP supports hardware's hierarchy of versions or not */
+	if (!_opp_is_supported(dev, opp_table, np)) {
+		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+		goto free_opp;
+	}
+
+	/*
+	 * Rate is defined as an unsigned long in clk API, and so casting
+	 * explicitly to its type. Must be fixed once rate is 64 bit
+	 * guaranteed in clk API.
+	 */
+	new_opp->rate = (unsigned long)rate;
+	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
+
+	new_opp->np = np;
+	new_opp->dynamic = false;
+	new_opp->available = true;
+
+	if (!of_property_read_u32(np, "clock-latency-ns", &val))
+		new_opp->clock_latency_ns = val;
+
+	ret = opp_parse_supplies(new_opp, dev, opp_table);
+	if (ret)
+		goto free_opp;
+
+	ret = _opp_add(dev, new_opp, opp_table);
+	if (ret)
+		goto free_opp;
+
+	/* OPP to select on device suspend */
+	if (of_property_read_bool(np, "opp-suspend")) {
+		if (opp_table->suspend_opp) {
+			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
+				 __func__, opp_table->suspend_opp->rate,
+				 new_opp->rate);
+		} else {
+			new_opp->suspend = true;
+			opp_table->suspend_opp = new_opp;
+		}
+	}
+
+	if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
+		opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
+
+	mutex_unlock(&opp_table_lock);
+
+	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
+		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
+		 new_opp->u_volt_min, new_opp->u_volt_max,
+		 new_opp->clock_latency_ns);
+
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
+	return 0;
+
+free_opp:
+	_opp_remove(opp_table, new_opp, false);
+unlock:
+	mutex_unlock(&opp_table_lock);
+	return ret;
+}
+
+/* Initializes OPP tables based on new bindings */
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+{
+	struct device_node *np;
+	struct opp_table *opp_table;
+	int ret = 0, count = 0;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _managed_opp(opp_np);
+	if (opp_table) {
+		/* OPPs are already managed */
+		if (!_add_opp_dev(dev, opp_table))
+			ret = -ENOMEM;
+		mutex_unlock(&opp_table_lock);
+		return ret;
+	}
+	mutex_unlock(&opp_table_lock);
+
+	/* We have opp-table node now, iterate over it and add OPPs */
+	for_each_available_child_of_node(opp_np, np) {
+		count++;
+
+		ret = _opp_add_static_v2(dev, np);
+		if (ret) {
+			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
+				ret);
+			goto free_table;
+		}
+	}
+
+	/* There should be one of more OPP defined */
+	if (WARN_ON(!count))
+		return -ENOENT;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _find_opp_table(dev);
+	if (WARN_ON(IS_ERR(opp_table))) {
+		ret = PTR_ERR(opp_table);
+		mutex_unlock(&opp_table_lock);
+		goto free_table;
+	}
+
+	opp_table->np = opp_np;
+	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+
+	mutex_unlock(&opp_table_lock);
+
+	return 0;
+
+free_table:
+	dev_pm_opp_of_remove_table(dev);
+
+	return ret;
+}
+
+/* Initializes OPP tables based on old-deprecated bindings */
+static int _of_add_opp_table_v1(struct device *dev)
+{
+	const struct property *prop;
+	const __be32 *val;
+	int nr;
+
+	prop = of_find_property(dev->of_node, "operating-points", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (!prop->value)
+		return -ENODATA;
+
+	/*
+	 * Each OPP is a set of tuples consisting of frequency and
+	 * voltage like <freq-kHz vol-uV>.
+	 */
+	nr = prop->length / sizeof(u32);
+	if (nr % 2) {
+		dev_err(dev, "%s: Invalid OPP table\n", __func__);
+		return -EINVAL;
+	}
+
+	val = prop->value;
+	while (nr) {
+		unsigned long freq = be32_to_cpup(val++) * 1000;
+		unsigned long volt = be32_to_cpup(val++);
+
+		if (_opp_add_v1(dev, freq, volt, false))
+			dev_warn(dev, "%s: Failed to add OPP %ld\n",
+				 __func__, freq);
+		nr -= 2;
+	}
+
+	return 0;
+}
+
+/**
+ * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -ENODEV	when 'operating-points' property is not found or is invalid data
+ *		in device node.
+ * -ENODATA	when empty 'operating-points' property is found
+ * -EINVAL	when invalid entries are found in opp-v2 table
+ */
+int dev_pm_opp_of_add_table(struct device *dev)
+{
+	struct device_node *opp_np;
+	int ret;
+
+	/*
+	 * OPPs have two version of bindings now. The older one is deprecated,
+	 * try for the new binding first.
+	 */
+	opp_np = _of_get_opp_desc_node(dev);
+	if (!opp_np) {
+		/*
+		 * Try old-deprecated bindings for backward compatibility with
+		 * older dtbs.
+		 */
+		return _of_add_opp_table_v1(dev);
+	}
+
+	ret = _of_add_opp_table_v2(dev, opp_np);
+	of_node_put(opp_np);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
+
+/* CPU device specific helpers */
+
+/**
+ * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used only to remove static entries created from DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
+{
+	_dev_pm_opp_cpumask_remove_table(cpumask, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
+
+/**
+ * dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be added.
+ *
+ * This adds the OPP tables for CPUs present in the @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
+{
+	struct device *cpu_dev;
+	int cpu, ret = 0;
+
+	WARN_ON(cpumask_empty(cpumask));
+
+	for_each_cpu(cpu, cpumask) {
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__,
+			       cpu);
+			continue;
+		}
+
+		ret = dev_pm_opp_of_add_table(cpu_dev);
+		if (ret) {
+			pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
+			       __func__, cpu, ret);
+
+			/* Free all other OPPs */
+			dev_pm_opp_of_cpumask_remove_table(cpumask);
+			break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
+
+/*
+ * Works only for OPP v2 bindings.
+ *
+ * Returns -ENOENT if operating-points-v2 bindings aren't supported.
+ */
+/**
+ * dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
+ *				      @cpu_dev using operating-points-v2
+ *				      bindings.
+ *
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask to update with information of sharing CPUs
+ *
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
+				   struct cpumask *cpumask)
+{
+	struct device_node *np, *tmp_np;
+	struct device *tcpu_dev;
+	int cpu, ret = 0;
+
+	/* Get OPP descriptor node */
+	np = _of_get_opp_desc_node(cpu_dev);
+	if (!np) {
+		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
+		return -ENOENT;
+	}
+
+	cpumask_set_cpu(cpu_dev->id, cpumask);
+
+	/* OPPs are shared ? */
+	if (!of_property_read_bool(np, "opp-shared"))
+		goto put_cpu_node;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == cpu_dev->id)
+			continue;
+
+		tcpu_dev = get_cpu_device(cpu);
+		if (!tcpu_dev) {
+			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+				__func__, cpu);
+			ret = -ENODEV;
+			goto put_cpu_node;
+		}
+
+		/* Get OPP descriptor node */
+		tmp_np = _of_get_opp_desc_node(tcpu_dev);
+		if (!tmp_np) {
+			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
+				__func__);
+			ret = -ENOENT;
+			goto put_cpu_node;
+		}
+
+		/* CPUs are sharing opp node */
+		if (np == tmp_np)
+			cpumask_set_cpu(cpu, cpumask);
+
+		of_node_put(tmp_np);
+	}
+
+put_cpu_node:
+	of_node_put(np);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index f67f806..20f3be2 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -28,6 +28,8 @@ struct regulator;
 /* Lock to allow exclusive modification to the device and opp lists */
 extern struct mutex opp_table_lock;
 
+extern struct list_head opp_tables;
+
 /*
  * Internal data structure organization with the OPP layer library is as
  * follows:
@@ -183,6 +185,18 @@ struct opp_table {
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all);
+struct dev_pm_opp *_allocate_opp(struct device *dev, struct opp_table **opp_table);
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table);
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp, bool notify);
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, bool dynamic);
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of);
+
+#ifdef CONFIG_OF
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev);
+#else
+static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {}
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 void opp_debug_remove_one(struct dev_pm_opp *opp);
-- 
cgit v0.10.2


From 9485e4ca0b486248ce07d7dd1411a1080d24ed0d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 6 May 2016 01:30:37 +0200
Subject: cpufreq: governor: Fix handling of special cases in dbs_update()

As reported in KBZ 69821:

"With CONFIG_HZ_PERIODIC=y cpu stays at the lowest frequcency 800MHz
 even if usage goes to 100%, frequency does not scale up, the governor
 in use is ondemand. Neither works conservative. Performance and
 userspace governors work as expected.

 With CONFIG_NO_HZ_IDLE or CONFIG_NO_HZ_FULL cpu scales up with ondemand
 as expected."

Analysis carried out by Chen Yu leads to the conclusion that the
observed issue is due to idle_time in dbs_update() representing a
negative number in which case the function will return 0 as the load
(unless load is greater than 0 for another CPU sharing the policy),
although that need not be the right choice.

Indeed, idle_time representing a negative number means that during
the last sampling interval the CPU was almost 100% busy on the rough
average, so 100 should be returned as the load in that case.

Modify the code accordingly and rearrange it to clarify the handling
of all of the special cases in it.  While at it, also avoid returning
zero as the load if time_elapsed is 0 (it doesn't really make sense
to return 0 then).

Link: https://bugzilla.kernel.org/show_bug.cgi?id=69821
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Timo Valtoaho <timo.valtoaho@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index eb2fdbd..be498d5 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -156,47 +156,62 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			j_cdbs->prev_cpu_nice = cur_nice;
 		}
 
-		if (unlikely(!time_elapsed || time_elapsed < idle_time))
-			continue;
-
-		/*
-		 * If the CPU had gone completely idle, and a task just woke up
-		 * on this CPU now, it would be unfair to calculate 'load' the
-		 * usual way for this elapsed time-window, because it will show
-		 * near-zero load, irrespective of how CPU intensive that task
-		 * actually is. This is undesirable for latency-sensitive bursty
-		 * workloads.
-		 *
-		 * To avoid this, we reuse the 'load' from the previous
-		 * time-window and give this task a chance to start with a
-		 * reasonably high CPU frequency. (However, we shouldn't over-do
-		 * this copy, lest we get stuck at a high load (high frequency)
-		 * for too long, even when the current system load has actually
-		 * dropped down. So we perform the copy only once, upon the
-		 * first wake-up from idle.)
-		 *
-		 * Detecting this situation is easy: the governor's utilization
-		 * update handler would not have run during CPU-idle periods.
-		 * Hence, an unusually large 'time_elapsed' (as compared to the
-		 * sampling rate) indicates this scenario.
-		 *
-		 * prev_load can be zero in two cases and we must recalculate it
-		 * for both cases:
-		 * - during long idle intervals
-		 * - explicitly set to zero
-		 */
-		if (unlikely(time_elapsed > 2 * sampling_rate &&
-			     j_cdbs->prev_load)) {
+		if (unlikely(!time_elapsed)) {
+			/*
+			 * That can only happen when this function is called
+			 * twice in a row with a very short interval between the
+			 * calls, so the previous load value can be used then.
+			 */
 			load = j_cdbs->prev_load;
-
+		} else if (unlikely(time_elapsed > 2 * sampling_rate &&
+				    j_cdbs->prev_load)) {
 			/*
-			 * Perform a destructive copy, to ensure that we copy
-			 * the previous load only once, upon the first wake-up
-			 * from idle.
+			 * If the CPU had gone completely idle and a task has
+			 * just woken up on this CPU now, it would be unfair to
+			 * calculate 'load' the usual way for this elapsed
+			 * time-window, because it would show near-zero load,
+			 * irrespective of how CPU intensive that task actually
+			 * was. This is undesirable for latency-sensitive bursty
+			 * workloads.
+			 *
+			 * To avoid this, reuse the 'load' from the previous
+			 * time-window and give this task a chance to start with
+			 * a reasonably high CPU frequency. However, that
+			 * shouldn't be over-done, lest we get stuck at a high
+			 * load (high frequency) for too long, even when the
+			 * current system load has actually dropped down, so
+			 * clear prev_load to guarantee that the load will be
+			 * computed again next time.
+			 *
+			 * Detecting this situation is easy: the governor's
+			 * utilization update handler would not have run during
+			 * CPU-idle periods.  Hence, an unusually large
+			 * 'time_elapsed' (as compared to the sampling rate)
+			 * indicates this scenario.
 			 */
+			load = j_cdbs->prev_load;
 			j_cdbs->prev_load = 0;
 		} else {
-			load = 100 * (time_elapsed - idle_time) / time_elapsed;
+			if (time_elapsed >= idle_time) {
+				load = 100 * (time_elapsed - idle_time) / time_elapsed;
+			} else {
+				/*
+				 * That can happen if idle_time is returned by
+				 * get_cpu_idle_time_jiffy().  In that case
+				 * idle_time is roughly equal to the difference
+				 * between time_elapsed and "busy time" obtained
+				 * from CPU statistics.  Then, the "busy time"
+				 * can end up being greater than time_elapsed
+				 * (for example, if jiffies_64 and the CPU
+				 * statistics are updated by different CPUs),
+				 * so idle_time may in fact be negative.  That
+				 * means, though, that the CPU was busy all
+				 * the time (on the rough average) during the
+				 * last sampling interval and 100 can be
+				 * returned as the load.
+				 */
+				load = (int)idle_time < 0 ? 100 : 0;
+			}
 			j_cdbs->prev_load = load;
 		}
 
-- 
cgit v0.10.2


From bf7cdff19429a72c32257da70d5345ffca66d861 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 6 May 2016 15:00:38 +0200
Subject: cpufreq: schedutil: Make it depend on CONFIG_SMP

Make the schedutil cpufreq governor depend on CONFIG_SMP, because
the scheduler-provided utilization numbers used by it are only
available with CONFIG_SMP set.

Fixes: 9bdcb44e391d (cpufreq: schedutil: New governor based on scheduler utilization data)
Reported-by: Steve Muckle <steve.muckle@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 5d74826..e9849b4 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -200,7 +200,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 config CPU_FREQ_GOV_SCHEDUTIL
 	tristate "'schedutil' cpufreq policy governor"
-	depends on CPU_FREQ
+	depends on CPU_FREQ && SMP
 	select CPU_FREQ_GOV_ATTR_SET
 	select IRQ_WORK
 	help
-- 
cgit v0.10.2


From f96fd0c86f081c27a0ba5efadfc13f612b4071d3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 7 May 2016 02:24:48 +0200
Subject: intel_pstate: Clean up intel_pstate_get()

intel_pstate_get() contains a local variable that's initialized but
never used and it can be written in fewer lines of code, so clean
it up.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3addb22..cde1a4e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1404,14 +1404,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
 static unsigned int intel_pstate_get(unsigned int cpu_num)
 {
-	struct sample *sample;
-	struct cpudata *cpu;
+	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	cpu = all_cpu_data[cpu_num];
-	if (!cpu)
-		return 0;
-	sample = &cpu->sample;
-	return get_avg_frequency(cpu);
+	return cpu ? get_avg_frequency(cpu) : 0;
 }
 
 static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
-- 
cgit v0.10.2


From 1fd3ff2874f79c04354f3e80e583afbe6fa6eaa2 Mon Sep 17 00:00:00 2001
From: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Date: Tue, 3 May 2016 20:49:35 +0530
Subject: cpufreq: powernv: Move smp_call_function_any() out of irq safe block

Fix a WARN_ON caused by smp_call_function_any() when irq is disabled,
because of changes made in the patch ('cpufreq: powernv: Ramp-down
 global pstate slower than local-pstate')
https://patchwork.ozlabs.org/patch/612058/

 WARNING: CPU: 0 PID: 4 at kernel/smp.c:291
smp_call_function_single+0x170/0x180

 Call Trace:
 [c0000007f648f9f0] [c0000007f648fa90] 0xc0000007f648fa90 (unreliable)
 [c0000007f648fa30] [c0000000001430e0] smp_call_function_any+0x170/0x1c0
 [c0000007f648fa90] [c0000000007b4b00]
powernv_cpufreq_target_index+0xe0/0x250
 [c0000007f648fb00] [c0000000007ac9dc]
__cpufreq_driver_target+0x20c/0x3d0
 [c0000007f648fbc0] [c0000000007b1b4c] od_dbs_timer+0xcc/0x260
 [c0000007f648fc10] [c0000000007b3024] dbs_work_handler+0x54/0xa0
 [c0000007f648fc50] [c0000000000c49a8] process_one_work+0x1d8/0x590
 [c0000007f648fce0] [c0000000000c4e08] worker_thread+0xa8/0x660
 [c0000007f648fd80] [c0000000000cca88] kthread+0x108/0x130
 [c0000007f648fe30] [c0000000000095e8] ret_from_kernel_thread+0x5c/0x74

- Calling smp_call_function_any() with interrupt disabled (through
 spin_lock_irqsave) could cause a deadlock, as smp_call_function_any()
 relies on the IPI to complete. This is detected in the
 smp_call_function_any() call and hence the WARN_ON.

- As the spinlock (gpstates->lock) is only used to synchronize access of
 global_pstate_info  between timer irq handler and target_index calls. And
 the timer irq handler just try_locks() hence it would not cause a
 deadlock. Hence could do without making spinlocks irq safe.

- As the smp_call_function_any() is a blocking call and does not access
 global_pstates_info, it could reduce the critcal section by moving
 smp_call_function_any() after giving up the lock.

Reported-by: Abdul Haleem <abdhalee@linux.vnet.linux.com>
Signed-off-by: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 144c732..1f0e20c 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -581,9 +581,10 @@ void gpstate_timer_handler(unsigned long data)
 	gpstates->last_gpstate = freq_data.gpstate_id;
 	gpstates->last_lpstate = freq_data.pstate_id;
 
+	spin_unlock(&gpstates->gpstate_lock);
+
 	/* Timer may get migrated to a different cpu on cpu hot unplug */
 	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
-	spin_unlock(&gpstates->gpstate_lock);
 }
 
 /*
@@ -596,7 +597,6 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 {
 	struct powernv_smp_call_data freq_data;
 	unsigned int cur_msec, gpstate_id;
-	unsigned long flags;
 	struct global_pstate_info *gpstates = policy->driver_data;
 
 	if (unlikely(rebooting) && new_index != get_nominal_index())
@@ -607,7 +607,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 
 	cur_msec = jiffies_to_msecs(get_jiffies_64());
 
-	spin_lock_irqsave(&gpstates->gpstate_lock, flags);
+	spin_lock(&gpstates->gpstate_lock);
 	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
 
 	if (!gpstates->last_sampled_time) {
@@ -654,13 +654,14 @@ gpstates_done:
 	gpstates->last_gpstate = freq_data.gpstate_id;
 	gpstates->last_lpstate = freq_data.pstate_id;
 
+	spin_unlock(&gpstates->gpstate_lock);
+
 	/*
 	 * Use smp_call_function to send IPI and execute the
 	 * mtspr on target CPU.  We could do that without IPI
 	 * if current CPU is within policy->cpus (core)
 	 */
 	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
-	spin_unlock_irqrestore(&gpstates->gpstate_lock, flags);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 0bc10b93f2d9c6089ce968681ef9febecdc9b8e0 Mon Sep 17 00:00:00 2001
From: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Date: Tue, 3 May 2016 20:49:36 +0530
Subject: cpufreq: powernv: del_timer_sync when global and local pstate are
 equal

When global and local pstate are equal in a powernv_target_index() call,
we don't queue a timer. But we may have timer already queued for future.
This could cause the timer to fire one additional time for no use.

Signed-off-by: Akshay Adiga <akshay.adiga@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 1f0e20c..54c4536 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -647,6 +647,8 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 	 */
 	if (gpstate_id != freq_data.pstate_id)
 		queue_gpstate_timer(gpstates);
+	else
+		del_timer_sync(&gpstates->timer);
 
 gpstates_done:
 	freq_data.gpstate_id = gpstate_id;
-- 
cgit v0.10.2


From cfe9492fdf04d1191e7ac3a43e6df5e20c3f4164 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 May 2016 14:52:01 +0200
Subject: cpufreq: schedutil: Make default depend on CONFIG_SMP

CPU_FREQ_GOV_SCHEDUTIL gained a dependency on SMP, so now we
get a warning if it gets selected by CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
without SMP:

warning: (CPU_FREQ_DEFAULT_GOV_SCHEDUTIL) selects CPU_FREQ_GOV_SCHEDUTIL which has unmet direct dependencies (CPU_FREQ && SMP)

This adds another dependency to avoid the problem.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: bf7cdff19429 (cpufreq: schedutil: Make it depend on CONFIG_SMP)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index e9849b4..b72c7d7 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -110,6 +110,7 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 
 config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 	bool "schedutil"
+	depends on SMP
 	select CPU_FREQ_GOV_SCHEDUTIL
 	select CPU_FREQ_GOV_PERFORMANCE
 	help
-- 
cgit v0.10.2


From 4578ee7e1defe534582fe3afdb747b86023207f0 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 11 May 2016 14:33:08 +0800
Subject: intel_pstate: Avoid unnecessary synchronize_sched() during
 initialization

Currently, in intel_pstate_clear_update_util_hook(), after
clearing the utilization update hook, we leverage
synchronize_sched() to deal with synchronization, which
is a little bit time-costly because synchronize_sched()
has to wait for all the CPUs to go through a grace period.

Actually, the synchronize_sched() is not necessary if the utilization
update hook has not been set for the given CPU yet, so make the driver
check if that's the case and avoid the synchronize_sched() call then.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=116371
Tested-by: Tian Ye <yex.tian@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
[ rjw : Rebase ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cde1a4e..36953b5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -168,6 +168,7 @@ struct _pid {
  * struct cpudata -	Per CPU instance data storage
  * @cpu:		CPU number for this instance data
  * @update_util:	CPUFreq utility callback information
+ * @update_util_set:	CPUFreq utility callback is set
  * @pstate:		Stores P state limits for this CPU
  * @vid:		Stores VID limits for this CPU
  * @pid:		Stores PID parameters for this CPU
@@ -187,6 +188,7 @@ struct cpudata {
 	int cpu;
 
 	struct update_util_data update_util;
+	bool   update_util_set;
 
 	struct pstate_data pstate;
 	struct vid_data vid;
@@ -1417,11 +1419,18 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
 				     intel_pstate_update_util);
+	cpu->update_util_set = true;
 }
 
 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 {
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+
+	if (!cpu_data->update_util_set)
+		return;
+
 	cpufreq_remove_update_util_hook(cpu);
+	cpu_data->update_util_set = false;
 	synchronize_sched();
 }
 
-- 
cgit v0.10.2


From a1c9787dc38097d554f9da8372031b3d6f8c140a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 11 May 2016 19:09:12 +0200
Subject: intel_pstate: Clarify average performance computation

The core_pct_busy field of struct sample actually contains the
average performace during the last sampling period (in percent)
and not the utilization of the core as suggested by its name
which is confusing.

For this reason, change the name of that field to core_avg_perf
and rename the function that computes its value accordingly.

Also notice that storing this value as percentage requires a costly
integer multiplication to be carried out in a hot path, so instead
store it as an "extended fixed point" value with more fraction bits
and update the code using it accordingly (it is better to change the
name of the field along with its meaning in one go than to make those
two changes separately, as that would likely lead to more
confusion).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36953b5..19712e2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -49,6 +49,9 @@
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
 
+#define EXT_BITS 6
+#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
@@ -70,12 +73,22 @@ static inline int ceiling_fp(int32_t x)
 	return ret;
 }
 
+static inline u64 mul_ext_fp(u64 x, u64 y)
+{
+	return (x * y) >> EXT_FRAC_BITS;
+}
+
+static inline u64 div_ext_fp(u64 x, u64 y)
+{
+	return div64_u64(x << EXT_FRAC_BITS, y);
+}
+
 /**
  * struct sample -	Store performance sample
- * @core_pct_busy:	Ratio of APERF/MPERF in percent, which is actual
+ * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
  *			performance during last sample period
  * @busy_scaled:	Scaled busy value which is used to calculate next
- *			P state. This can be different than core_pct_busy
+ *			P state. This can be different than core_avg_perf
  *			to account for cpu idle period
  * @aperf:		Difference of actual performance frequency clock count
  *			read from APERF MSR between last and current sample
@@ -90,7 +103,7 @@ static inline int ceiling_fp(int32_t x)
  * data for choosing next P State.
  */
 struct sample {
-	int32_t core_pct_busy;
+	int32_t core_avg_perf;
 	int32_t busy_scaled;
 	u64 aperf;
 	u64 mperf;
@@ -1152,15 +1165,11 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_min_pstate(cpu);
 }
 
-static inline void intel_pstate_calc_busy(struct cpudata *cpu)
+static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
-	int64_t core_pct;
-
-	core_pct = sample->aperf * int_tofp(100);
-	core_pct = div64_u64(core_pct, sample->mperf);
 
-	sample->core_pct_busy = (int32_t)core_pct;
+	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
 }
 
 static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
@@ -1203,9 +1212,8 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
 {
-	return fp_toint(mul_fp(cpu->sample.core_pct_busy,
-			       int_tofp(cpu->pstate.max_pstate_physical *
-						cpu->pstate.scaling / 100)));
+	return mul_ext_fp(cpu->sample.core_avg_perf,
+			  cpu->pstate.max_pstate_physical * cpu->pstate.scaling);
 }
 
 static inline int32_t get_avg_pstate(struct cpudata *cpu)
@@ -1265,10 +1273,10 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	 * period. The result will be a percentage of busy at a
 	 * specified pstate.
 	 */
-	core_busy = cpu->sample.core_pct_busy;
 	max_pstate = cpu->pstate.max_pstate_physical;
 	current_pstate = cpu->pstate.current_pstate;
-	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+	core_busy = mul_ext_fp(cpu->sample.core_avg_perf,
+			       div_fp(100 * max_pstate, current_pstate));
 
 	/*
 	 * Since our utilization update callback will not run unless we are
@@ -1317,7 +1325,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 	intel_pstate_update_pstate(cpu, target_pstate);
 
 	sample = &cpu->sample;
-	trace_pstate_sample(fp_toint(sample->core_pct_busy),
+	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
 		fp_toint(sample->busy_scaled),
 		from,
 		cpu->pstate.current_pstate,
@@ -1337,7 +1345,7 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 		bool sample_taken = intel_pstate_sample(cpu, time);
 
 		if (sample_taken) {
-			intel_pstate_calc_busy(cpu);
+			intel_pstate_calc_avg_perf(cpu);
 			if (!hwp_active)
 				intel_pstate_adjust_busy_pstate(cpu);
 		}
-- 
cgit v0.10.2


From 8edb0a6e48d147bb2aa466c58e03c52d2b0d6ee7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 11 May 2016 19:10:42 +0200
Subject: intel_pstate: Use sample.core_avg_perf in get_avg_pstate()

Notice that get_avg_pstate() can use sample.core_avg_perf instead of
carrying the same division again, so make it do that.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 19712e2..ff5c591 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1218,8 +1218,8 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu)
 
 static inline int32_t get_avg_pstate(struct cpudata *cpu)
 {
-	return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf,
-			 cpu->sample.mperf);
+	return mul_ext_fp(cpu->pstate.max_pstate_physical,
+			  cpu->sample.core_avg_perf);
 }
 
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
-- 
cgit v0.10.2


From 1aa7a6e2b8105f22a5f7d6def281f776459c95ba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 11 May 2016 19:11:26 +0200
Subject: intel_pstate: Clean up get_target_pstate_use_performance()

The comments and the core_busy variable name in
get_target_pstate_use_performance() are totally confusing,
so modify them to reflect what's going on.

The results of the computations should be the same as before.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ff5c591..b76a98d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1259,43 +1259,38 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 {
-	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
 	/*
-	 * core_busy is the ratio of actual performance to max
-	 * max_pstate is the max non turbo pstate available
-	 * current_pstate was the pstate that was requested during
-	 * 	the last sample period.
-	 *
-	 * We normalize core_busy, which was our actual percent
-	 * performance to what we requested during the last sample
-	 * period. The result will be a percentage of busy at a
-	 * specified pstate.
+	 * perf_scaled is the average performance during the last sampling
+	 * period scaled by the ratio of the maximum P-state to the P-state
+	 * requested last time (in percent).  That measures the system's
+	 * response to the previous P-state selection.
 	 */
 	max_pstate = cpu->pstate.max_pstate_physical;
 	current_pstate = cpu->pstate.current_pstate;
-	core_busy = mul_ext_fp(cpu->sample.core_avg_perf,
+	perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
 			       div_fp(100 * max_pstate, current_pstate));
 
 	/*
 	 * Since our utilization update callback will not run unless we are
 	 * in C0, check if the actual elapsed time is significantly greater (3x)
 	 * than our sample interval.  If it is, then we were idle for a long
-	 * enough period of time to adjust our busyness.
+	 * enough period of time to adjust our performance metric.
 	 */
 	duration_ns = cpu->sample.time - cpu->last_sample_time;
 	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
 		sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
-		core_busy = mul_fp(core_busy, sample_ratio);
+		perf_scaled = mul_fp(perf_scaled, sample_ratio);
 	} else {
 		sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
 		if (sample_ratio < int_tofp(1))
-			core_busy = 0;
+			perf_scaled = 0;
 	}
 
-	cpu->sample.busy_scaled = core_busy;
-	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
+	cpu->sample.busy_scaled = perf_scaled;
+	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
 }
 
 static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
-- 
cgit v0.10.2