From 2c906b317b2d9c7e32b0d513e102bd68a2c49112 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Wed, 22 Mar 2006 09:54:10 +0000
Subject: [PATCH] cpufreq_conservative: aligning of codebase with ondemand

Since the conservative govenor was released its codebase has drifted from the
the direction and updates that have been applied to the ondemand govornor.

This patch addresses the lack of updates in that period and brings
conservative back up to date.  The resulting diff file between
cpufreq_ondemand.c and cpufreq_conservative.c is now much smaller and shows
more clearly the differences between the two.

Another reason to do this is ages ago, knowingly, I did a piss poor attempt
at making conservative less responsive by knocking up
DEF_SAMPLING_RATE_LATENCY_MULTIPLIER by two orders of magnitude.  I did fix
this ages ago but in my dis-organisation I must have toasted the diff and
left it the way it was.  About two weeks ago a user contacted me saying he
was having problems with the conservative governor with his AMD Athlon XP-M
2800+ as /sys/devices/system/cpu/cpu0/cpufreq/conservative showed
  sampling_rate_min   9950000
  sampling_rate_max   1360065408

Nine seconds to decide about changing the frequency....not too responsive :)

Signed-off-by: Alexander Clouter <alex-kernel@digriz.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index ac38766..adecd31 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -35,12 +35,7 @@
  */
 
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
-#define MIN_FREQUENCY_UP_THRESHOLD		(0)
-#define MAX_FREQUENCY_UP_THRESHOLD		(100)
-
 #define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
-#define MIN_FREQUENCY_DOWN_THRESHOLD		(0)
-#define MAX_FREQUENCY_DOWN_THRESHOLD		(100)
 
 /* 
  * The polling frequency of this governor depends on the capability of 
@@ -53,10 +48,14 @@
  * All times here are in uS.
  */
 static unsigned int 				def_sampling_rate;
-#define MIN_SAMPLING_RATE			(def_sampling_rate / 2)
+#define MIN_SAMPLING_RATE_RATIO			(2)
+/* for correct statistics, we need at least 10 ticks between each measure */
+#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
+#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
 #define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
-#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(100000)
-#define DEF_SAMPLING_DOWN_FACTOR		(5)
+#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
+#define DEF_SAMPLING_DOWN_FACTOR		(1)
+#define MAX_SAMPLING_DOWN_FACTOR		(10)
 #define TRANSITION_LATENCY_LIMIT		(10 * 1000)
 
 static void do_dbs_timer(void *data);
@@ -136,7 +135,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
 	unsigned int input;
 	int ret;
 	ret = sscanf (buf, "%u", &input);
-	if (ret != 1 )
+	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
 		return -EINVAL;
 
 	mutex_lock(&dbs_mutex);
@@ -173,8 +172,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused,
 	ret = sscanf (buf, "%u", &input);
 
 	mutex_lock(&dbs_mutex);
-	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || 
-			input < MIN_FREQUENCY_UP_THRESHOLD ||
+	if (ret != 1 || input > 100 || input < 0 ||
 			input <= dbs_tuners_ins.down_threshold) {
 		mutex_unlock(&dbs_mutex);
 		return -EINVAL;
@@ -194,8 +192,7 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused,
 	ret = sscanf (buf, "%u", &input);
 
 	mutex_lock(&dbs_mutex);
-	if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD || 
-			input < MIN_FREQUENCY_DOWN_THRESHOLD ||
+	if (ret != 1 || input > 100 || input < 0 ||
 			input >= dbs_tuners_ins.up_threshold) {
 		mutex_unlock(&dbs_mutex);
 		return -EINVAL;
@@ -337,7 +334,6 @@ static void dbs_check_cpu(int cpu)
 	 */
 
 	/* Check for frequency increase */
-
 	idle_ticks = UINT_MAX;
 	for_each_cpu_mask(j, policy->cpus) {
 		unsigned int tmp_idle_ticks, total_idle_ticks;
@@ -357,7 +353,7 @@ static void dbs_check_cpu(int cpu)
 	/* Scale idle ticks by 100 and compare with up and down ticks */
 	idle_ticks *= 100;
 	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
-		usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
 
 	if (idle_ticks < up_idle_ticks) {
 		down_skip[cpu] = 0;
@@ -398,6 +394,7 @@ static void dbs_check_cpu(int cpu)
 		struct cpu_dbs_info_s *j_dbs_info;
 
 		j_dbs_info = &per_cpu(cpu_dbs_info, j);
+		/* Check for frequency decrease */
 		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
 		tmp_idle_ticks = total_idle_ticks -
 			j_dbs_info->prev_cpu_idle_down;
@@ -414,12 +411,14 @@ static void dbs_check_cpu(int cpu)
 	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
 		dbs_tuners_ins.sampling_down_factor;
 	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
-			usecs_to_jiffies(freq_down_sampling_rate);
+		usecs_to_jiffies(freq_down_sampling_rate);
 
 	if (idle_ticks > down_idle_ticks) {
-		/* if we are already at the lowest speed then break out early
+		/*
+		 * if we are already at the lowest speed then break out early
 		 * or if we 'cannot' reduce the speed as the user might want
-		 * freq_step to be zero */
+		 * freq_step to be zero
+		 */
 		if (requested_freq[cpu] == policy->min
 				|| dbs_tuners_ins.freq_step == 0)
 			return;
@@ -434,9 +433,8 @@ static void dbs_check_cpu(int cpu)
 		if (requested_freq[cpu] < policy->min)
 			requested_freq[cpu] = policy->min;
 
-		__cpufreq_driver_target(policy,
-			requested_freq[cpu],
-			CPUFREQ_RELATION_H);
+		__cpufreq_driver_target(policy, requested_freq[cpu],
+				CPUFREQ_RELATION_H);
 		return;
 	}
 }
@@ -507,13 +505,16 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		if (dbs_enable == 1) {
 			unsigned int latency;
 			/* policy latency is in nS. Convert it to uS first */
+			latency = policy->cpuinfo.transition_latency / 1000;
+			if (latency == 0)
+				latency = 1;
 
-			latency = policy->cpuinfo.transition_latency;
-			if (latency < 1000)
-				latency = 1000;
-
-			def_sampling_rate = (latency / 1000) *
+			def_sampling_rate = latency *
 					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
+
+			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
+				def_sampling_rate = MIN_STAT_SAMPLING_RATE;
+
 			dbs_tuners_ins.sampling_rate = def_sampling_rate;
 			dbs_tuners_ins.ignore_nice = 0;
 			dbs_tuners_ins.freq_step = 5;
-- 
cgit v0.10.2


From e8a02572252f9115c2b8296c40fd8b985f06f872 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Wed, 22 Mar 2006 09:56:23 +0000
Subject: [PATCH] cpufreq_conservative: alter default responsiveness

The sensible approach to making conservative less responsive than ondemand :)
As mentioned in patch [1/4].  We do not want conservative to shoot through
all the frequencies, its point (by default) is to slowly move through them.

By default its ten times less responsive.

Signed-off-by: Alexander Clouter <alex-kernel@digriz.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index adecd31..3ca3cf0 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -509,7 +509,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			if (latency == 0)
 				latency = 1;
 
-			def_sampling_rate = latency *
+			def_sampling_rate = 10 * latency *
 					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
 
 			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
-- 
cgit v0.10.2


From 08a28e2e98aa821cf6f15f8a267beb2f33377bb9 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Wed, 22 Mar 2006 09:59:16 +0000
Subject: [PATCH] cpufreq_conservative: make for_each_cpu() safe

All these changes should make cpufreq_conservative safe in regards to the x86
for_each_cpu cpumask.h changes and whatnot.

Whilst making it safe a number of pointless for loops related to the cpu
mask's were removed.  I was never comfortable with all those for loops,
especially as the iteration is over the same data again and again for each
CPU you had in a single poll, an O(n^2) outcome to frequency scaling.

The approach I use is to assume by default no CPU's exist and it sets the
requested_freq to zero as a kind of flag, the reasoning is in the source ;)
If the CPU is queried and requested_freq is zero then it initialises the
variable to current_freq and then continues as if nothing happened which
should be the same net effect as before?

Signed-off-by: Alexander Clouter <alex-kernel@digriz.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 3ca3cf0..7498f25 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -294,31 +294,40 @@ static struct attribute_group dbs_attr_group = {
 static void dbs_check_cpu(int cpu)
 {
 	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
+	unsigned int tmp_idle_ticks, total_idle_ticks;
 	unsigned int freq_step;
 	unsigned int freq_down_sampling_rate;
-	static int down_skip[NR_CPUS];
-	static int requested_freq[NR_CPUS];
-	static unsigned short init_flag = 0;
-	struct cpu_dbs_info_s *this_dbs_info;
-	struct cpu_dbs_info_s *dbs_info;
-
+	static unsigned short down_skip[NR_CPUS];
+	static unsigned int requested_freq[NR_CPUS];
+	static unsigned int init_flag = NR_CPUS;
+	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
 	struct cpufreq_policy *policy;
-	unsigned int j;
 
-	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
 	if (!this_dbs_info->enable)
 		return;
 
-	policy = this_dbs_info->cur_policy;
-
-	if ( init_flag == 0 ) {
-		for_each_online_cpu(j) {
-			dbs_info = &per_cpu(cpu_dbs_info, j);
-			requested_freq[j] = dbs_info->cur_policy->cur;
+	if ( init_flag != 0 ) {
+		for_each_cpu(init_flag) {
+			down_skip[init_flag] = 0;
+			/* I doubt a CPU exists with a freq of 0hz :) */
+			requested_freq[init_flag] = 0;
 		}
-		init_flag = 1;
+		init_flag = 0;
 	}
 	
+	/*
+	 * If its a freshly initialised cpu we setup requested_freq.  This
+	 * check could be avoided if we did not care about a first time
+	 * stunted increase in CPU speed when there is a load.  I feel we
+	 * should be initialising this to something.  The removal of a CPU
+	 * is not a problem, after a short time the CPU should settle down
+	 * to a 'natural' frequency.
+	 */
+	if (requested_freq[cpu] == 0)
+		requested_freq[cpu] = this_dbs_info->cur_policy->cur;
+
+	policy = this_dbs_info->cur_policy;
+
 	/* 
 	 * The default safe range is 20% to 80% 
 	 * Every sampling_rate, we check
@@ -335,20 +344,15 @@ static void dbs_check_cpu(int cpu)
 
 	/* Check for frequency increase */
 	idle_ticks = UINT_MAX;
-	for_each_cpu_mask(j, policy->cpus) {
-		unsigned int tmp_idle_ticks, total_idle_ticks;
-		struct cpu_dbs_info_s *j_dbs_info;
 
-		j_dbs_info = &per_cpu(cpu_dbs_info, j);
-		/* Check for frequency increase */
-		total_idle_ticks = get_cpu_idle_time(j);
-		tmp_idle_ticks = total_idle_ticks -
-			j_dbs_info->prev_cpu_idle_up;
-		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;
-
-		if (tmp_idle_ticks < idle_ticks)
-			idle_ticks = tmp_idle_ticks;
-	}
+	/* Check for frequency increase */
+	total_idle_ticks = get_cpu_idle_time(cpu);
+	tmp_idle_ticks = total_idle_ticks -
+		this_dbs_info->prev_cpu_idle_up;
+	this_dbs_info->prev_cpu_idle_up = total_idle_ticks;
+
+	if (tmp_idle_ticks < idle_ticks)
+		idle_ticks = tmp_idle_ticks;
 
 	/* Scale idle ticks by 100 and compare with up and down ticks */
 	idle_ticks *= 100;
@@ -357,13 +361,9 @@ static void dbs_check_cpu(int cpu)
 
 	if (idle_ticks < up_idle_ticks) {
 		down_skip[cpu] = 0;
-		for_each_cpu_mask(j, policy->cpus) {
-			struct cpu_dbs_info_s *j_dbs_info;
+		this_dbs_info->prev_cpu_idle_down =
+			this_dbs_info->prev_cpu_idle_up;
 
-			j_dbs_info = &per_cpu(cpu_dbs_info, j);
-			j_dbs_info->prev_cpu_idle_down = 
-					j_dbs_info->prev_cpu_idle_up;
-		}
 		/* if we are already at full speed then break out early */
 		if (requested_freq[cpu] == policy->max)
 			return;
@@ -388,21 +388,14 @@ static void dbs_check_cpu(int cpu)
 	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
 		return;
 
-	idle_ticks = UINT_MAX;
-	for_each_cpu_mask(j, policy->cpus) {
-		unsigned int tmp_idle_ticks, total_idle_ticks;
-		struct cpu_dbs_info_s *j_dbs_info;
+	/* Check for frequency decrease */
+	total_idle_ticks = this_dbs_info->prev_cpu_idle_up;
+	tmp_idle_ticks = total_idle_ticks -
+		this_dbs_info->prev_cpu_idle_down;
+	this_dbs_info->prev_cpu_idle_down = total_idle_ticks;
 
-		j_dbs_info = &per_cpu(cpu_dbs_info, j);
-		/* Check for frequency decrease */
-		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
-		tmp_idle_ticks = total_idle_ticks -
-			j_dbs_info->prev_cpu_idle_down;
-		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;
-
-		if (tmp_idle_ticks < idle_ticks)
-			idle_ticks = tmp_idle_ticks;
-	}
+	if (tmp_idle_ticks < idle_ticks)
+		idle_ticks = tmp_idle_ticks;
 
 	/* Scale idle ticks by 100 and compare with up and down ticks */
 	idle_ticks *= 100;
@@ -491,7 +484,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			j_dbs_info = &per_cpu(cpu_dbs_info, j);
 			j_dbs_info->cur_policy = policy;
 		
-			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
+			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu);
 			j_dbs_info->prev_cpu_idle_down
 				= j_dbs_info->prev_cpu_idle_up;
 		}
-- 
cgit v0.10.2


From a159b82770ab84e1b5e0306fa65e158188492b16 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Wed, 22 Mar 2006 10:00:18 +0000
Subject: [PATCH] cpufreq_conservative: alternative initialise approach

Venki, author of cpufreq_ondemand, came up with a neater way to remove the
initialiser code from the main loop of my code and out to the point when the
governor is actually initialised.

Not only does it look but it also feels cleaner, plus its simpler to
understand.  It also saves a bunch of pointless conditional statements in the
main loop.

Signed-off-by: Alexander Clouter <alex-kernel@digriz.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 7498f25..a152d2c 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -65,6 +65,8 @@ struct cpu_dbs_info_s {
 	unsigned int 		prev_cpu_idle_up;
 	unsigned int 		prev_cpu_idle_down;
 	unsigned int 		enable;
+	unsigned int		down_skip;
+	unsigned int		requested_freq;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -297,35 +299,12 @@ static void dbs_check_cpu(int cpu)
 	unsigned int tmp_idle_ticks, total_idle_ticks;
 	unsigned int freq_step;
 	unsigned int freq_down_sampling_rate;
-	static unsigned short down_skip[NR_CPUS];
-	static unsigned int requested_freq[NR_CPUS];
-	static unsigned int init_flag = NR_CPUS;
 	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
 	struct cpufreq_policy *policy;
 
 	if (!this_dbs_info->enable)
 		return;
 
-	if ( init_flag != 0 ) {
-		for_each_cpu(init_flag) {
-			down_skip[init_flag] = 0;
-			/* I doubt a CPU exists with a freq of 0hz :) */
-			requested_freq[init_flag] = 0;
-		}
-		init_flag = 0;
-	}
-	
-	/*
-	 * If its a freshly initialised cpu we setup requested_freq.  This
-	 * check could be avoided if we did not care about a first time
-	 * stunted increase in CPU speed when there is a load.  I feel we
-	 * should be initialising this to something.  The removal of a CPU
-	 * is not a problem, after a short time the CPU should settle down
-	 * to a 'natural' frequency.
-	 */
-	if (requested_freq[cpu] == 0)
-		requested_freq[cpu] = this_dbs_info->cur_policy->cur;
-
 	policy = this_dbs_info->cur_policy;
 
 	/* 
@@ -360,12 +339,12 @@ static void dbs_check_cpu(int cpu)
 			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
 
 	if (idle_ticks < up_idle_ticks) {
-		down_skip[cpu] = 0;
+		this_dbs_info->down_skip = 0;
 		this_dbs_info->prev_cpu_idle_down =
 			this_dbs_info->prev_cpu_idle_up;
 
 		/* if we are already at full speed then break out early */
-		if (requested_freq[cpu] == policy->max)
+		if (this_dbs_info->requested_freq == policy->max)
 			return;
 		
 		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;
@@ -374,18 +353,18 @@ static void dbs_check_cpu(int cpu)
 		if (unlikely(freq_step == 0))
 			freq_step = 5;
 		
-		requested_freq[cpu] += freq_step;
-		if (requested_freq[cpu] > policy->max)
-			requested_freq[cpu] = policy->max;
+		this_dbs_info->requested_freq += freq_step;
+		if (this_dbs_info->requested_freq > policy->max)
+			this_dbs_info->requested_freq = policy->max;
 
-		__cpufreq_driver_target(policy, requested_freq[cpu], 
+		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
 			CPUFREQ_RELATION_H);
 		return;
 	}
 
 	/* Check for frequency decrease */
-	down_skip[cpu]++;
-	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
+	this_dbs_info->down_skip++;
+	if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor)
 		return;
 
 	/* Check for frequency decrease */
@@ -399,7 +378,7 @@ static void dbs_check_cpu(int cpu)
 
 	/* Scale idle ticks by 100 and compare with up and down ticks */
 	idle_ticks *= 100;
-	down_skip[cpu] = 0;
+	this_dbs_info->down_skip = 0;
 
 	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
 		dbs_tuners_ins.sampling_down_factor;
@@ -412,7 +391,7 @@ static void dbs_check_cpu(int cpu)
 		 * or if we 'cannot' reduce the speed as the user might want
 		 * freq_step to be zero
 		 */
-		if (requested_freq[cpu] == policy->min
+		if (this_dbs_info->requested_freq == policy->min
 				|| dbs_tuners_ins.freq_step == 0)
 			return;
 
@@ -422,11 +401,11 @@ static void dbs_check_cpu(int cpu)
 		if (unlikely(freq_step == 0))
 			freq_step = 5;
 
-		requested_freq[cpu] -= freq_step;
-		if (requested_freq[cpu] < policy->min)
-			requested_freq[cpu] = policy->min;
+		this_dbs_info->requested_freq -= freq_step;
+		if (this_dbs_info->requested_freq < policy->min)
+			this_dbs_info->requested_freq = policy->min;
 
-		__cpufreq_driver_target(policy, requested_freq[cpu],
+		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
 				CPUFREQ_RELATION_H);
 		return;
 	}
@@ -489,6 +468,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				= j_dbs_info->prev_cpu_idle_up;
 		}
 		this_dbs_info->enable = 1;
+		this_dbs_info->down_skip = 0;
+		this_dbs_info->requested_freq = policy->cur;
 		sysfs_create_group(&policy->kobj, &dbs_attr_group);
 		dbs_enable++;
 		/*
-- 
cgit v0.10.2


From ff8c288d7d1a368b663058cdee1ea0adcdef2fa2 Mon Sep 17 00:00:00 2001
From: Eric Piel <Eric.Piel@tremplin-utc.net>
Date: Fri, 10 Mar 2006 11:34:16 +0200
Subject: [PATCH] cpufreq_ondemand: Warn if it cannot run due to too long
 transition latency

Display a warning if the ondemand governor can not be selected due to a
transition latency of the cpufreq driver which is too long.

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 69aa1db..6430489 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -395,8 +395,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			return -EINVAL;
 
 		if (policy->cpuinfo.transition_latency >
-				(TRANSITION_LATENCY_LIMIT * 1000))
+				(TRANSITION_LATENCY_LIMIT * 1000)) {
+			printk(KERN_WARNING "ondemand governor failed to load "
+			       "due to too long transition latency\n");
 			return -EINVAL;
+		}
 		if (this_dbs_info->enable) /* Already enabled */
 			break;
 
-- 
cgit v0.10.2


From 9cbad61b41f0b6f0a4c600fe96d8292ffd592b50 Mon Sep 17 00:00:00 2001
From: Eric Piel <Eric.Piel@tremplin-utc.net>
Date: Fri, 10 Mar 2006 11:35:27 +0200
Subject: [PATCH] cpufreq_ondemand: keep ignore_nice_load value when it is
 reselected

Keep the value of ignore_nice_load of the ondemand governor even after
the governor has been deselected and selected back. This is the behavior
of the other exported values of the ondemand governor and it's much more
user-friendly.

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6430489..cd846f57 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -84,6 +84,7 @@ struct dbs_tuners {
 static struct dbs_tuners dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
+	.ignore_nice = 0,
 };
 
 static inline unsigned int get_cpu_idle_time(unsigned int cpu)
@@ -434,8 +435,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				def_sampling_rate = MIN_STAT_SAMPLING_RATE;
 
 			dbs_tuners_ins.sampling_rate = def_sampling_rate;
-			dbs_tuners_ins.ignore_nice = 0;
-
 			dbs_timer_init();
 		}
 
-- 
cgit v0.10.2


From 7c9d8c0e84d395a01289ebd1597758939a875a86 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 26 Mar 2006 11:11:03 +0200
Subject: [PATCH] cpufreq_ondemand: add range check

Assert that cpufreq_target is, at least, called with the minimum frequency
allowed by this policy, not something lower. It triggered problems on ARM.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index cd846f57..956d121 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -351,6 +351,9 @@ static void dbs_check_cpu(int cpu)
 	freq_next = (freq_next * policy->cur) /
 			(dbs_tuners_ins.up_threshold - 10);
 
+	if (freq_next < policy->min)
+		freq_next = policy->min;
+
 	if (freq_next <= ((policy->cur * 95) / 100))
 		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
 }
-- 
cgit v0.10.2


From 8faaea3faa5ed0b2a15afb7b4e57ce0cd8dbe4ef Mon Sep 17 00:00:00 2001
From: Freddy Spierenburg <freddy@dusktilldawn.nl>
Date: Sun, 26 Mar 2006 21:22:51 +0100
Subject: [SERIAL] Small time UART configuration fix for AU1100 processor

The AU1100 processor does not have an internal UART2. Only
UART0, UART1 and UART3 exist. This patch removes the non existing
UART2 and replaces it with a descriptive comment.

Signed-off-by: Freddy Spierenburg <freddy@dusktilldawn.nl>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/8250_au1x00.c b/drivers/serial/8250_au1x00.c
index 8d8d7a7..3d1bfd0 100644
--- a/drivers/serial/8250_au1x00.c
+++ b/drivers/serial/8250_au1x00.c
@@ -51,7 +51,7 @@ static struct plat_serial8250_port au1x00_data[] = {
 #elif defined(CONFIG_SOC_AU1100)
 	PORT(UART0_ADDR, AU1100_UART0_INT),
 	PORT(UART1_ADDR, AU1100_UART1_INT),
-	PORT(UART2_ADDR, AU1100_UART2_INT),
+	/* The internal UART2 does not exist on the AU1100 processor. */
 	PORT(UART3_ADDR, AU1100_UART3_INT),
 #elif defined(CONFIG_SOC_AU1550)
 	PORT(UART0_ADDR, AU1550_UART0_INT),
-- 
cgit v0.10.2


From 335bd9dff31d042b773591933d3ee5bd62d5ea27 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sun, 26 Mar 2006 21:25:57 +0100
Subject: [SERIAL] Remove obsoleted au1x00_uart driver

As announced in feature-removal-schedule.txt.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c
index 1080558..307e98c 100644
--- a/arch/mips/au1000/common/setup.c
+++ b/arch/mips/au1000/common/setup.c
@@ -94,7 +94,7 @@ void __init plat_setup(void)
 
 	argptr = prom_getcmdline();
 
-#if defined(CONFIG_SERIAL_AU1X00_CONSOLE) || defined(CONFIG_SERIAL_8250_CONSOLE)
+#ifdef CONFIG_SERIAL_8250_CONSOLE
 	if ((argptr = strstr(argptr, "console=")) == NULL) {
 		argptr = prom_getcmdline();
 		strcat(argptr, " console=ttyS0,115200");
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index ceb3697..fe0d8b8 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -620,22 +620,6 @@ config SERIAL_SH_SCI_CONSOLE
 	depends on SERIAL_SH_SCI=y
 	select SERIAL_CORE_CONSOLE
 
-config SERIAL_AU1X00
-	bool "Enable Au1x00 UART Support"
-	depends on MIPS && SOC_AU1X00
-	select SERIAL_CORE
-	help
-	  If you have an Alchemy AU1X00 processor (MIPS based) and you want
-	  to use serial ports, say Y.  Otherwise, say N.
-
-config SERIAL_AU1X00_CONSOLE
-	bool "Enable Au1x00 serial console"
-	depends on SERIAL_AU1X00
-	select SERIAL_CORE_CONSOLE
-	help
-	  If you have an Alchemy AU1X00 processor (MIPS based) and you want
-	  to use a console on a serial port, say Y.  Otherwise, say N.
-
 config SERIAL_CORE
 	tristate
 
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index a3a4323..d2b4c21 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o
 obj-$(CONFIG_V850E_UART) += v850e_uart.o
 obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o
 obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o
-obj-$(CONFIG_SERIAL_AU1X00) += au1x00_uart.o
 obj-$(CONFIG_SERIAL_DZ) += dz.o
 obj-$(CONFIG_SERIAL_SH_SCI) += sh-sci.o
 obj-$(CONFIG_SERIAL_SGI_L1_CONSOLE) += sn_console.o
diff --git a/drivers/serial/au1x00_uart.c b/drivers/serial/au1x00_uart.c
deleted file mode 100644
index 948880a..0000000
--- a/drivers/serial/au1x00_uart.c
+++ /dev/null
@@ -1,1287 +0,0 @@
-/*
- *  Driver for 8250/16550-type serial ports
- *
- *  Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
- *
- *  Copyright (C) 2001 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * A note about mapbase / membase
- *
- *  mapbase is the physical address of the IO port.  Currently, we don't
- *  support this very well, and it may well be dropped from this driver
- *  in future.  As such, mapbase should be NULL.
- *
- *  membase is an 'ioremapped' cookie.  This is compatible with the old
- *  serial.c driver, and is currently the preferred form.
- */
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/tty.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/serial.h>
-#include <linux/serialP.h>
-#include <linux/delay.h>
-
-#include <asm/serial.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/mach-au1x00/au1000.h>
-
-#if defined(CONFIG_SERIAL_AU1X00_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/serial_core.h>
-#include "8250.h"
-
-/*
- * Debugging.
- */
-#if 0
-#define DEBUG_AUTOCONF(fmt...)	printk(fmt)
-#else
-#define DEBUG_AUTOCONF(fmt...)	do { } while (0)
-#endif
-
-#if 0
-#define DEBUG_INTR(fmt...)	printk(fmt)
-#else
-#define DEBUG_INTR(fmt...)	do { } while (0)
-#endif
-
-#define PASS_LIMIT	256
-
-/*
- * We default to IRQ0 for the "no irq" hack.   Some
- * machine types want others as well - they're free
- * to redefine this in their header file.
- */
-#define is_real_interrupt(irq)	((irq) != 0)
-
-static struct old_serial_port old_serial_port[] = {
-	{	.baud_base = 0,
-		.iomem_base = (u8 *)UART0_ADDR,
-		.irq = AU1000_UART0_INT,
-		.flags = STD_COM_FLAGS,
-		.iomem_reg_shift = 2,
-	}, {
-		.baud_base = 0,
-		.iomem_base = (u8 *)UART1_ADDR,
-		.irq = AU1000_UART1_INT,
-		.flags = STD_COM_FLAGS,
-		.iomem_reg_shift = 2
-	}, {
-		.baud_base = 0,
-		.iomem_base = (u8 *)UART2_ADDR,
-		.irq = AU1000_UART2_INT,
-		.flags = STD_COM_FLAGS,
-		.iomem_reg_shift = 2
-	}, {
-		.baud_base = 0,
-		.iomem_base = (u8 *)UART3_ADDR,
-		.irq = AU1000_UART3_INT,
-		.flags = STD_COM_FLAGS,
-		.iomem_reg_shift = 2
-	}
-};
-
-#define UART_NR	ARRAY_SIZE(old_serial_port)
-
-struct uart_8250_port {
-	struct uart_port	port;
-	struct timer_list	timer;		/* "no irq" timer */
-	struct list_head	list;		/* ports on this IRQ */
-	unsigned short		rev;
-	unsigned char		acr;
-	unsigned char		ier;
-	unsigned char		lcr;
-	unsigned char		mcr_mask;	/* mask of user bits */
-	unsigned char		mcr_force;	/* mask of forced bits */
-	unsigned char		lsr_break_flag;
-
-	/*
-	 * We provide a per-port pm hook.
-	 */
-	void			(*pm)(struct uart_port *port,
-				      unsigned int state, unsigned int old);
-};
-
-struct irq_info {
-	spinlock_t		lock;
-	struct list_head	*head;
-};
-
-static struct irq_info irq_lists[NR_IRQS];
-
-/*
- * Here we define the default xmit fifo size used for each type of UART.
- */
-static const struct serial_uart_config uart_config[PORT_MAX_8250+1] = {
-	{ "unknown",	1,	0 },
-	{ "8250",	1,	0 },
-	{ "16450",	1,	0 },
-	{ "16550",	1,	0 },
-	/* PORT_16550A */
-	{ "AU1X00_UART",16,	UART_CLEAR_FIFO | UART_USE_FIFO },
-};
-
-static unsigned int serial_in(struct uart_8250_port *up, int offset)
-{
-	return au_readl((unsigned long)up->port.membase + offset);
-}
-
-static void serial_out(struct uart_8250_port *up, int offset, int value)
-{
-	au_writel(value, (unsigned long)up->port.membase + offset);
-}
-
-#define serial_inp(up, offset)		serial_in(up, offset)
-#define serial_outp(up, offset, value)	serial_out(up, offset, value)
-
-/*
- * This routine is called by rs_init() to initialize a specific serial
- * port.  It determines what type of UART chip this serial port is
- * using: 8250, 16450, 16550, 16550A.  The important question is
- * whether or not this UART is a 16550A or not, since this will
- * determine whether or not we can use its FIFO features or not.
- */
-static void autoconfig(struct uart_8250_port *up, unsigned int probeflags)
-{
-	unsigned char save_lcr, save_mcr;
-	unsigned long flags;
-
-	if (!up->port.iobase && !up->port.mapbase && !up->port.membase)
-		return;
-
-	DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04x, 0x%08lx): ",
-			up->port.line, up->port.iobase, up->port.membase);
-
-	/*
-	 * We really do need global IRQs disabled here - we're going to
-	 * be frobbing the chips IRQ enable register to see if it exists.
-	 */
-	spin_lock_irqsave(&up->port.lock, flags);
-//	save_flags(flags); cli();
-
-	save_mcr = serial_in(up, UART_MCR);
-	save_lcr = serial_in(up, UART_LCR);
-
-	up->port.type = PORT_16550A;
-	serial_outp(up, UART_LCR, save_lcr);
-
-	up->port.fifosize = uart_config[up->port.type].dfl_xmit_fifo_size;
-
-	if (up->port.type == PORT_UNKNOWN)
-		goto out;
-
-	/*
-	 * Reset the UART.
-	 */
-	serial_outp(up, UART_MCR, save_mcr);
-	serial_outp(up, UART_FCR, (UART_FCR_ENABLE_FIFO |
-				     UART_FCR_CLEAR_RCVR |
-				     UART_FCR_CLEAR_XMIT));
-	serial_outp(up, UART_FCR, 0);
-	(void)serial_in(up, UART_RX);
-	serial_outp(up, UART_IER, 0);
-
- out:	
-	spin_unlock_irqrestore(&up->port.lock, flags);
-//	restore_flags(flags);
-	DEBUG_AUTOCONF("type=%s\n", uart_config[up->port.type].name);
-}
-
-static void serial8250_stop_tx(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-
-	if (up->ier & UART_IER_THRI) {
-		up->ier &= ~UART_IER_THRI;
-		serial_out(up, UART_IER, up->ier);
-	}
-}
-
-static void serial8250_start_tx(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-
-	if (!(up->ier & UART_IER_THRI)) {
-		up->ier |= UART_IER_THRI;
-		serial_out(up, UART_IER, up->ier);
-	}
-}
-
-static void serial8250_stop_rx(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-
-	up->ier &= ~UART_IER_RLSI;
-	up->port.read_status_mask &= ~UART_LSR_DR;
-	serial_out(up, UART_IER, up->ier);
-}
-
-static void serial8250_enable_ms(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-
-	up->ier |= UART_IER_MSI;
-	serial_out(up, UART_IER, up->ier);
-}
-
-static void
-receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs)
-{
-	struct tty_struct *tty = up->port.info->tty;
-	unsigned char ch, flag;
-	int max_count = 256;
-
-	do {
-		ch = serial_inp(up, UART_RX);
-		flag = TTY_NORMAL;
-		up->port.icount.rx++;
-
-		if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE |
-				       UART_LSR_FE | UART_LSR_OE))) {
-			/*
-			 * For statistics only
-			 */
-			if (*status & UART_LSR_BI) {
-				*status &= ~(UART_LSR_FE | UART_LSR_PE);
-				up->port.icount.brk++;
-				/*
-				 * We do the SysRQ and SAK checking
-				 * here because otherwise the break
-				 * may get masked by ignore_status_mask
-				 * or read_status_mask.
-				 */
-				if (uart_handle_break(&up->port))
-					goto ignore_char;
-			} else if (*status & UART_LSR_PE)
-				up->port.icount.parity++;
-			else if (*status & UART_LSR_FE)
-				up->port.icount.frame++;
-			if (*status & UART_LSR_OE)
-				up->port.icount.overrun++;
-
-			/*
-			 * Mask off conditions which should be ingored.
-			 */
-			*status &= up->port.read_status_mask;
-
-#ifdef CONFIG_SERIAL_AU1X00_CONSOLE
-			if (up->port.line == up->port.cons->index) {
-				/* Recover the break flag from console xmit */
-				*status |= up->lsr_break_flag;
-				up->lsr_break_flag = 0;
-			}
-#endif
-			if (*status & UART_LSR_BI) {
-				DEBUG_INTR("handling break....");
-				flag = TTY_BREAK;
-			} else if (*status & UART_LSR_PE)
-				flag = TTY_PARITY;
-			else if (*status & UART_LSR_FE)
-				flag = TTY_FRAME;
-		}
-		if (uart_handle_sysrq_char(&up->port, ch, regs))
-			goto ignore_char;
-		if ((*status & up->port.ignore_status_mask) == 0)
-			tty_insert_flip_char(tty, ch, flag);
-		if (*status & UART_LSR_OE)
-			/*
-			 * Overrun is special, since it's reported
-			 * immediately, and doesn't affect the current
-			 * character.
-			 */
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
-		}
-	ignore_char:
-		*status = serial_inp(up, UART_LSR);
-	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
-	spin_unlock(&up->port.lock);
-	tty_flip_buffer_push(tty);
-	spin_lock(&up->port.lock);
-}
-
-static void transmit_chars(struct uart_8250_port *up)
-{
-	struct circ_buf *xmit = &up->port.info->xmit;
-	int count;
-
-	if (up->port.x_char) {
-		serial_outp(up, UART_TX, up->port.x_char);
-		up->port.icount.tx++;
-		up->port.x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) {
-		serial8250_stop_tx(&up->port);
-		return;
-	}
-
-	count = up->port.fifosize;
-	do {
-		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
-
-	DEBUG_INTR("THRE...");
-
-	if (uart_circ_empty(xmit))
-		serial8250_stop_tx(&up->port);
-}
-
-static void check_modem_status(struct uart_8250_port *up)
-{
-	int status;
-
-	status = serial_in(up, UART_MSR);
-
-	if ((status & UART_MSR_ANY_DELTA) == 0)
-		return;
-
-	if (status & UART_MSR_TERI)
-		up->port.icount.rng++;
-	if (status & UART_MSR_DDSR)
-		up->port.icount.dsr++;
-	if (status & UART_MSR_DDCD)
-		uart_handle_dcd_change(&up->port, status & UART_MSR_DCD);
-	if (status & UART_MSR_DCTS)
-		uart_handle_cts_change(&up->port, status & UART_MSR_CTS);
-
-	wake_up_interruptible(&up->port.info->delta_msr_wait);
-}
-
-/*
- * This handles the interrupt from one port.
- */
-static inline void
-serial8250_handle_port(struct uart_8250_port *up, struct pt_regs *regs)
-{
-	unsigned int status = serial_inp(up, UART_LSR);
-
-	DEBUG_INTR("status = %x...", status);
-
-	if (status & UART_LSR_DR)
-		receive_chars(up, &status, regs);
-	check_modem_status(up);
-	if (status & UART_LSR_THRE)
-		transmit_chars(up);
-}
-
-/*
- * This is the serial driver's interrupt routine.
- *
- * Arjan thinks the old way was overly complex, so it got simplified.
- * Alan disagrees, saying that need the complexity to handle the weird
- * nature of ISA shared interrupts.  (This is a special exception.)
- *
- * In order to handle ISA shared interrupts properly, we need to check
- * that all ports have been serviced, and therefore the ISA interrupt
- * line has been de-asserted.
- *
- * This means we need to loop through all ports. checking that they
- * don't have an interrupt pending.
- */
-static irqreturn_t serial8250_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct irq_info *i = dev_id;
-	struct list_head *l, *end = NULL;
-	int pass_counter = 0;
-
-	DEBUG_INTR("serial8250_interrupt(%d)...", irq);
-
-	spin_lock(&i->lock);
-
-	l = i->head;
-	do {
-		struct uart_8250_port *up;
-		unsigned int iir;
-
-		up = list_entry(l, struct uart_8250_port, list);
-
-		iir = serial_in(up, UART_IIR);
-		if (!(iir & UART_IIR_NO_INT)) {
-			spin_lock(&up->port.lock);
-			serial8250_handle_port(up, regs);
-			spin_unlock(&up->port.lock);
-
-			end = NULL;
-		} else if (end == NULL)
-			end = l;
-
-		l = l->next;
-
-		if (l == i->head && pass_counter++ > PASS_LIMIT) {
-			/* If we hit this, we're dead. */
-			printk(KERN_ERR "serial8250: too much work for "
-				"irq%d\n", irq);
-			break;
-		}
-	} while (l != end);
-
-	spin_unlock(&i->lock);
-
-	DEBUG_INTR("end.\n");
-	/* FIXME! Was it really ours? */
-	return IRQ_HANDLED;
-}
-
-/*
- * To support ISA shared interrupts, we need to have one interrupt
- * handler that ensures that the IRQ line has been deasserted
- * before returning.  Failing to do this will result in the IRQ
- * line being stuck active, and, since ISA irqs are edge triggered,
- * no more IRQs will be seen.
- */
-static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
-{
-	spin_lock_irq(&i->lock);
-
-	if (!list_empty(i->head)) {
-		if (i->head == &up->list)
-			i->head = i->head->next;
-		list_del(&up->list);
-	} else {
-		BUG_ON(i->head != &up->list);
-		i->head = NULL;
-	}
-
-	spin_unlock_irq(&i->lock);
-}
-
-static int serial_link_irq_chain(struct uart_8250_port *up)
-{
-	struct irq_info *i = irq_lists + up->port.irq;
-	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? SA_SHIRQ : 0;
-
-	spin_lock_irq(&i->lock);
-
-	if (i->head) {
-		list_add(&up->list, i->head);
-		spin_unlock_irq(&i->lock);
-
-		ret = 0;
-	} else {
-		INIT_LIST_HEAD(&up->list);
-		i->head = &up->list;
-		spin_unlock_irq(&i->lock);
-
-		ret = request_irq(up->port.irq, serial8250_interrupt,
-				  irq_flags, "serial", i);
-		if (ret < 0)
-			serial_do_unlink(i, up);
-	}
-
-	return ret;
-}
-
-static void serial_unlink_irq_chain(struct uart_8250_port *up)
-{
-	struct irq_info *i = irq_lists + up->port.irq;
-
-	BUG_ON(i->head == NULL);
-
-	if (list_empty(i->head))
-		free_irq(up->port.irq, i);
-
-	serial_do_unlink(i, up);
-}
-
-/*
- * This function is used to handle ports that do not have an
- * interrupt.  This doesn't work very well for 16450's, but gives
- * barely passable results for a 16550A.  (Although at the expense
- * of much CPU overhead).
- */
-static void serial8250_timeout(unsigned long data)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)data;
-	unsigned int timeout;
-	unsigned int iir;
-
-	iir = serial_in(up, UART_IIR);
-	if (!(iir & UART_IIR_NO_INT)) {
-		spin_lock(&up->port.lock);
-		serial8250_handle_port(up, NULL);
-		spin_unlock(&up->port.lock);
-	}
-
-	timeout = up->port.timeout;
-	timeout = timeout > 6 ? (timeout / 2 - 2) : 1;
-	mod_timer(&up->timer, jiffies + timeout);
-}
-
-static unsigned int serial8250_tx_empty(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned long flags;
-	unsigned int ret;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0;
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	return ret;
-}
-
-static unsigned int serial8250_get_mctrl(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned char status;
-	unsigned int ret;
-
-	status = serial_in(up, UART_MSR);
-
-	ret = 0;
-	if (status & UART_MSR_DCD)
-		ret |= TIOCM_CAR;
-	if (status & UART_MSR_RI)
-		ret |= TIOCM_RNG;
-	if (status & UART_MSR_DSR)
-		ret |= TIOCM_DSR;
-	if (status & UART_MSR_CTS)
-		ret |= TIOCM_CTS;
-	return ret;
-}
-
-static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned char mcr = 0;
-
-	if (mctrl & TIOCM_RTS)
-		mcr |= UART_MCR_RTS;
-	if (mctrl & TIOCM_DTR)
-		mcr |= UART_MCR_DTR;
-	if (mctrl & TIOCM_OUT1)
-		mcr |= UART_MCR_OUT1;
-	if (mctrl & TIOCM_OUT2)
-		mcr |= UART_MCR_OUT2;
-	if (mctrl & TIOCM_LOOP)
-		mcr |= UART_MCR_LOOP;
-
-	mcr = (mcr & up->mcr_mask) | up->mcr_force;
-
-	serial_out(up, UART_MCR, mcr);
-}
-
-static void serial8250_break_ctl(struct uart_port *port, int break_state)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	if (break_state == -1)
-		up->lcr |= UART_LCR_SBC;
-	else
-		up->lcr &= ~UART_LCR_SBC;
-	serial_out(up, UART_LCR, up->lcr);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-}
-
-static int serial8250_startup(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned long flags;
-	int retval;
-
-	/*
-	 * Clear the FIFO buffers and disable them.
-	 * (they will be reeanbled in set_termios())
-	 */
-	if (uart_config[up->port.type].flags & UART_CLEAR_FIFO) {
-		serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO);
-		serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO |
-				UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
-		serial_outp(up, UART_FCR, 0);
-	}
-
-	/*
-	 * Clear the interrupt registers.
-	 */
-	(void) serial_inp(up, UART_LSR);
-	(void) serial_inp(up, UART_RX);
-	(void) serial_inp(up, UART_IIR);
-	(void) serial_inp(up, UART_MSR);
-
-	/*
-	 * At this point, there's no way the LSR could still be 0xff;
-	 * if it is, then bail out, because there's likely no UART
-	 * here.
-	 */
-	if (!(up->port.flags & UPF_BUGGY_UART) &&
-	    (serial_inp(up, UART_LSR) == 0xff)) {
-		printk("ttyS%d: LSR safety check engaged!\n", up->port.line);
-		return -ENODEV;
-	}
-
-	retval = serial_link_irq_chain(up);
-		if (retval)
-			return retval;
-
-	/*
-	 * Now, initialize the UART
-	 */
-	serial_outp(up, UART_LCR, UART_LCR_WLEN8);
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	if (up->port.flags & UPF_FOURPORT) {
-		if (!is_real_interrupt(up->port.irq))
-			up->port.mctrl |= TIOCM_OUT1;
-	} else
-		/*
-		 * Most PC uarts need OUT2 raised to enable interrupts.
-		 */
-		if (is_real_interrupt(up->port.irq))
-			up->port.mctrl |= TIOCM_OUT2;
-
-	serial8250_set_mctrl(&up->port, up->port.mctrl);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	/*
-	 * Finally, enable interrupts.  Note: Modem status interrupts
-	 * are set via set_termios(), which will be occurring imminently
-	 * anyway, so we don't enable them here.
-	 */
-	up->ier = UART_IER_RLSI | UART_IER_RDI;
-	serial_outp(up, UART_IER, up->ier);
-
-	if (up->port.flags & UPF_FOURPORT) {
-		unsigned int icp;
-		/*
-		 * Enable interrupts on the AST Fourport board
-		 */
-		icp = (up->port.iobase & 0xfe0) | 0x01f;
-		outb_p(0x80, icp);
-		(void) inb_p(icp);
-	}
-
-	/*
-	 * And clear the interrupt registers again for luck.
-	 */
-	(void) serial_inp(up, UART_LSR);
-	(void) serial_inp(up, UART_RX);
-	(void) serial_inp(up, UART_IIR);
-	(void) serial_inp(up, UART_MSR);
-
-	return 0;
-}
-
-static void serial8250_shutdown(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned long flags;
-
-	/*
-	 * Disable interrupts from this port
-	 */
-	up->ier = 0;
-	serial_outp(up, UART_IER, 0);
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	if (up->port.flags & UPF_FOURPORT) {
-		/* reset interrupts on the AST Fourport board */
-		inb((up->port.iobase & 0xfe0) | 0x1f);
-		up->port.mctrl |= TIOCM_OUT1;
-	} else
-		up->port.mctrl &= ~TIOCM_OUT2;
-
-	serial8250_set_mctrl(&up->port, up->port.mctrl);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	/*
-	 * Disable break condition and FIFOs
-	 */
-	serial_out(up, UART_LCR, serial_inp(up, UART_LCR) & ~UART_LCR_SBC);
-	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO |
-				  UART_FCR_CLEAR_RCVR |
-				  UART_FCR_CLEAR_XMIT);
-	serial_outp(up, UART_FCR, 0);
-
-	/*
-	 * Read data port to reset things, and then unlink from
-	 * the IRQ chain.
-	 */
-	(void) serial_in(up, UART_RX);
-
-	if (!is_real_interrupt(up->port.irq))
-		del_timer_sync(&up->timer);
-	else
-		serial_unlink_irq_chain(up);
-}
-
-static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud)
-{
-	unsigned int quot;
-
-	/*
-	 * Handle magic divisors for baud rates above baud_base on
-	 * SMSC SuperIO chips.
-	 */
-	if ((port->flags & UPF_MAGIC_MULTIPLIER) &&
-	    baud == (port->uartclk/4))
-		quot = 0x8001;
-	else if ((port->flags & UPF_MAGIC_MULTIPLIER) &&
-		 baud == (port->uartclk/8))
-		quot = 0x8002;
-	else
-		quot = uart_get_divisor(port, baud);
-
-	return quot;
-}
-
-static void
-serial8250_set_termios(struct uart_port *port, struct termios *termios,
-		       struct termios *old)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned char cval, fcr = 0;
-	unsigned long flags;
-	unsigned int baud, quot;
-
-	switch (termios->c_cflag & CSIZE) {
-	case CS5:
-		cval = UART_LCR_WLEN5;
-		break;
-	case CS6:
-		cval = UART_LCR_WLEN6;
-		break;
-	case CS7:
-		cval = UART_LCR_WLEN7;
-		break;
-	default:
-	case CS8:
-		cval = UART_LCR_WLEN8;
-		break;
-	}
-
-	if (termios->c_cflag & CSTOPB)
-		cval |= UART_LCR_STOP;
-	if (termios->c_cflag & PARENB)
-		cval |= UART_LCR_PARITY;
-	if (!(termios->c_cflag & PARODD))
-		cval |= UART_LCR_EPAR;
-#ifdef CMSPAR
-	if (termios->c_cflag & CMSPAR)
-		cval |= UART_LCR_SPAR;
-#endif
-
-	/*
-	 * Ask the core to calculate the divisor for us.
-	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); 
-	quot = serial8250_get_divisor(port, baud);
-	quot = 0x35; /* FIXME */
-
-	/*
-	 * Work around a bug in the Oxford Semiconductor 952 rev B
-	 * chip which causes it to seriously miscalculate baud rates
-	 * when DLL is 0.
-	 */
-	if ((quot & 0xff) == 0 && up->port.type == PORT_16C950 &&
-	    up->rev == 0x5201)
-		quot ++;
-
-	if (uart_config[up->port.type].flags & UART_USE_FIFO) {
-		if (baud < 2400)
-			fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIGGER_1;
-		else
-			fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIGGER_8;
-	}
-
-	/*
-	 * Ok, we're now changing the port state.  Do it with
-	 * interrupts disabled.
-	 */
-	spin_lock_irqsave(&up->port.lock, flags);
-
-	/*
-	 * Update the per-port timeout.
-	 */
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR;
-	if (termios->c_iflag & INPCK)
-		up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE;
-	if (termios->c_iflag & (BRKINT | PARMRK))
-		up->port.read_status_mask |= UART_LSR_BI;
-
-	/*
-	 * Characteres to ignore
-	 */
-	up->port.ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE;
-	if (termios->c_iflag & IGNBRK) {
-		up->port.ignore_status_mask |= UART_LSR_BI;
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			up->port.ignore_status_mask |= UART_LSR_OE;
-	}
-
-	/*
-	 * ignore all characters if CREAD is not set
-	 */
-	if ((termios->c_cflag & CREAD) == 0)
-		up->port.ignore_status_mask |= UART_LSR_DR;
-
-	/*
-	 * CTS flow control flag and modem status interrupts
-	 */
-	up->ier &= ~UART_IER_MSI;
-	if (UART_ENABLE_MS(&up->port, termios->c_cflag))
-		up->ier |= UART_IER_MSI;
-
-	serial_out(up, UART_IER, up->ier);
-	serial_outp(up, 0x28, quot & 0xffff);
-	up->lcr = cval;					/* Save LCR */
-	if (up->port.type != PORT_16750) {
-		if (fcr & UART_FCR_ENABLE_FIFO) {
-			/* emulated UARTs (Lucent Venus 167x) need two steps */
-			serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO);
-		}
-		serial_outp(up, UART_FCR, fcr);		/* set fcr */
-	}
-	spin_unlock_irqrestore(&up->port.lock, flags);
-}
-
-static void
-serial8250_pm(struct uart_port *port, unsigned int state,
-	      unsigned int oldstate)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	if (state) {
-		/* sleep */
-		if (up->pm)
-			up->pm(port, state, oldstate);
-	} else {
-		/* wake */
-		if (up->pm)
-			up->pm(port, state, oldstate);
-	}
-}
-
-/*
- * Resource handling.  This is complicated by the fact that resources
- * depend on the port type.  Maybe we should be claiming the standard
- * 8250 ports, and then trying to get other resources as necessary?
- */
-static int
-serial8250_request_std_resource(struct uart_8250_port *up, struct resource **res)
-{
-	unsigned int size = 8 << up->port.regshift;
-	int ret = 0;
-
-	switch (up->port.iotype) {
-	case UPIO_MEM:
-		if (up->port.mapbase) {
-			*res = request_mem_region(up->port.mapbase, size, "serial");
-			if (!*res)
-				ret = -EBUSY;
-		}
-		break;
-
-	case UPIO_HUB6:
-	case UPIO_PORT:
-		*res = request_region(up->port.iobase, size, "serial");
-		if (!*res)
-			ret = -EBUSY;
-		break;
-	}
-	return ret;
-}
-
-
-static void serial8250_release_port(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	unsigned long start, offset = 0, size = 0;
-
-	size <<= up->port.regshift;
-
-	switch (up->port.iotype) {
-	case UPIO_MEM:
-		if (up->port.mapbase) {
-			/*
-			 * Unmap the area.
-			 */
-			iounmap(up->port.membase);
-			up->port.membase = NULL;
-
-			start = up->port.mapbase;
-
-			if (size)
-				release_mem_region(start + offset, size);
-			release_mem_region(start, 8 << up->port.regshift);
-		}
-		break;
-
-	case UPIO_HUB6:
-	case UPIO_PORT:
-		start = up->port.iobase;
-
-		if (size)
-			release_region(start + offset, size);
-		release_region(start + offset, 8 << up->port.regshift);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int serial8250_request_port(struct uart_port *port)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	struct resource *res = NULL, *res_rsa = NULL;
-	int ret = 0;
-
-	ret = serial8250_request_std_resource(up, &res);
-
-	/*
-	 * If we have a mapbase, then request that as well.
-	 */
-	if (ret == 0 && up->port.flags & UPF_IOREMAP) {
-		int size = res->end - res->start + 1;
-
-		up->port.membase = ioremap(up->port.mapbase, size);
-		if (!up->port.membase)
-			ret = -ENOMEM;
-	}
-
-	if (ret < 0) {
-		if (res_rsa)
-			release_resource(res_rsa);
-		if (res)
-			release_resource(res);
-	}
-	return ret;
-}
-
-static void serial8250_config_port(struct uart_port *port, int flags)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-	struct resource *res_std = NULL, *res_rsa = NULL;
-	int probeflags = PROBE_ANY;
-
-	probeflags &= ~PROBE_RSA;
-
-	if (flags & UART_CONFIG_TYPE)
-		autoconfig(up, probeflags);
-
-	/*
-	 * If the port wasn't an RSA port, release the resource.
-	 */
-	if (up->port.type != PORT_RSA && res_rsa)
-		release_resource(res_rsa);
-
-	if (up->port.type == PORT_UNKNOWN && res_std)
-		release_resource(res_std);
-}
-
-static int
-serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	if (ser->irq >= NR_IRQS || ser->irq < 0 ||
-	    ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
-	    ser->type > PORT_MAX_8250 || ser->type == PORT_CIRRUS ||
-	    ser->type == PORT_STARTECH)
-		return -EINVAL;
-	return 0;
-}
-
-static const char *
-serial8250_type(struct uart_port *port)
-{
-	int type = port->type;
-
-	if (type >= ARRAY_SIZE(uart_config))
-		type = 0;
-	return uart_config[type].name;
-}
-
-static struct uart_ops serial8250_pops = {
-	.tx_empty	= serial8250_tx_empty,
-	.set_mctrl	= serial8250_set_mctrl,
-	.get_mctrl	= serial8250_get_mctrl,
-	.stop_tx	= serial8250_stop_tx,
-	.start_tx	= serial8250_start_tx,
-	.stop_rx	= serial8250_stop_rx,
-	.enable_ms	= serial8250_enable_ms,
-	.break_ctl	= serial8250_break_ctl,
-	.startup	= serial8250_startup,
-	.shutdown	= serial8250_shutdown,
-	.set_termios	= serial8250_set_termios,
-	.pm		= serial8250_pm,
-	.type		= serial8250_type,
-	.release_port	= serial8250_release_port,
-	.request_port	= serial8250_request_port,
-	.config_port	= serial8250_config_port,
-	.verify_port	= serial8250_verify_port,
-};
-
-static struct uart_8250_port serial8250_ports[UART_NR];
-
-static void __init serial8250_isa_init_ports(void)
-{
-	struct uart_8250_port *up;
-	static int first = 1;
-	int i;
-
-	if (!first)
-		return;
-	first = 0;
-
-	for (i = 0, up = serial8250_ports; i < ARRAY_SIZE(old_serial_port);
-	     i++, up++) {
-		up->port.iobase   = old_serial_port[i].port;
-		up->port.irq      = old_serial_port[i].irq;
-		up->port.uartclk  = get_au1x00_uart_baud_base();
-		up->port.flags    = old_serial_port[i].flags;
-		up->port.hub6     = old_serial_port[i].hub6;
-		up->port.membase  = old_serial_port[i].iomem_base;
-		up->port.iotype   = old_serial_port[i].io_type;
-		up->port.regshift = old_serial_port[i].iomem_reg_shift;
-		up->port.ops      = &serial8250_pops;
-	}
-}
-
-static void __init serial8250_register_ports(struct uart_driver *drv)
-{
-	int i;
-
-	serial8250_isa_init_ports();
-
-	for (i = 0; i < UART_NR; i++) {
-		struct uart_8250_port *up = &serial8250_ports[i];
-
-		up->port.line = i;
-		up->port.ops = &serial8250_pops;
-		init_timer(&up->timer);
-		up->timer.function = serial8250_timeout;
-
-		/*
-		 * ALPHA_KLUDGE_MCR needs to be killed.
-		 */
-		up->mcr_mask = ~ALPHA_KLUDGE_MCR;
-		up->mcr_force = ALPHA_KLUDGE_MCR;
-
-		uart_add_one_port(drv, &up->port);
-	}
-}
-
-#ifdef CONFIG_SERIAL_AU1X00_CONSOLE
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-/*
- *	Wait for transmitter & holding register to empty
- */
-static inline void wait_for_xmitr(struct uart_8250_port *up)
-{
-	unsigned int status, tmout = 10000;
-
-	/* Wait up to 10ms for the character(s) to be sent. */
-	do {
-		status = serial_in(up, UART_LSR);
-
-		if (status & UART_LSR_BI)
-			up->lsr_break_flag = UART_LSR_BI;
-
-		if (--tmout == 0)
-			break;
-		udelay(1);
-	} while ((status & BOTH_EMPTY) != BOTH_EMPTY);
-
-	/* Wait up to 1s for flow control if necessary */
-	if (up->port.flags & UPF_CONS_FLOW) {
-		tmout = 1000000;
-		while (--tmout &&
-		       ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0))
-			udelay(1);
-	}
-}
-
-static void au1x00_console_putchar(struct uart_port *port, int ch)
-{
-	struct uart_8250_port *up = (struct uart_8250_port *)port;
-
-	wait_for_xmitr(up);
-	serial_out(up, UART_TX, ch);
-}
-
-/*
- *	Print a string to the serial port trying not to disturb
- *	any possible real use of the port...
- *
- *	The console_lock must be held when we get here.
- */
-static void
-serial8250_console_write(struct console *co, const char *s, unsigned int count)
-{
-	struct uart_8250_port *up = &serial8250_ports[co->index];
-	unsigned int ier;
-
-	/*
-	 *	First save the UER then disable the interrupts
-	 */
-	ier = serial_in(up, UART_IER);
-	serial_out(up, UART_IER, 0);
-
-	uart_console_write(&up->port, s, count, au1x00_console_putchar);
-
-	/*
-	 *	Finally, wait for transmitter to become empty
-	 *	and restore the IER
-	 */
-	wait_for_xmitr(up);
-	serial_out(up, UART_IER, ier);
-}
-
-static int __init serial8250_console_setup(struct console *co, char *options)
-{
-	struct uart_port *port;
-	int baud = 9600;
-	int bits = 8;
-	int parity = 'n';
-	int flow = 'n';
-
-	/*
-	 * Check whether an invalid uart number has been specified, and
-	 * if so, search for the first available port that does have
-	 * console support.
-	 */
-	if (co->index >= UART_NR)
-		co->index = 0;
-	port = &serial8250_ports[co->index].port;
-
-	/*
-	 * Temporary fix.
-	 */
-	spin_lock_init(&port->lock);
-
-	if (options)
-		uart_parse_options(options, &baud, &parity, &bits, &flow);
-
-	return uart_set_options(port, co, baud, parity, bits, flow);
-}
-
-extern struct uart_driver serial8250_reg;
-static struct console serial8250_console = {
-	.name		= "ttyS",
-	.write		= serial8250_console_write,
-	.device		= uart_console_device,
-	.setup		= serial8250_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-	.data		= &serial8250_reg,
-};
-
-static int __init serial8250_console_init(void)
-{
-	serial8250_isa_init_ports();
-	register_console(&serial8250_console);
-	return 0;
-}
-console_initcall(serial8250_console_init);
-
-#define SERIAL8250_CONSOLE	&serial8250_console
-#else
-#define SERIAL8250_CONSOLE	NULL
-#endif
-
-static struct uart_driver serial8250_reg = {
-	.owner			= THIS_MODULE,
-	.driver_name		= "serial",
-	.devfs_name		= "tts/",
-	.dev_name		= "ttyS",
-	.major			= TTY_MAJOR,
-	.minor			= 64,
-	.nr			= UART_NR,
-	.cons			= SERIAL8250_CONSOLE,
-};
-
-int __init early_serial_setup(struct uart_port *port)
-{
-	serial8250_isa_init_ports();
-	serial8250_ports[port->line].port	= *port;
-	serial8250_ports[port->line].port.ops	= &serial8250_pops;
-	return 0;
-}
-
-/**
- *	serial8250_suspend_port - suspend one serial port
- *	@line:  serial line number
- *      @level: the level of port suspension, as per uart_suspend_port
- *
- *	Suspend one serial port.
- */
-void serial8250_suspend_port(int line)
-{
-	uart_suspend_port(&serial8250_reg, &serial8250_ports[line].port);
-}
-
-/**
- *	serial8250_resume_port - resume one serial port
- *	@line:  serial line number
- *      @level: the level of port resumption, as per uart_resume_port
- *
- *	Resume one serial port.
- */
-void serial8250_resume_port(int line)
-{
-	uart_resume_port(&serial8250_reg, &serial8250_ports[line].port);
-}
-
-static int __init serial8250_init(void)
-{
-	int ret, i;
-
-	printk(KERN_INFO "Serial: Au1x00 driver\n");
-
-	for (i = 0; i < NR_IRQS; i++)
-		spin_lock_init(&irq_lists[i].lock);
-
-	ret = uart_register_driver(&serial8250_reg);
-	if (ret >= 0)
-		serial8250_register_ports(&serial8250_reg);
-
-	return ret;
-}
-
-static void __exit serial8250_exit(void)
-{
-	int i;
-
-	for (i = 0; i < UART_NR; i++)
-		uart_remove_one_port(&serial8250_reg, &serial8250_ports[i].port);
-
-	uart_unregister_driver(&serial8250_reg);
-}
-
-module_init(serial8250_init);
-module_exit(serial8250_exit);
-
-EXPORT_SYMBOL(serial8250_suspend_port);
-EXPORT_SYMBOL(serial8250_resume_port);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Au1x00 serial driver\n");
diff --git a/include/asm-mips/serial.h b/include/asm-mips/serial.h
index e796d75..7b23664 100644
--- a/include/asm-mips/serial.h
+++ b/include/asm-mips/serial.h
@@ -103,88 +103,6 @@
 #define IVR_SERIAL_PORT_DEFNS
 #endif
 
-#ifdef CONFIG_SERIAL_AU1X00
-#include <asm/mach-au1x00/au1000.h>
-#ifdef CONFIG_SOC_AU1000
-#define AU1000_SERIAL_PORT_DEFNS                       \
-    { .baud_base = 0, .port = UART0_ADDR,              \
-      .iomem_base = (unsigned char *)UART0_ADDR,       \
-      .irq = AU1000_UART0_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART1_ADDR,              \
-      .iomem_base = (unsigned char *)UART1_ADDR,       \
-      .irq = AU1000_UART1_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART2_ADDR,              \
-      .iomem_base = (unsigned char *)UART2_ADDR,       \
-      .irq = AU1000_UART2_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART3_ADDR,              \
-      .iomem_base = (unsigned char *)UART3_ADDR,       \
-      .irq = AU1000_UART3_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },
-#endif
-
-#ifdef CONFIG_SOC_AU1500
-#define AU1000_SERIAL_PORT_DEFNS                       \
-    { .baud_base = 0, .port = UART0_ADDR,              \
-      .iomem_base = (unsigned char *)UART0_ADDR,       \
-      .irq = AU1500_UART0_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART3_ADDR,              \
-      .iomem_base = (unsigned char *)UART3_ADDR,       \
-      .irq = AU1500_UART3_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },
-#endif
-
-#ifdef CONFIG_SOC_AU1100
-#define AU1000_SERIAL_PORT_DEFNS                       \
-    { .baud_base = 0, .port = UART0_ADDR,              \
-      .iomem_base = (unsigned char *)UART0_ADDR,       \
-      .irq = AU1100_UART0_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART1_ADDR,              \
-      .iomem_base = (unsigned char *)UART1_ADDR,       \
-      .irq = AU1100_UART1_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART3_ADDR,              \
-      .iomem_base = (unsigned char *)UART3_ADDR,       \
-      .irq = AU1100_UART3_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },
-#endif
-
-#ifdef CONFIG_SOC_AU1550
-#define AU1000_SERIAL_PORT_DEFNS                       \
-    { .baud_base = 0, .port = UART0_ADDR,              \
-      .iomem_base = (unsigned char *)UART0_ADDR,       \
-      .irq = AU1550_UART0_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART1_ADDR,              \
-      .iomem_base = (unsigned char *)UART1_ADDR,       \
-      .irq = AU1550_UART1_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART3_ADDR,              \
-      .iomem_base = (unsigned char *)UART3_ADDR,       \
-      .irq = AU1550_UART3_INT,  .flags = STD_COM_FLAGS,\
-      .iomem_reg_shift = 2 },
-#endif
-
-#ifdef CONFIG_SOC_AU1200
-#define AU1000_SERIAL_PORT_DEFNS                       \
-    { .baud_base = 0, .port = UART0_ADDR,              \
-      .iomem_base = (unsigned char *)UART0_ADDR,       \
-      .irq = AU1200_UART0_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },                          \
-    { .baud_base = 0, .port = UART1_ADDR,              \
-      .iomem_base = (unsigned char *)UART1_ADDR,       \
-      .irq = AU1200_UART1_INT, .flags = STD_COM_FLAGS, \
-      .iomem_reg_shift = 2 },
-#endif
-
-#else
-#define AU1000_SERIAL_PORT_DEFNS
-#endif
-
 #ifdef CONFIG_HAVE_STD_PC_SERIAL_PORT
 #define STD_SERIAL_PORT_DEFNS			\
 	/* UART CLK   PORT IRQ     FLAGS        */			\
@@ -331,7 +249,6 @@
 	MOMENCO_OCELOT_G_SERIAL_PORT_DEFNS		\
 	MOMENCO_OCELOT_C_SERIAL_PORT_DEFNS		\
 	MOMENCO_OCELOT_SERIAL_PORT_DEFNS		\
-	MOMENCO_OCELOT_3_SERIAL_PORT_DEFNS		\
-	AU1000_SERIAL_PORT_DEFNS
+	MOMENCO_OCELOT_3_SERIAL_PORT_DEFNS
 
 #endif /* _ASM_SERIAL_H */
-- 
cgit v0.10.2


From c9b4470e2d9d611181fe488fdf463948d8b24901 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Sun, 26 Mar 2006 21:40:27 +0100
Subject: [ARM] 3414/1: ep93xx: reset ethernet controller before uncompressing

Patch from Lennert Buytenhek

The Redboot version that cirrus supplies for the cirrus ep93xx doesn't
turn off DMA from the ethernet MAC before jumping to linux, which means
that we might end up with bits of RX status and packet data scribbled
over the uncompressed kernel image.

Work around this by resetting the ethernet MAC before we uncompress.

We don't usually work around bootloader bugs, but considering that the
large majority of ep93xx boards out there have this problem, I figured
this it was justified in this case.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/include/asm-arm/arch-ep93xx/uncompress.h b/include/asm-arm/arch-ep93xx/uncompress.h
index 4410d21..2171082 100644
--- a/include/asm-arm/arch-ep93xx/uncompress.h
+++ b/include/asm-arm/arch-ep93xx/uncompress.h
@@ -16,11 +16,21 @@ static unsigned char __raw_readb(unsigned int ptr)
 	return *((volatile unsigned char *)ptr);
 }
 
+static unsigned int __raw_readl(unsigned int ptr)
+{
+	return *((volatile unsigned int *)ptr);
+}
+
 static void __raw_writeb(unsigned char value, unsigned int ptr)
 {
 	*((volatile unsigned char *)ptr) = value;
 }
 
+static void __raw_writel(unsigned int value, unsigned int ptr)
+{
+	*((volatile unsigned int *)ptr) = value;
+}
+
 
 #define PHYS_UART1_DATA		0x808c0000
 #define PHYS_UART1_FLAG		0x808c0018
@@ -49,5 +59,33 @@ static void putstr(const char *s)
 	}
 }
 
-#define arch_decomp_setup()
+
+/*
+ * Some bootloaders don't turn off DMA from the ethernet MAC before
+ * jumping to linux, which means that we might end up with bits of RX
+ * status and packet data scribbled over the uncompressed kernel image.
+ * Work around this by resetting the ethernet MAC before we uncompress.
+ */
+#define PHYS_ETH_SELF_CTL		0x80010020
+#define ETH_SELF_CTL_RESET		0x00000001
+
+static void ethernet_reset(void)
+{
+	unsigned int v;
+
+	/* Reset the ethernet MAC.  */
+	v = __raw_readl(PHYS_ETH_SELF_CTL);
+	__raw_writel(v | ETH_SELF_CTL_RESET, PHYS_ETH_SELF_CTL);
+
+	/* Wait for reset to finish.  */
+	while (__raw_readl(PHYS_ETH_SELF_CTL) & ETH_SELF_CTL_RESET)
+		;
+}
+
+
+static void arch_decomp_setup(void)
+{
+	ethernet_reset();
+}
+
 #define arch_decomp_wdog()
-- 
cgit v0.10.2


From bd8f103efef14980decf5d54a2de85835e9a6d9a Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sun, 26 Mar 2006 21:40:27 +0100
Subject: [ARM] 3415/1: Akita: Add missing EXPORT_SYMBOL

Patch from Richard Purdie

Add an EXPORT_SYMBOL for the Akita IO Expander Device.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 30ec317..0dbb079 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -467,6 +467,8 @@ struct platform_device akitaioexp_device = {
 	.id		= -1,
 };
 
+EXPORT_SYMBOL_GPL(akitaioexp_device);
+
 static void __init akita_init(void)
 {
 	spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode;
-- 
cgit v0.10.2


From fbb18a277a6f192404aa20ece49529acb1e1e76d Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 26 Mar 2006 23:13:39 +0100
Subject: [SERIAL] amba-pl010: allow platforms to specify modem control method

The amba-pl010 hardware does not provide RTS and DTR control lines; it
is expected that these will be implemented using GPIO.  Allow platforms
to supply a function to implement manipulation of modem control lines.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 20071a2..576a5e9 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -15,7 +15,9 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/termios.h>
 #include <linux/amba/bus.h>
+#include <linux/amba/serial.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
@@ -28,6 +30,8 @@
 
 #include "common.h"
 
+static struct amba_pl010_data integrator_uart_data;
+
 static struct amba_device rtc_device = {
 	.dev		= {
 		.bus_id	= "mb:15",
@@ -44,6 +48,7 @@ static struct amba_device rtc_device = {
 static struct amba_device uart0_device = {
 	.dev		= {
 		.bus_id	= "mb:16",
+		.platform_data = &integrator_uart_data,
 	},
 	.res		= {
 		.start	= INTEGRATOR_UART0_BASE,
@@ -57,6 +62,7 @@ static struct amba_device uart0_device = {
 static struct amba_device uart1_device = {
 	.dev		= {
 		.bus_id	= "mb:17",
+		.platform_data = &integrator_uart_data,
 	},
 	.res		= {
 		.start	= INTEGRATOR_UART1_BASE,
@@ -115,6 +121,46 @@ static int __init integrator_init(void)
 
 arch_initcall(integrator_init);
 
+/*
+ * On the Integrator platform, the port RTS and DTR are provided by
+ * bits in the following SC_CTRLS register bits:
+ *        RTS  DTR
+ *  UART0  7    6
+ *  UART1  5    4
+ */
+#define SC_CTRLC	(IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLC_OFFSET)
+#define SC_CTRLS	(IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLS_OFFSET)
+
+static void integrator_uart_set_mctrl(struct amba_device *dev, void __iomem *base, unsigned int mctrl)
+{
+	unsigned int ctrls = 0, ctrlc = 0, rts_mask, dtr_mask;
+
+	if (dev == &uart0_device) {
+		rts_mask = 1 << 4;
+		dtr_mask = 1 << 5;
+	} else {
+		rts_mask = 1 << 6;
+		dtr_mask = 1 << 7;
+	}
+
+	if (mctrl & TIOCM_RTS)
+		ctrlc |= rts_mask;
+	else
+		ctrls |= rts_mask;
+
+	if (mctrl & TIOCM_DTR)
+		ctrlc |= dtr_mask;
+	else
+		ctrls |= dtr_mask;
+
+	__raw_writel(ctrls, SC_CTRLS);
+	__raw_writel(ctrlc, SC_CTRLC);
+}
+
+static struct amba_pl010_data integrator_uart_data = {
+	.set_mctrl = integrator_uart_set_mctrl,
+};
+
 #define CM_CTRL	IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET
 
 static DEFINE_SPINLOCK(cm_lock);
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index 127d6cd..1631414 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -51,8 +51,6 @@
 #include <linux/amba/serial.h>
 
 #include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/hardware.h>
 
 #define UART_NR		2
 
@@ -65,26 +63,16 @@
 #define UART_RX_DATA(s)		(((s) & UART01x_FR_RXFE) == 0)
 #define UART_TX_READY(s)	(((s) & UART01x_FR_TXFF) == 0)
 
-#define UART_DUMMY_RSR_RX	/*256*/0
+#define UART_DUMMY_RSR_RX	256
 #define UART_PORT_SIZE		64
 
 /*
- * On the Integrator platform, the port RTS and DTR are provided by
- * bits in the following SC_CTRLS register bits:
- *        RTS  DTR
- *  UART0  7    6
- *  UART1  5    4
- */
-#define SC_CTRLC	(IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLC_OFFSET)
-#define SC_CTRLS	(IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLS_OFFSET)
-
-/*
  * We wrap our port structure around the generic uart_port.
  */
 struct uart_amba_port {
 	struct uart_port	port;
-	unsigned int		dtr_mask;
-	unsigned int		rts_mask;
+	struct amba_device	*dev;
+	struct amba_pl010_data	*data;
 	unsigned int		old_status;
 };
 
@@ -300,20 +288,9 @@ static unsigned int pl010_get_mctrl(struct uart_port *port)
 static void pl010_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
-	unsigned int ctrls = 0, ctrlc = 0;
-
-	if (mctrl & TIOCM_RTS)
-		ctrlc |= uap->rts_mask;
-	else
-		ctrls |= uap->rts_mask;
 
-	if (mctrl & TIOCM_DTR)
-		ctrlc |= uap->dtr_mask;
-	else
-		ctrls |= uap->dtr_mask;
-
-	__raw_writel(ctrls, SC_CTRLS);
-	__raw_writel(ctrlc, SC_CTRLC);
+	if (uap->data)
+		uap->data->set_mctrl(uap->dev, uap->port.membase, mctrl);
 }
 
 static void pl010_break_ctl(struct uart_port *port, int break_state)
@@ -539,38 +516,7 @@ static struct uart_ops amba_pl010_pops = {
 	.verify_port	= pl010_verify_port,
 };
 
-static struct uart_amba_port amba_ports[UART_NR] = {
-	{
-		.port	= {
-			.membase	= (void *)IO_ADDRESS(INTEGRATOR_UART0_BASE),
-			.mapbase	= INTEGRATOR_UART0_BASE,
-			.iotype		= UPIO_MEM,
-			.irq		= IRQ_UARTINT0,
-			.uartclk	= 14745600,
-			.fifosize	= 16,
-			.ops		= &amba_pl010_pops,
-			.flags		= UPF_BOOT_AUTOCONF,
-			.line		= 0,
-		},
-		.dtr_mask	= 1 << 5,
-		.rts_mask	= 1 << 4,
-	},
-	{
-		.port	= {
-			.membase	= (void *)IO_ADDRESS(INTEGRATOR_UART1_BASE),
-			.mapbase	= INTEGRATOR_UART1_BASE,
-			.iotype		= UPIO_MEM,
-			.irq		= IRQ_UARTINT1,
-			.uartclk	= 14745600,
-			.fifosize	= 16,
-			.ops		= &amba_pl010_pops,
-			.flags		= UPF_BOOT_AUTOCONF,
-			.line		= 1,
-		},
-		.dtr_mask	= 1 << 7,
-		.rts_mask	= 1 << 6,
-	}
-};
+static struct uart_amba_port *amba_ports[UART_NR];
 
 #ifdef CONFIG_SERIAL_AMBA_PL010_CONSOLE
 
@@ -588,7 +534,7 @@ static void pl010_console_putchar(struct uart_port *port, int ch)
 static void
 pl010_console_write(struct console *co, const char *s, unsigned int count)
 {
-	struct uart_port *port = &amba_ports[co->index].port;
+	struct uart_port *port = &amba_ports[co->index]->port;
 	unsigned int status, old_cr;
 
 	/*
@@ -651,7 +597,7 @@ static int __init pl010_console_setup(struct console *co, char *options)
 	 */
 	if (co->index >= UART_NR)
 		co->index = 0;
-	port = &amba_ports[co->index].port;
+	port = &amba_ports[co->index]->port;
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
@@ -672,24 +618,6 @@ static struct console amba_console = {
 	.data		= &amba_reg,
 };
 
-static int __init amba_console_init(void)
-{
-	/*
-	 * All port initializations are done statically
-	 */
-	register_console(&amba_console);
-	return 0;
-}
-console_initcall(amba_console_init);
-
-static int __init amba_late_console_init(void)
-{
-	if (!(amba_console.flags & CON_ENABLED))
-		register_console(&amba_console);
-	return 0;
-}
-late_initcall(amba_late_console_init);
-
 #define AMBA_CONSOLE	&amba_console
 #else
 #define AMBA_CONSOLE	NULL
@@ -707,30 +635,76 @@ static struct uart_driver amba_reg = {
 
 static int pl010_probe(struct amba_device *dev, void *id)
 {
-	int i;
+	struct uart_amba_port *port;
+	void __iomem *base;
+	int i, ret;
 
-	for (i = 0; i < UART_NR; i++) {
-		if (amba_ports[i].port.mapbase != dev->res.start)
-			continue;
+	for (i = 0; i < ARRAY_SIZE(amba_ports); i++)
+		if (amba_ports[i] == NULL)
+			break;
 
-		amba_ports[i].port.dev = &dev->dev;
-		uart_add_one_port(&amba_reg, &amba_ports[i].port);
-		amba_set_drvdata(dev, &amba_ports[i]);
-		break;
+	if (i == ARRAY_SIZE(amba_ports)) {
+		ret = -EBUSY;
+		goto out;
 	}
 
-	return 0;
+	port = kzalloc(sizeof(struct uart_amba_port), GFP_KERNEL);
+	if (!port) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	base = ioremap(dev->res.start, PAGE_SIZE);
+	if (!base) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	port->port.dev = &dev->dev;
+	port->port.mapbase = dev->res.start;
+	port->port.membase = base;
+	port->port.iotype = UPIO_MEM;
+	port->port.irq = dev->irq[0];
+	port->port.uartclk = 14745600;
+	port->port.fifosize = 16;
+	port->port.ops = &amba_pl010_pops;
+	port->port.flags = UPF_BOOT_AUTOCONF;
+	port->port.line = i;
+	port->dev = dev;
+	port->data = dev->dev.platform_data;
+
+	amba_ports[i] = port;
+
+	amba_set_drvdata(dev, port);
+	ret = uart_add_one_port(&amba_reg, &port->port);
+	if (ret) {
+		amba_set_drvdata(dev, NULL);
+		amba_ports[i] = NULL;
+		iounmap(base);
+ free:
+		kfree(port);
+	}
+
+ out:
+	return ret;
 }
 
 static int pl010_remove(struct amba_device *dev)
 {
-	struct uart_amba_port *uap = amba_get_drvdata(dev);
-
-	if (uap)
-		uart_remove_one_port(&amba_reg, &uap->port);
+	struct uart_amba_port *port = amba_get_drvdata(dev);
+	int i;
 
 	amba_set_drvdata(dev, NULL);
 
+	uart_remove_one_port(&amba_reg, &port->port);
+
+	for (i = 0; i < ARRAY_SIZE(amba_ports); i++)
+		if (amba_ports[i] == port)
+			amba_ports[i] = NULL;
+
+	iounmap(port->port.membase);
+	kfree(port);
+
 	return 0;
 }
 
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index dc726ff..48ee32a 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -158,4 +158,10 @@
 #define UART01x_RSR_ANY		(UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE)
 #define UART01x_FR_MODEM_ANY	(UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
 
+#ifndef __ASSEMBLY__
+struct amba_pl010_data {
+	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
+};
+#endif
+
 #endif
-- 
cgit v0.10.2


From aee85fe8e8143d3f54d9e6d3c6cdd40ead563267 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Sun, 26 Mar 2006 23:16:39 +0100
Subject: [SERIAL] Provide Cirrus EP93xx AMBA PL010 serial support.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 865427b..2d892e4 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -30,7 +30,9 @@
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/delay.h>
+#include <linux/termios.h>
 #include <linux/amba/bus.h>
+#include <linux/amba/serial.h>
 
 #include <asm/types.h>
 #include <asm/setup.h>
@@ -360,6 +362,68 @@ void __init ep93xx_init_irq(void)
 /*************************************************************************
  * EP93xx peripheral handling
  *************************************************************************/
+#define EP93XX_UART_MCR_OFFSET		(0x0100)
+
+static void ep93xx_uart_set_mctrl(struct amba_device *dev,
+				  void __iomem *base, unsigned int mctrl)
+{
+	unsigned int mcr;
+
+	mcr = 0;
+	if (!(mctrl & TIOCM_RTS))
+		mcr |= 2;
+	if (!(mctrl & TIOCM_DTR))
+		mcr |= 1;
+
+	__raw_writel(mcr, base + EP93XX_UART_MCR_OFFSET);
+}
+
+static struct amba_pl010_data ep93xx_uart_data = {
+	.set_mctrl	= ep93xx_uart_set_mctrl,
+};
+
+static struct amba_device uart1_device = {
+	.dev		= {
+		.bus_id		= "apb:uart1",
+		.platform_data	= &ep93xx_uart_data,
+	},
+	.res		= {
+		.start	= EP93XX_UART1_PHYS_BASE,
+		.end	= EP93XX_UART1_PHYS_BASE + 0x0fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	.irq		= { IRQ_EP93XX_UART1, NO_IRQ },
+	.periphid	= 0x00041010,
+};
+
+static struct amba_device uart2_device = {
+	.dev		= {
+		.bus_id		= "apb:uart2",
+		.platform_data	= &ep93xx_uart_data,
+	},
+	.res		= {
+		.start	= EP93XX_UART2_PHYS_BASE,
+		.end	= EP93XX_UART2_PHYS_BASE + 0x0fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	.irq		= { IRQ_EP93XX_UART2, NO_IRQ },
+	.periphid	= 0x00041010,
+};
+
+static struct amba_device uart3_device = {
+	.dev		= {
+		.bus_id		= "apb:uart3",
+		.platform_data	= &ep93xx_uart_data,
+	},
+	.res		= {
+		.start	= EP93XX_UART3_PHYS_BASE,
+		.end	= EP93XX_UART3_PHYS_BASE + 0x0fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	.irq		= { IRQ_EP93XX_UART3, NO_IRQ },
+	.periphid	= 0x00041010,
+};
+
 void __init ep93xx_init_devices(void)
 {
 	unsigned int v;
@@ -371,4 +435,8 @@ void __init ep93xx_init_devices(void)
 	v &= ~EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE;
 	__raw_writel(0xaa, EP93XX_SYSCON_SWLOCK);
 	__raw_writel(v, EP93XX_SYSCON_DEVICE_CONFIG);
+
+	amba_device_register(&uart1_device, &iomem_resource);
+	amba_device_register(&uart2_device, &iomem_resource);
+	amba_device_register(&uart3_device, &iomem_resource);
 }
-- 
cgit v0.10.2


From 6abaaaae6d5ed52422c8caf65f3cdbb95579bb58 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 26 Mar 2006 17:37:54 -0800
Subject: [IPSEC]: Fix tunnel error handling in ipcomp6

The error handling in ipcomp6_tunnel_create is broken in two ways:

1) If we fail to allocate an SPI (this should never happen in practice
since there are plenty of 32-bit SPI values for us to use), we will
still go ahead and create the SA.

2) When xfrm_init_state fails, we first of all may trigger the BUG_TRAP
in __xfrm_state_destroy because we didn't set the state to DEAD.  More
importantly we end up returning the freed state as if we succeeded!

This patch fixes them both.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 028b636..d4cfec3 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -228,6 +228,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 
 	t->id.proto = IPPROTO_IPV6;
 	t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
+	if (!t->id.spi)
+		goto error;
+
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
@@ -243,7 +246,9 @@ out:
 	return t;
 
 error:
+	t->km.state = XFRM_STATE_DEAD;
 	xfrm_state_put(t);
+	t = NULL;
 	goto out;
 }
 
-- 
cgit v0.10.2


From 3eb4801d7bde42b82f05137392a1ee0ece090bad Mon Sep 17 00:00:00 2001
From: Norbert Kiesel <nkiesel@tbdnetworks.com>
Date: Sun, 26 Mar 2006 17:39:55 -0800
Subject: [NET]: drop duplicate assignment in request_sock

Just noticed that request_sock.[ch] contain a useless assignment of
rskq_accept_head to itself.  I assume this is a typo and the 2nd one
was supposed to be _tail.  However, setting _tail to NULL is not
needed, so the patch below just drops the 2nd assignment.

Signed-off-By: Norbert Kiesel <nkiesel@tbdnetworks.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 11641c9..c5d7f92 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -145,7 +145,7 @@ static inline struct request_sock *
 {
 	struct request_sock *req = queue->rskq_accept_head;
 
-	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+	queue->rskq_accept_head = NULL;
 	return req;
 }
 
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 98f0fc9..1e44eda 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -51,7 +51,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 
 	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
 	rwlock_init(&queue->syn_wait_lock);
-	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+	queue->rskq_accept_head = NULL;
 	lopt->nr_table_entries = nr_table_entries;
 
 	write_lock_bh(&queue->syn_wait_lock);
-- 
cgit v0.10.2


From 28832e83379afd0b0e83b78ac317290c79ebd496 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 8 Mar 2006 11:19:51 +0100
Subject: [PATCH] update max_sectors documentation

The max_sectors has been split into max_hw_sectors and max_sectors for some
time. A patch to have blk_queue_max_sectors enforce this was sent by
me and it broke IDE. This patch updates the documentation.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8e63831..f989a9e 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -132,8 +132,18 @@ Some new queue property settings:
 		limit. No highmem default.
 
 	blk_queue_max_sectors(q, max_sectors)
-		Maximum size request you can handle in units of 512 byte
-		sectors. 255 default.
+		Sets two variables that limit the size of the request.
+
+		- The request queue's max_sectors, which is a soft size in
+		in units of 512 byte sectors, and could be dynamically varied
+		by the core kernel.
+
+		- The request queue's max_hw_sectors, which is a hard limit
+		and reflects the maximum size request a driver can handle
+		in units of 512 byte sectors.
+
+		The default for both max_sectors and max_hw_sectors is
+		255. The upper limit of max_sectors is 1024.
 
 	blk_queue_max_phys_segments(q, max_segments)
 		Maximum physical segments you can handle in a request. 128
-- 
cgit v0.10.2


From 06ff37ffb4ba8bcbda0e9d19c712c954ef7b8a0a Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Wed, 8 Mar 2006 11:21:52 +0100
Subject: [PATCH] kzalloc() conversion in drivers/block

this patch converts drivers/block to kzalloc usage.
Compile tested with allyesconfig.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 9bdea2a..49c7cd5 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -311,11 +311,10 @@ static boolean DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
 		CommandsRemaining = CommandAllocationGroupSize;
 	  CommandGroupByteCount =
 		CommandsRemaining * CommandAllocationLength;
-	  AllocationPointer = kmalloc(CommandGroupByteCount, GFP_ATOMIC);
+	  AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC);
 	  if (AllocationPointer == NULL)
 		return DAC960_Failure(Controller,
 					"AUXILIARY STRUCTURE CREATION");
-	  memset(AllocationPointer, 0, CommandGroupByteCount);
 	 }
       Command = (DAC960_Command_T *) AllocationPointer;
       AllocationPointer += CommandAllocationLength;
@@ -2709,14 +2708,12 @@ DAC960_DetectController(struct pci_dev *PCI_Device,
   void __iomem *BaseAddress;
   int i;
 
-  Controller = (DAC960_Controller_T *)
-	kmalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC);
+  Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC);
   if (Controller == NULL) {
 	DAC960_Error("Unable to allocate Controller structure for "
                        "Controller at\n", NULL);
 	return NULL;
   }
-  memset(Controller, 0, sizeof(DAC960_Controller_T));
   Controller->ControllerNumber = DAC960_ControllerCount;
   DAC960_Controllers[DAC960_ControllerCount++] = Controller;
   Controller->Bus = PCI_Device->bus->number;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 71ec9e6..168da0c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -996,13 +996,11 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
 			status = -EINVAL;
 			goto cleanup1;
 		}
-		buff = (unsigned char **) kmalloc(MAXSGENTRIES * 
-				sizeof(char *), GFP_KERNEL);
+		buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
 		if (!buff) {
 			status = -ENOMEM;
 			goto cleanup1;
 		}
-		memset(buff, 0, MAXSGENTRIES);
 		buff_size = (int *) kmalloc(MAXSGENTRIES * sizeof(int), 
 					GFP_KERNEL);
 		if (!buff_size) {
@@ -2940,13 +2938,12 @@ static void cciss_getgeometry(int cntl_num)
 	int block_size;
 	int total_size; 
 
-	ld_buff = kmalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
+	ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
 	if (ld_buff == NULL)
 	{
 		printk(KERN_ERR "cciss: out of memory\n");
 		return;
 	}
-	memset(ld_buff, 0, sizeof(ReportLunData_struct));
 	size_buff = kmalloc(sizeof( ReadCapdata_struct), GFP_KERNEL);
         if (size_buff == NULL)
         {
@@ -3060,10 +3057,9 @@ static int alloc_cciss_hba(void)
 	for(i=0; i< MAX_CTLR; i++) {
 		if (!hba[i]) {
 			ctlr_info_t *p;
-			p = kmalloc(sizeof(ctlr_info_t), GFP_KERNEL);
+			p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
 			if (!p)
 				goto Enomem;
-			memset(p, 0, sizeof(ctlr_info_t));
 			for (n = 0; n < NWD; n++)
 				p->gendisk[n] = disk[n];
 			hba[i] = p;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 0e66e90..597c007 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -1027,12 +1027,11 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
 	int i;
 
 	c = (ctlr_info_t *) hba[cntl_num];	
-	ld_buff = kmalloc(reportlunsize, GFP_KERNEL);
+	ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
 	if (ld_buff == NULL) {
 		printk(KERN_ERR "cciss: out of memory\n");
 		return;
 	}
-	memset(ld_buff, 0, reportlunsize);
 	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
         if (inq_buff == NULL) {
                 printk(KERN_ERR "cciss: out of memory\n");
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c
index 08d858a..41a237c 100644
--- a/drivers/block/paride/bpck6.c
+++ b/drivers/block/paride/bpck6.c
@@ -224,10 +224,9 @@ static void bpck6_log_adapter( PIA *pi, char * scratch, int verbose )
 
 static int bpck6_init_proto(PIA *pi)
 {
-	Interface *p = kmalloc(sizeof(Interface), GFP_KERNEL);
+	Interface *p = kzalloc(sizeof(Interface), GFP_KERNEL);
 
 	if (p) {
-		memset(p, 0, sizeof(Interface));
 		pi->private = (unsigned long)p;
 		return 0;
 	}
-- 
cgit v0.10.2


From f68110fc28859f5d7231d5c4fb6dbe68b1394c9b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 8 Mar 2006 13:31:44 +0100
Subject: [BLOCK] ll_rw_blk: kmalloc -> kzalloc conversion

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 062067f..b836b43 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -906,17 +906,15 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
 				__FUNCTION__, depth);
 	}
 
-	tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
+	tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
 	if (!tag_index)
 		goto fail;
 
 	nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
-	tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
+	tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
 	if (!tag_map)
 		goto fail;
 
-	memset(tag_index, 0, depth * sizeof(struct request *));
-	memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
 	tags->real_max_depth = depth;
 	tags->max_depth = depth;
 	tags->tag_index = tag_index;
-- 
cgit v0.10.2


From b8fca1c7682105c843319728d8e37b42b19092bb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 21 Mar 2006 15:24:37 +0100
Subject: [PATCH] ide-cd: quiet down GPCMD_READ_CDVD_CAPACITY failure

Some drives like to throw a:

ATAPI device hdc:
  Error: Not ready -- (Sense key=0x02)
  Incompatible medium installed -- (asc=0x30, ascq=0x00)
  The failed "Read Cd/Dvd Capacity" packet command was:
  "25 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "

warning on incompatible media, so quiet down this error.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index c7671e1..b4a41d6 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -2143,6 +2143,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 	req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 	req.data = (char *)&capbuf;
 	req.data_len = sizeof(capbuf);
+	req.flags |= REQ_QUIET;
 
 	stat = cdrom_queue_packet_command(drive, &req);
 	if (stat == 0) {
-- 
cgit v0.10.2


From 4c5d0bbde9669cfb7f7fd4670dc9a117aea90384 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 22 Mar 2006 08:08:01 +0100
Subject: [PATCH] blk_execute_rq_nowait-speedup

Both elv_add_request() and generic_unplug_device() grab the queue lock
and disable interrupts, do that locally and use the __ variants.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index b836b43..7fc903b 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2477,10 +2477,12 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 	rq->rq_disk = bd_disk;
 	rq->flags |= REQ_NOMERGE;
 	rq->end_io = done;
-	elv_add_request(q, rq, where, 1);
-	generic_unplug_device(q);
+	WARN_ON(irqs_disabled());
+	spin_lock_irq(q->queue_lock);
+	__elv_add_request(q, rq, where, 1);
+	__generic_unplug_device(q);
+	spin_unlock_irq(q->queue_lock);
 }
-
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 
 /**
-- 
cgit v0.10.2


From 837c7878771c15ed8d85ecf814ece7fcb4551b46 Mon Sep 17 00:00:00 2001
From: Ben Woodard <woodard@redhat.com>
Date: Wed, 22 Mar 2006 08:09:31 +0100
Subject: [BLOCK] increase size of disk stat counters

The kernel's representation of the disk statistics uses the type unsigned
which is 32b on both 32b and 64b platforms.  Unfortunately, most system
tools that work with these numbers that are exported in /proc/diskstats
including iostat read these numbers into unsigned longs.  This works fine
on 32b platforms and when the number of IO transactions are small on 64b
platforms.  However, when the numbers wrap on 64b platforms & you read the
numbers into unsigned longs, and compare the numbers to previous readings,
then you get an unsigned representation of a negative number.  This looks
like a very large 64b number & gives you bizarre readouts in iostat:

ilc4: Device:    rrqm/s wrqm/s r/s    w/s  rsec/s  wsec/s    rkB/s wkB/s avgrq-sz avgqu-sz   await  svctm  %util
ilc4: sda        5.50   0.00   143.96 0.00 307496983987862656.00 0.00 153748491993931328.00     0.00 2136028725038430.00     7.94   55.12    5.59  80.42

Though fixing iostat in user space is possible, and a quick survey
indicates that several other similar tools also use unsigned longs when
processing /proc/diskstats.  Therefore, it seems like a better approach
would be to extend the length of the disk_stats structure on 64b
architectures to 64b.  The following patch does that.  It should not affect
the operation on 32b platforms.

Signed-off-by: Ben Woodard <woodard@redhat.com>
Cc: Rick Lindsley <ricklind@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/genhd.c b/block/genhd.c
index 64510fd..db4c60c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -454,8 +454,8 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page)
 	disk_round_stats(disk);
 	preempt_enable();
 	return sprintf(page,
-		"%8u %8u %8llu %8u "
-		"%8u %8u %8llu %8u "
+		"%8lu %8lu %8llu %8u "
+		"%8lu %8lu %8llu %8u "
 		"%8u %8u %8u"
 		"\n",
 		disk_stat_read(disk, ios[READ]),
@@ -649,7 +649,7 @@ static int diskstats_show(struct seq_file *s, void *v)
 	preempt_disable();
 	disk_round_stats(gp);
 	preempt_enable();
-	seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n",
+	seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
 		gp->major, n + gp->first_minor, disk_name(gp, n, buf),
 		disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
 		(unsigned long long)disk_stat_read(gp, sectors[0]),
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fd647fd..179fea53 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -89,12 +89,12 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 
 struct disk_stats {
-	unsigned sectors[2];		/* READs and WRITEs */
-	unsigned ios[2];
-	unsigned merges[2];
-	unsigned ticks[2];
-	unsigned io_ticks;
-	unsigned time_in_queue;
+	unsigned long sectors[2];	/* READs and WRITEs */
+	unsigned long ios[2];
+	unsigned long merges[2];
+	unsigned long ticks[2];
+	unsigned long io_ticks;
+	unsigned long time_in_queue;
 };
 	
 struct gendisk {
-- 
cgit v0.10.2


From 89a7689e5c039090d99cbdc3625252c3dee50f7f Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Fri, 24 Mar 2006 10:00:57 +0100
Subject: [PATCH] unused label in drivers/block/cciss.

this patch removes a warning about an unused label, by
moving the label into the ifdef.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 168da0c..1b0fd31 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2727,9 +2727,9 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c, struct pci_dev *pdev,
                         return;
                 }
         }
+default_int_mode:
 #endif /* CONFIG_PCI_MSI */
 	/* if we get here we're going to use the default interrupt mode */
-default_int_mode:
         c->intr[SIMPLE_MODE_INT] = pdev->irq;
 	return;
 }
-- 
cgit v0.10.2


From 09540e691d7f57684b296b60241b2ff357fae3ab Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sun, 26 Mar 2006 14:32:09 +0200
Subject: [PATCH] Fix blktrace compile with sysfs not defined

debugfs depends on sysfs, so make blktrace kconfig option depend
on that.

Reported by Adrian Bunk.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/Kconfig b/block/Kconfig
index 43ca070..5536839 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -13,6 +13,7 @@ config LBD
 
 config BLK_DEV_IO_TRACE
 	bool "Support for tracing block io actions"
+	depends on SYSFS
 	select RELAY
 	select DEBUG_FS
 	help
-- 
cgit v0.10.2


From ae36b883d29e53b6083ed3d1d44f254cee7507d3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sun, 26 Mar 2006 14:38:54 +0200
Subject: [PATCH] Don't make debugfs depend on DEBUG_KERNEL

We use it generally now, at least blktrace isn't a specific debug
kernel feature.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7e70ab1..6e8a60f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -172,7 +172,7 @@ config DEBUG_IOREMAP
 
 config DEBUG_FS
 	bool "Debug Filesystem"
-	depends on DEBUG_KERNEL && SYSFS
+	depends on SYSFS
 	help
 	  debugfs is a virtual file system that kernel developers use to put
 	  debugging files into.  Enable this option to be able to read and
-- 
cgit v0.10.2


From 7bebd83dbf096d0bf4b4bfbaf5d8844a05f5eafc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 27 Mar 2006 01:07:55 -0800
Subject: [SPARC64]: Fix off-by-1 error in TSB grow check.

The worst part about this bug is what it would cause
a hugepage TSB to be allocated for every address space
since "0 >= 0".

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index d21ff32..0db2f7d 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -413,12 +413,12 @@ good_area:
 #ifdef CONFIG_HUGETLB_PAGE
 	mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
 #endif
-	if (unlikely(mm_rss >=
+	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #ifdef CONFIG_HUGETLB_PAGE
 	mm_rss = mm->context.huge_pte_count;
-	if (unlikely(mm_rss >=
+	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
 		tsb_grow(mm, MM_TSB_HUGE, mm_rss);
 #endif
-- 
cgit v0.10.2


From c8c4b939e8dff82ee2bc22ed506ae29ecffca4c2 Mon Sep 17 00:00:00 2001
From: Erik Mouw <erik@bitwizard.nl>
Date: Mon, 27 Mar 2006 15:32:30 +0100
Subject: [ARM] 3416/1: Update LART site URL

Patch from Erik Mouw

The LART website moved to http://www.lartmaker.nl/. This patch
updates the URL in ARM specific files.

Signed-off-by: Erik Mouw <erik@bitwizard.nl>
Acked-by: Jan-Derk Bakker <jdb@lartmaker.nl>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
index cbbe558..78bc1c1 100644
--- a/Documentation/arm/SA1100/Assabet
+++ b/Documentation/arm/SA1100/Assabet
@@ -26,7 +26,7 @@ Installing a bootloader
 
 A couple of bootloaders able to boot Linux on Assabet are available:
 
-BLOB (http://www.lart.tudelft.nl/lartware/blob/)
+BLOB (http://www.lartmaker.nl/lartware/blob/)
 
    BLOB is a bootloader used within the LART project.  Some contributed
    patches were merged into BLOB to add support for Assabet.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
index 2f73f51..6d412b6 100644
--- a/Documentation/arm/SA1100/LART
+++ b/Documentation/arm/SA1100/LART
@@ -11,4 +11,4 @@ is under development, with plenty of others in different stages of
 planning.
 
 The hardware designs for this board have been released under an open license;
-see the LART page at http://www.lart.tudelft.nl/ for more information.
+see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 6923316..cd67ab1 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -111,7 +111,7 @@ config SA1100_LART
 	bool "LART"
 	help
 	  Say Y here if you are using the Linux Advanced Radio Terminal
-	  (also known as the LART).  See <http://www.lart.tudelft.nl/> for
+	  (also known as the LART).  See <http://www.lartmaker.nl/> for
 	  information on the LART.
 
 config SA1100_PLEB
diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c
index 6435b2e..d68630b 100644
--- a/arch/arm/mach-sa1100/cpu-sa1100.c
+++ b/arch/arm/mach-sa1100/cpu-sa1100.c
@@ -11,7 +11,7 @@
  *     linux-2.4.5-rmk1
  *
  * This software has been developed while working on the LART
- * computing board (http://www.lart.tudelft.nl/), which is
+ * computing board (http://www.lartmaker.nl/), which is
  * sponsored by the Mobile Multi-media Communications
  * (http://www.mmc.tudelft.nl/) and Ubiquitous Communications 
  * (http://www.ubicom.tudelft.nl/) projects.
-- 
cgit v0.10.2


From 4682adcfb06448827fbdfd8b6c636796de569b7d Mon Sep 17 00:00:00 2001
From: "Hyok S. Choi" <hyok.choi@samsung.com>
Date: Mon, 27 Mar 2006 15:46:06 +0100
Subject: [ARM] nommu: trivial patch for arch/arm/lib/Makefile

ifeq ($CONFIG_PREEMPT,y) -> ifeq ($(CONFIG_PREEMPT),y)

Signed-off-by: Hyok S. Choi <hyok.choi@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 391f3ab..7b726b6 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -18,7 +18,7 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 
 # the code in uaccess.S is not preemption safe and
 # probably faster on ARMv3 only
-ifeq ($CONFIG_PREEMPT,y)
+ifeq ($(CONFIG_PREEMPT),y)
   lib-y	+= copy_from_user.o copy_to_user.o
 else
 ifneq ($(CONFIG_CPU_32v3),y)
-- 
cgit v0.10.2


From 3747b36eeab93d8969e86987bbc1d44971229b26 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 27 Mar 2006 16:59:07 +0100
Subject: [ARM] proc-v6: mark page table walks outer-cacheable, shared.  Enable
 NX.

Mark page table walks with outer-cacheable attribute, and enable no-execute
in page tables.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 9a7e7c0..ee6f152 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -21,6 +21,14 @@
 
 #define D_CACHE_LINE_SIZE	32
 
+#define TTB_C		(1 << 0)
+#define TTB_S		(1 << 1)
+#define TTB_IMP		(1 << 2)
+#define TTB_RGN_NC	(0 << 3)
+#define TTB_RGN_WBWA	(1 << 3)
+#define TTB_RGN_WT	(2 << 3)
+#define TTB_RGN_WB	(3 << 3)
+
 	.macro	cpsie, flags
 	.ifc \flags, f
 	.long	0xf1080040
@@ -115,7 +123,7 @@ ENTRY(cpu_v6_switch_mm)
 	mov	r2, #0
 	ldr	r1, [r1, #MM_CONTEXT_ID]	@ get mm->context.id
 #ifdef CONFIG_SMP
-	orr	r0, r0, #2			@ set shared pgtable
+	orr	r0, r0, #TTB_RGN_WBWA|TTB_S	@ mark PTWs shared, outer cacheable
 #endif
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 	mcr	p15, 0, r2, c7, c10, 4		@ drain write buffer
@@ -161,8 +169,8 @@ ENTRY(cpu_v6_set_pte)
 	tst	r1, #L_PTE_YOUNG
 	biceq	r2, r2, #PTE_EXT_APX | PTE_EXT_AP_MASK
 
-@	tst	r1, #L_PTE_EXEC
-@	orreq	r2, r2, #PTE_EXT_XN
+	tst	r1, #L_PTE_EXEC
+	orreq	r2, r2, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_PRESENT
 	moveq	r2, #0
@@ -221,7 +229,7 @@ __v6_setup:
 	mcr	p15, 0, r0, c8, c7, 0		@ invalidate I + D TLBs
 	mcr	p15, 0, r0, c2, c0, 2		@ TTB control register
 #ifdef CONFIG_SMP
-	orr	r4, r4, #2			@ set shared pgtable
+	orr	r4, r4, #TTB_RGN_WBWA|TTB_S	@ mark PTWs shared, outer cacheable
 #endif
 	mcr	p15, 0, r4, c2, c0, 1		@ load TTB1
 #ifdef CONFIG_VFP
-- 
cgit v0.10.2


From dbffa471611d3fc4b401ebabf7bb63ac0e0272b1 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Mon, 27 Mar 2006 01:14:26 -0800
Subject: [PATCH] PM-Timer: don't use workaround if chipset is not buggy

Current timer_pm.c reads I/O port triple times, in order to avoid the bug
of chipset.  But I/O port is slow.

2.6.16 (pmtmr)
Simple gettimeofday: 3.6532 microseconds

2.6.16+patch (pmtmr)
Simple gettimeofday: 1.4582 microseconds

[if chip is buggy, probably it will be 7us or more in 4.2% of probability.]

This patch adds blacklist of buggy chip, and if chip is not buggy, this
uses fast normal version instead of slow workaround version.

If chip is buggy, warnings "pmtmr is slow".  But sounds like there is gray
zone.  I found the PIIX4 errata, but I couldn't find the ICH4 errata.  But
some motherboard seems to have problem.

So, if we found a ICH4, generate warnings, and use a workaround version.
If user's ICH4 is good, the user can specify the "pmtmr_good" boot
parameter to use fast version.

Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 264edaa..144e94a 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/pci.h>
 #include <asm/types.h>
 #include <asm/timer.h>
 #include <asm/smp.h>
@@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
 
 #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
 
+static int pmtmr_need_workaround __read_mostly = 1;
+
 /*helper function to safely read acpi pm timesource*/
 static inline u32 read_pmtmr(void)
 {
-	u32 v1=0,v2=0,v3=0;
-	/* It has been reported that because of various broken
-	 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
-	 * source is not latched, so you must read it multiple
-	 * times to insure a safe value is read.
-	 */
-	do {
-		v1 = inl(pmtmr_ioport);
-		v2 = inl(pmtmr_ioport);
-		v3 = inl(pmtmr_ioport);
-	} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
-			|| (v3 > v1 && v3 < v2));
-
-	/* mask the output to 24 bits */
-	return v2 & ACPI_PM_MASK;
+	if (pmtmr_need_workaround) {
+		u32 v1, v2, v3;
+
+		/* It has been reported that because of various broken
+		 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
+		 * source is not latched, so you must read it multiple
+		 * times to insure a safe value is read.
+		 */
+		do {
+			v1 = inl(pmtmr_ioport);
+			v2 = inl(pmtmr_ioport);
+			v3 = inl(pmtmr_ioport);
+		} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+			 || (v3 > v1 && v3 < v2));
+
+		/* mask the output to 24 bits */
+		return v2 & ACPI_PM_MASK;
+	}
+
+	return inl(pmtmr_ioport) & ACPI_PM_MASK;
 }
 
 
@@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = {
 	.opts = &timer_pmtmr,
 };
 
+#ifdef CONFIG_PCI
+/*
+ * PIIX4 Errata:
+ *
+ * The power management timer may return improper results when read.
+ * Although the timer value settles properly after incrementing,
+ * while incrementing there is a 3 ns window every 69.8 ns where the
+ * timer value is indeterminate (a 4.2% chance that the data will be
+ * incorrect when read). As a result, the ACPI free running count up
+ * timer specification is violated due to erroneous reads.
+ */
+static int __init pmtmr_bug_check(void)
+{
+	static struct pci_device_id gray_list[] __initdata = {
+		/* these chipsets may have bug. */
+		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
+				PCI_DEVICE_ID_INTEL_82801DB_0) },
+		{ },
+	};
+	struct pci_dev *dev;
+	int pmtmr_has_bug = 0;
+	u8 rev;
+
+	if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
+		return 0;
+
+	dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+			     PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
+	if (dev) {
+		pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+		/* the bug has been fixed in PIIX4M */
+		if (rev < 3) {
+			printk(KERN_WARNING "* Found PM-Timer Bug on this "
+				"chipset. Due to workarounds for a bug,\n"
+				"* this time source is slow.  Consider trying "
+				"other time sources (clock=)\n");
+			pmtmr_has_bug = 1;
+		}
+		pci_dev_put(dev);
+	}
+
+	if (pci_dev_present(gray_list)) {
+		printk(KERN_WARNING "* This chipset may have PM-Timer Bug.  Due"
+			" to workarounds for a bug,\n"
+			"* this time source is slow. If you are sure your timer"
+			" does not have\n"
+			"* this bug, please use \"pmtmr_good\" to disable the "
+			"workaround\n");
+		pmtmr_has_bug = 1;
+	}
+
+	if (!pmtmr_has_bug)
+		pmtmr_need_workaround = 0;
+
+	return 0;
+}
+device_initcall(pmtmr_bug_check);
+#endif
+
+static int __init pmtr_good_setup(char *__str)
+{
+	pmtmr_need_workaround = 0;
+	return 1;
+}
+__setup("pmtmr_good", pmtr_good_setup);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
-- 
cgit v0.10.2


From bb83da053319e81906473bec08e8f677d6ac615f Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:27 -0800
Subject: [PATCH] uml: fix build warnings in __get_user

Fix a gcc warning about losing qualifiers to the first argument of
copy_from_user.  The typeof change for correctness, and fixes a lot of the
warnings, but there are some cases where x has some extra qualifiers, like
volatile, which copy_from_user can't know about.  For these, the void * cast
seems to be necessary.

Also cleaned up some of the whitespace and got rid of the emacs comment at the
bottom.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h
index 2ee028b..4e460d6 100644
--- a/include/asm-um/uaccess.h
+++ b/include/asm-um/uaccess.h
@@ -41,16 +41,16 @@
 
 #define __get_user(x, ptr) \
 ({ \
-        const __typeof__(ptr) __private_ptr = ptr; \
-        __typeof__(*(__private_ptr)) __private_val; \
-        int __private_ret = -EFAULT; \
-        (x) = (__typeof__(*(__private_ptr)))0; \
-	if (__copy_from_user(&__private_val, (__private_ptr), \
-	    sizeof(*(__private_ptr))) == 0) {\
-        	(x) = (__typeof__(*(__private_ptr))) __private_val; \
-		__private_ret = 0; \
-	} \
-        __private_ret; \
+	const __typeof__(ptr) __private_ptr = ptr;	\
+	__typeof__(x) __private_val;			\
+	int __private_ret = -EFAULT;			\
+	(x) = (__typeof__(*(__private_ptr)))0;				\
+	if (__copy_from_user((void *) &__private_val, (__private_ptr),	\
+			     sizeof(*(__private_ptr))) == 0) {		\
+		(x) = (__typeof__(*(__private_ptr))) __private_val;	\
+		__private_ret = 0;					\
+	}								\
+	__private_ret;							\
 }) 
 
 #define get_user(x, ptr) \
@@ -89,14 +89,3 @@ struct exception_table_entry
 };
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
-- 
cgit v0.10.2


From c90e12b86595dbd9996336ac9d762487c638d934 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:29 -0800
Subject: [PATCH] uml: fix declaration of exit()

This fixes a conflict between a header and what gcc "knows" the declaration'
to be.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/include/kern.h b/arch/um/include/kern.h
index 7d223be..4ce3fc6 100644
--- a/arch/um/include/kern.h
+++ b/arch/um/include/kern.h
@@ -29,7 +29,7 @@ extern int getuid(void);
 extern int getgid(void);
 extern int pause(void);
 extern int write(int, const void *, int);
-extern int exit(int);
+extern void exit(int);
 extern int close(int);
 extern int read(unsigned int, char *, int);
 extern int pipe(int *);
-- 
cgit v0.10.2


From d9f8b62a6b033fe7226d7c2b2efdd164ca1aa07c Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:29 -0800
Subject: [PATCH] uml: fix some printf formats

Some printf formats are incorrect for large memory sizes.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 80c9c18..0c5b9dc 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -421,7 +421,7 @@ int linux_main(int argc, char **argv)
 #ifndef CONFIG_HIGHMEM
 		highmem = 0;
 		printf("CONFIG_HIGHMEM not enabled - physical memory shrunk "
-		       "to %lu bytes\n", physmem_size);
+		       "to %Lu bytes\n", physmem_size);
 #endif
 	}
 
@@ -433,8 +433,8 @@ int linux_main(int argc, char **argv)
 
 	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
 	if(init_maps(physmem_size, iomem_size, highmem)){
-		printf("Failed to allocate mem_map for %lu bytes of physical "
-		       "memory and %lu bytes of highmem\n", physmem_size,
+		printf("Failed to allocate mem_map for %Lu bytes of physical "
+		       "memory and %Lu bytes of highmem\n", physmem_size,
 		       highmem);
 		exit(1);
 	}
-- 
cgit v0.10.2


From 63ae2a94d98dd9d94163918539ec80df33f44a69 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:30 -0800
Subject: [PATCH] uml: move libc-dependent irq code to os-Linux

The serial UML OS-abstraction layer patch (um/kernel dir).

This moves all systemcalls from irq_user.c file under os-Linux dir

Signed-off-by: Gennady Sharapov <Gennady.V.Sharapov@intel.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h
index b61deb8..69a93c8 100644
--- a/arch/um/include/irq_user.h
+++ b/arch/um/include/irq_user.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -6,6 +6,17 @@
 #ifndef __IRQ_USER_H__
 #define __IRQ_USER_H__
 
+struct irq_fd {
+	struct irq_fd *next;
+	void *id;
+	int fd;
+	int type;
+	int irq;
+	int pid;
+	int events;
+	int current_events;
+};
+
 enum { IRQ_READ, IRQ_WRITE };
 
 extern void sigio_handler(int sig, union uml_pt_regs *regs);
@@ -16,8 +27,6 @@ extern void reactivate_fd(int fd, int irqnum);
 extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
 extern void forward_interrupts(int pid);
-extern void init_irq_signals(int on_sigstack);
-extern void forward_ipi(int fd, int pid);
 extern int activate_ipi(int fd, int pid);
 extern unsigned long irq_lock(void);
 extern void irq_unlock(unsigned long flags);
diff --git a/arch/um/include/misc_constants.h b/arch/um/include/misc_constants.h
new file mode 100644
index 0000000..989bc08
--- /dev/null
+++ b/arch/um/include/misc_constants.h
@@ -0,0 +1,6 @@
+#ifndef __MISC_CONSTANT_H_
+#define __MISC_CONSTANT_H_
+
+#include <user_constants.h>
+
+#endif
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 2a1c64d..efae3e6 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -12,6 +12,7 @@
 #include "sysdep/ptrace.h"
 #include "kern_util.h"
 #include "skas/mm_id.h"
+#include "irq_user.h"
 
 #define OS_TYPE_FILE 1 
 #define OS_TYPE_DIR 2 
@@ -198,6 +199,8 @@ extern void os_flush_stdout(void);
 /* tt.c
  * for tt mode only (will be deleted in future...)
  */
+extern void forward_ipi(int fd, int pid);
+extern void kill_child_dead(int pid);
 extern void stop(void);
 extern int wait_for_stop(int pid, int sig, int cont_type, void *relay);
 extern int protect_memory(unsigned long addr, unsigned long len,
@@ -294,4 +297,17 @@ extern void initial_thread_cb_skas(void (*proc)(void *),
 extern void halt_skas(void);
 extern void reboot_skas(void);
 
+/* irq.c */
+extern int os_waiting_for_events(struct irq_fd *active_fds);
+extern int os_isatty(int fd);
+extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
+extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
+extern void os_free_irq_later(struct irq_fd *active_fds,
+		int irq, void *dev_id);
+extern int os_get_pollfd(int i);
+extern void os_set_pollfd(int i, int fd);
+extern void os_set_ioignore(void);
+extern void init_irq_signals(int on_sigstack);
+
 #endif
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
index 0e32f5f..483e79c 100644
--- a/arch/um/kernel/irq_user.c
+++ b/arch/um/kernel/irq_user.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -18,53 +18,25 @@
 #include "sigio.h"
 #include "irq_user.h"
 #include "os.h"
+#include "misc_constants.h"
 
-struct irq_fd {
-	struct irq_fd *next;
-	void *id;
-	int fd;
-	int type;
-	int irq;
-	int pid;
-	int events;
-	int current_events;
-};
-
-static struct irq_fd *active_fds = NULL;
+struct irq_fd *active_fds = NULL;
 static struct irq_fd **last_irq_ptr = &active_fds;
 
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
-
-extern int io_count, intr_count;
-
 extern void free_irqs(void);
 
 void sigio_handler(int sig, union uml_pt_regs *regs)
 {
 	struct irq_fd *irq_fd;
-	int i, n;
+	int n;
 
 	if(smp_sigio_handler()) return;
 	while(1){
-		n = poll(pollfds, pollfds_num, 0);
-		if(n < 0){
-			if(errno == EINTR) continue;
-			printk("sigio_handler : poll returned %d, "
-			       "errno = %d\n", n, errno);
-			break;
-		}
-		if(n == 0) break;
-
-		irq_fd = active_fds;
-		for(i = 0; i < pollfds_num; i++){
-			if(pollfds[i].revents != 0){
-				irq_fd->current_events = pollfds[i].revents;
-				pollfds[i].fd = -1;
-			}
-			irq_fd = irq_fd->next;
-		}
+		n = os_waiting_for_events(active_fds);
+		if (n <= 0) {
+			if(n == -EINTR) continue;
+			else break;
+ 		}
 
 		for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
 			if(irq_fd->current_events != 0){
@@ -77,14 +49,9 @@ void sigio_handler(int sig, union uml_pt_regs *regs)
 	free_irqs();
 }
 
-int activate_ipi(int fd, int pid)
-{
-	return(os_set_fd_async(fd, pid));
-}
-
 static void maybe_sigio_broken(int fd, int type)
 {
-	if(isatty(fd)){
+	if(os_isatty(fd)){
 		if((type == IRQ_WRITE) && !pty_output_sigio){
 			write_sigio_workaround();
 			add_sigio_fd(fd, 0);
@@ -96,12 +63,13 @@ static void maybe_sigio_broken(int fd, int type)
 	}
 }
 
+
 int activate_fd(int irq, int fd, int type, void *dev_id)
 {
 	struct pollfd *tmp_pfd;
 	struct irq_fd *new_fd, *irq_fd;
 	unsigned long flags;
-	int pid, events, err, n, size;
+	int pid, events, err, n;
 
 	pid = os_getpid();
 	err = os_set_fd_async(fd, pid);
@@ -113,8 +81,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 	if(new_fd == NULL)
 		goto out;
 
-	if(type == IRQ_READ) events = POLLIN | POLLPRI;
-	else events = POLLOUT;
+	if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI;
+	else events = UM_POLLOUT;
 	*new_fd = ((struct irq_fd) { .next  		= NULL,
 				     .id 		= dev_id,
 				     .fd 		= fd,
@@ -125,12 +93,12 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 				     .current_events 	= 0 } );
 
 	/* Critical section - locked by a spinlock because this stuff can
-	 * be changed from interrupt handlers.  The stuff above is done 
+	 * be changed from interrupt handlers.  The stuff above is done
 	 * outside the lock because it allocates memory.
 	 */
 
 	/* Actually, it only looks like it can be called from interrupt
-	 * context.  The culprit is reactivate_fd, which calls 
+	 * context.  The culprit is reactivate_fd, which calls
 	 * maybe_sigio_broken, which calls write_sigio_workaround,
 	 * which calls activate_fd.  However, write_sigio_workaround should
 	 * only be called once, at boot time.  That would make it clear that
@@ -147,40 +115,42 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 		}
 	}
 
-	n = pollfds_num;
-	if(n == pollfds_size){
-		while(1){
-			/* Here we have to drop the lock in order to call 
-			 * kmalloc, which might sleep.  If something else
-			 * came in and changed the pollfds array, we free
-			 * the buffer and try again.
-			 */
-			irq_unlock(flags);
-			size = (pollfds_num + 1) * sizeof(pollfds[0]);
-			tmp_pfd = um_kmalloc(size);
-			flags = irq_lock();
-			if(tmp_pfd == NULL)
-				goto out_unlock;
-			if(n == pollfds_size)
-				break;
+	/*-------------*/
+	if(type == IRQ_WRITE)
+		fd = -1;
+
+	tmp_pfd = NULL;
+	n = 0;
+
+	while(1){
+		n = os_create_pollfd(fd, events, tmp_pfd, n);
+		if (n == 0)
+			break;
+
+		/* n > 0
+		 * It means we couldn't put new pollfd to current pollfds
+		 * and tmp_fds is NULL or too small for new pollfds array.
+		 * Needed size is equal to n as minimum.
+		 *
+		 * Here we have to drop the lock in order to call
+		 * kmalloc, which might sleep.
+		 * If something else came in and changed the pollfds array
+		 * so we will not be able to put new pollfd struct to pollfds
+		 * then we free the buffer tmp_fds and try again.
+		 */
+		irq_unlock(flags);
+		if (tmp_pfd != NULL) {
 			kfree(tmp_pfd);
+			tmp_pfd = NULL;
 		}
-		if(pollfds != NULL){
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	}
 
-	if(type == IRQ_WRITE) 
-		fd = -1;
+		tmp_pfd = um_kmalloc(n);
+		if (tmp_pfd == NULL)
+			goto out_kfree;
 
-	pollfds[pollfds_num] = ((struct pollfd) { .fd 	= fd,
-						  .events 	= events,
-						  .revents 	= 0 });
-	pollfds_num++;
+		flags = irq_lock();
+	}
+	/*-------------*/
 
 	*last_irq_ptr = new_fd;
 	last_irq_ptr = &new_fd->next;
@@ -196,6 +166,7 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 
  out_unlock:
 	irq_unlock(flags);
+ out_kfree:
 	kfree(new_fd);
  out:
 	return(err);
@@ -203,43 +174,10 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 
 static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
 {
-	struct irq_fd **prev;
 	unsigned long flags;
-	int i = 0;
 
 	flags = irq_lock();
-	prev = &active_fds;
-	while(*prev != NULL){
-		if((*test)(*prev, arg)){
-			struct irq_fd *old_fd = *prev;
-			if((pollfds[i].fd != -1) && 
-			   (pollfds[i].fd != (*prev)->fd)){
-				printk("free_irq_by_cb - mismatch between "
-				       "active_fds and pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/* This moves the *whole* array after pollfds[i] (though
-			 * it doesn't spot as such)! */
-
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-
-			if(last_irq_ptr == &old_fd->next) 
-				last_irq_ptr = prev;
-			*prev = (*prev)->next;
-			if(old_fd->type == IRQ_WRITE) 
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
-	}
- out:
+ 	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
 	irq_unlock(flags);
 }
 
@@ -277,6 +215,7 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
 {
 	struct irq_fd *irq;
 	int i = 0;
+	int fdi;
 
 	for(irq=active_fds; irq != NULL; irq = irq->next){
 		if((irq->fd == fd) && (irq->irq == irqnum)) break;
@@ -286,10 +225,11 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
 		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
 		goto out;
 	}
-	if((pollfds[i].fd != -1) && (pollfds[i].fd != fd)){
+	fdi = os_get_pollfd(i);
+	if((fdi != -1) && (fdi != fd)){
 		printk("find_irq_by_fd - mismatch between active_fds and "
-		       "pollfds, fd %d vs %d, need %d\n", irq->fd, 
-		       pollfds[i].fd, fd);
+		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
+		       fdi, fd);
 		irq = NULL;
 		goto out;
 	}
@@ -310,9 +250,7 @@ void reactivate_fd(int fd, int irqnum)
 		irq_unlock(flags);
 		return;
 	}
-
-	pollfds[i].fd = irq->fd;
-
+	os_set_pollfd(i, irq->fd);
 	irq_unlock(flags);
 
 	/* This calls activate_fd, so it has to be outside the critical
@@ -331,7 +269,7 @@ void deactivate_fd(int fd, int irqnum)
 	irq = find_irq_by_fd(fd, irqnum, &i);
 	if(irq == NULL)
 		goto out;
-	pollfds[i].fd = -1;
+	os_set_pollfd(i, -1);
  out:
 	irq_unlock(flags);
 }
@@ -347,21 +285,11 @@ int deactivate_all_fds(void)
 			return(err);
 	}
 	/* If there is a signal already queued, after unblocking ignore it */
-	set_handler(SIGIO, SIG_IGN, 0, -1);
+	os_set_ioignore();
 
 	return(0);
 }
 
-void forward_ipi(int fd, int pid)
-{
-	int err;
-
-	err = os_set_owner(fd, pid);
-	if(err < 0)
-		printk("forward_ipi: set_owner failed, fd = %d, me = %d, "
-		       "target = %d, err = %d\n", fd, os_getpid(), pid, -err);
-}
-
 void forward_interrupts(int pid)
 {
 	struct irq_fd *irq;
@@ -384,22 +312,6 @@ void forward_interrupts(int pid)
 	irq_unlock(flags);
 }
 
-void init_irq_signals(int on_sigstack)
-{
-	__sighandler_t h;
-	int flags;
-
-	flags = on_sigstack ? SA_ONSTACK : 0;
-	if(timer_irq_inited) h = (__sighandler_t) alarm_handler;
-	else h = boot_timer_handler;
-
-	set_handler(SIGVTALRM, h, flags | SA_RESTART, 
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
-	set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
-	signal(SIGWINCH, SIG_IGN);
-}
-
 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 72113b0..c8d8d0a 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
@@ -77,9 +77,9 @@ static int idle_proc(void *cpup)
 	if(err < 0)
 		panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
 
-	activate_ipi(cpu_data[cpu].ipi_pipe[0], 
+	os_set_fd_async(cpu_data[cpu].ipi_pipe[0],
 		     current->thread.mode.tt.extern_pid);
- 
+
 	wmb();
 	if (cpu_test_and_set(cpu, cpu_callin_map)) {
 		printk("huh, CPU#%d already present??\n", cpu);
@@ -106,7 +106,7 @@ static struct task_struct *idle_thread(int cpu)
 		panic("copy_process failed in idle_thread, error = %ld",
 		      PTR_ERR(new_task));
 
-	cpu_tasks[cpu] = ((struct cpu_task) 
+	cpu_tasks[cpu] = ((struct cpu_task)
 		          { .pid = 	new_task->thread.mode.tt.extern_pid,
 			    .task = 	new_task } );
 	idle_threads[cpu] = new_task;
@@ -134,12 +134,12 @@ void smp_prepare_cpus(unsigned int maxcpus)
 	if(err < 0)
 		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
 
-	activate_ipi(cpu_data[me].ipi_pipe[0],
+	os_set_fd_async(cpu_data[me].ipi_pipe[0],
 		     current->thread.mode.tt.extern_pid);
 
 	for(cpu = 1; cpu < ncpus; cpu++){
 		printk("Booting processor %d...\n", cpu);
-		
+
 		idle = idle_thread(cpu);
 
 		init_idle(idle, cpu);
@@ -223,7 +223,7 @@ void smp_call_function_slave(int cpu)
 	atomic_inc(&scf_finished);
 }
 
-int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, 
+int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
 		      int wait)
 {
 	int cpus = num_online_cpus() - 1;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 08a4e62..73d3d83 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -3,14 +3,14 @@
 # Licensed under the GPL
 #
 
-obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
+obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o signal.o \
 	start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \
 	util.o drivers/ sys-$(SUBARCH)/
 
 obj-$(CONFIG_MODE_SKAS) += skas/
 
-USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
-	start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o
+USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \
+	signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o
 
 elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
 CFLAGS_elf_aux.o += -I$(objtree)/arch/um
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
new file mode 100644
index 0000000..e599be4
--- /dev/null
+++ b/arch/um/os-Linux/irq.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "process.h"
+#include "sigio.h"
+#include "irq_user.h"
+#include "os.h"
+
+static struct pollfd *pollfds = NULL;
+static int pollfds_num = 0;
+static int pollfds_size = 0;
+
+int os_waiting_for_events(struct irq_fd *active_fds)
+{
+	struct irq_fd *irq_fd;
+	int i, n, err;
+
+	n = poll(pollfds, pollfds_num, 0);
+	if(n < 0){
+		err = -errno;
+		if(errno != EINTR)
+			printk("sigio_handler: os_waiting_for_events:"
+			       " poll returned %d, errno = %d\n", n, errno);
+		return err;
+	}
+
+	if(n == 0)
+		return 0;
+
+	irq_fd = active_fds;
+
+	for(i = 0; i < pollfds_num; i++){
+		if(pollfds[i].revents != 0){
+			irq_fd->current_events = pollfds[i].revents;
+			pollfds[i].fd = -1;
+		}
+		irq_fd = irq_fd->next;
+	}
+	return n;
+}
+
+int os_isatty(int fd)
+{
+	return(isatty(fd));
+}
+
+int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
+{
+	if (pollfds_num == pollfds_size) {
+		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
+			/* return min size needed for new pollfds area */
+			return((pollfds_size + 1) * sizeof(pollfds[0]));
+		}
+
+		if(pollfds != NULL){
+			memcpy(tmp_pfd, pollfds,
+			       sizeof(pollfds[0]) * pollfds_size);
+			/* remove old pollfds */
+			kfree(pollfds);
+		}
+		pollfds = tmp_pfd;
+		pollfds_size++;
+	} else {
+		/* remove not used tmp_pfd */
+		if (tmp_pfd != NULL)
+			kfree(tmp_pfd);
+	}
+
+	pollfds[pollfds_num] = ((struct pollfd) { .fd 	= fd,
+						  .events 	= events,
+						  .revents 	= 0 });
+	pollfds_num++;
+
+	return(0);
+}
+
+void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+{
+	struct irq_fd **prev;
+	int i = 0;
+
+	prev = &active_fds;
+	while(*prev != NULL){
+		if((*test)(*prev, arg)){
+			struct irq_fd *old_fd = *prev;
+			if((pollfds[i].fd != -1) &&
+			   (pollfds[i].fd != (*prev)->fd)){
+				printk("os_free_irq_by_cb - mismatch between "
+				       "active_fds and pollfds, fd %d vs %d\n",
+				       (*prev)->fd, pollfds[i].fd);
+				goto out;
+			}
+
+			pollfds_num--;
+
+			/* This moves the *whole* array after pollfds[i]
+			 * (though it doesn't spot as such)!
+			 */
+
+			memmove(&pollfds[i], &pollfds[i + 1],
+			       (pollfds_num - i) * sizeof(pollfds[0]));
+			if(*last_irq_ptr2 == &old_fd->next)
+				*last_irq_ptr2 = prev;
+
+			*prev = (*prev)->next;
+			if(old_fd->type == IRQ_WRITE)
+				ignore_sigio_fd(old_fd->fd);
+			kfree(old_fd);
+			continue;
+		}
+		prev = &(*prev)->next;
+		i++;
+	}
+ out:
+	return;
+}
+
+
+int os_get_pollfd(int i)
+{
+	return(pollfds[i].fd);
+}
+
+void os_set_pollfd(int i, int fd)
+{
+	pollfds[i].fd = fd;
+}
+
+void os_set_ioignore(void)
+{
+	set_handler(SIGIO, SIG_IGN, 0, -1);
+}
+
+void init_irq_signals(int on_sigstack)
+{
+	__sighandler_t h;
+	int flags;
+
+	flags = on_sigstack ? SA_ONSTACK : 0;
+	if(timer_irq_inited) h = (__sighandler_t) alarm_handler;
+	else h = boot_timer_handler;
+
+	set_handler(SIGVTALRM, h, flags | SA_RESTART,
+		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
+	set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
+		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+	signal(SIGWINCH, SIG_IGN);
+}
diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c
index 919d19f..5461a06 100644
--- a/arch/um/os-Linux/tt.c
+++ b/arch/um/os-Linux/tt.c
@@ -110,6 +110,16 @@ int wait_for_stop(int pid, int sig, int cont_type, void *relay)
 	}
 }
 
+void forward_ipi(int fd, int pid)
+{
+	int err;
+
+	err = os_set_owner(fd, pid);
+	if(err < 0)
+		printk("forward_ipi: set_owner failed, fd = %d, me = %d, "
+		       "target = %d, err = %d\n", fd, os_getpid(), pid, -err);
+}
+
 /*
  *-------------------------
  * only for tt mode (will be deleted in future...)
diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c
index 26b6867..6f4ef2b 100644
--- a/arch/um/sys-i386/user-offsets.c
+++ b/arch/um/sys-i386/user-offsets.c
@@ -3,12 +3,13 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 #include <linux/stddef.h>
+#include <sys/poll.h>
 
 #define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
 
 #define DEFINE_LONGS(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
 
 #define OFFSET(sym, str, mem) \
 	DEFINE(sym, offsetof(struct str, mem));
@@ -67,4 +68,9 @@ void foo(void)
 	DEFINE(HOST_ES, ES);
 	DEFINE(HOST_GS, GS);
 	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
+
+	/* XXX Duplicated between i386 and x86_64 */
+	DEFINE(UM_POLLIN, POLLIN);
+	DEFINE(UM_POLLPRI, POLLPRI);
+	DEFINE(UM_POLLOUT, POLLOUT);
 }
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c
index 7bd54a9..899cebb 100644
--- a/arch/um/sys-x86_64/user-offsets.c
+++ b/arch/um/sys-x86_64/user-offsets.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stddef.h>
 #include <signal.h>
+#include <sys/poll.h>
 #define __FRAME_OFFSETS
 #include <asm/ptrace.h>
 #include <asm/types.h>
@@ -88,4 +89,9 @@ void foo(void)
 	DEFINE_LONGS(HOST_IP, RIP);
 	DEFINE_LONGS(HOST_SP, RSP);
 	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
+
+	/* XXX Duplicated between i386 and x86_64 */
+	DEFINE(UM_POLLIN, POLLIN);
+	DEFINE(UM_POLLPRI, POLLPRI);
+	DEFINE(UM_POLLOUT, POLLOUT);
 }
-- 
cgit v0.10.2


From 9b4f018d92dbeff9305b4cf4aa80874c3974cfcc Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:31 -0800
Subject: [PATCH] uml: merge irq_user.c and irq.c

The serial UML OS-abstraction layer patch (um/kernel dir).

This joins irq_user.c and irq.c files.

Signed-off-by: Gennady Sharapov <Gennady.V.Sharapov@intel.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 693018b..8458c30 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := vmlinux.lds
 clean-files :=
 
 obj-y = config.o exec_kern.o exitcode.o \
-	init_task.o irq.o irq_user.o ksyms.o mem.o physmem.o \
+	init_task.o irq.o ksyms.o mem.o physmem.o \
 	process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \
 	signal_kern.o smp.o syscall_kern.o sysrq.o \
 	time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index bbf94bf..c39ea3a 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -31,6 +31,8 @@
 #include "irq_user.h"
 #include "irq_kern.h"
 #include "os.h"
+#include "sigio.h"
+#include "misc_constants.h"
 
 /*
  * Generic, controller-independent functions:
@@ -77,6 +79,298 @@ skip:
 	return 0;
 }
 
+struct irq_fd *active_fds = NULL;
+static struct irq_fd **last_irq_ptr = &active_fds;
+
+extern void free_irqs(void);
+
+void sigio_handler(int sig, union uml_pt_regs *regs)
+{
+	struct irq_fd *irq_fd;
+	int n;
+
+	if(smp_sigio_handler()) return;
+	while(1){
+		n = os_waiting_for_events(active_fds);
+		if (n <= 0) {
+			if(n == -EINTR) continue;
+			else break;
+		}
+
+		for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
+			if(irq_fd->current_events != 0){
+				irq_fd->current_events = 0;
+				do_IRQ(irq_fd->irq, regs);
+			}
+		}
+	}
+
+	free_irqs();
+}
+
+static void maybe_sigio_broken(int fd, int type)
+{
+	if(os_isatty(fd)){
+		if((type == IRQ_WRITE) && !pty_output_sigio){
+			write_sigio_workaround();
+			add_sigio_fd(fd, 0);
+		}
+		else if((type == IRQ_READ) && !pty_close_sigio){
+			write_sigio_workaround();
+			add_sigio_fd(fd, 1);
+		}
+	}
+}
+
+
+int activate_fd(int irq, int fd, int type, void *dev_id)
+{
+	struct pollfd *tmp_pfd;
+	struct irq_fd *new_fd, *irq_fd;
+	unsigned long flags;
+	int pid, events, err, n;
+
+	pid = os_getpid();
+	err = os_set_fd_async(fd, pid);
+	if(err < 0)
+		goto out;
+
+	new_fd = um_kmalloc(sizeof(*new_fd));
+	err = -ENOMEM;
+	if(new_fd == NULL)
+		goto out;
+
+	if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI;
+	else events = UM_POLLOUT;
+	*new_fd = ((struct irq_fd) { .next  		= NULL,
+				     .id 		= dev_id,
+				     .fd 		= fd,
+				     .type 		= type,
+				     .irq 		= irq,
+				     .pid  		= pid,
+				     .events 		= events,
+				     .current_events 	= 0 } );
+
+	/* Critical section - locked by a spinlock because this stuff can
+	 * be changed from interrupt handlers.  The stuff above is done
+	 * outside the lock because it allocates memory.
+	 */
+
+	/* Actually, it only looks like it can be called from interrupt
+	 * context.  The culprit is reactivate_fd, which calls
+	 * maybe_sigio_broken, which calls write_sigio_workaround,
+	 * which calls activate_fd.  However, write_sigio_workaround should
+	 * only be called once, at boot time.  That would make it clear that
+	 * this is called only from process context, and can be locked with
+	 * a semaphore.
+	 */
+	flags = irq_lock();
+	for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
+		if((irq_fd->fd == fd) && (irq_fd->type == type)){
+			printk("Registering fd %d twice\n", fd);
+			printk("Irqs : %d, %d\n", irq_fd->irq, irq);
+			printk("Ids : 0x%p, 0x%p\n", irq_fd->id, dev_id);
+			goto out_unlock;
+		}
+	}
+
+	/*-------------*/
+	if(type == IRQ_WRITE)
+		fd = -1;
+
+	tmp_pfd = NULL;
+	n = 0;
+
+	while(1){
+		n = os_create_pollfd(fd, events, tmp_pfd, n);
+		if (n == 0)
+			break;
+
+		/* n > 0
+		 * It means we couldn't put new pollfd to current pollfds
+		 * and tmp_fds is NULL or too small for new pollfds array.
+		 * Needed size is equal to n as minimum.
+		 *
+		 * Here we have to drop the lock in order to call
+		 * kmalloc, which might sleep.
+		 * If something else came in and changed the pollfds array
+		 * so we will not be able to put new pollfd struct to pollfds
+		 * then we free the buffer tmp_fds and try again.
+		 */
+		irq_unlock(flags);
+		if (tmp_pfd != NULL) {
+			kfree(tmp_pfd);
+			tmp_pfd = NULL;
+		}
+
+		tmp_pfd = um_kmalloc(n);
+		if (tmp_pfd == NULL)
+			goto out_kfree;
+
+		flags = irq_lock();
+	}
+	/*-------------*/
+
+	*last_irq_ptr = new_fd;
+	last_irq_ptr = &new_fd->next;
+
+	irq_unlock(flags);
+
+	/* This calls activate_fd, so it has to be outside the critical
+	 * section.
+	 */
+	maybe_sigio_broken(fd, type);
+
+	return(0);
+
+ out_unlock:
+	irq_unlock(flags);
+ out_kfree:
+	kfree(new_fd);
+ out:
+	return(err);
+}
+
+static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+{
+	unsigned long flags;
+
+	flags = irq_lock();
+	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
+	irq_unlock(flags);
+}
+
+struct irq_and_dev {
+	int irq;
+	void *dev;
+};
+
+static int same_irq_and_dev(struct irq_fd *irq, void *d)
+{
+	struct irq_and_dev *data = d;
+
+	return((irq->irq == data->irq) && (irq->id == data->dev));
+}
+
+void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+{
+	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
+							  .dev  = dev });
+
+	free_irq_by_cb(same_irq_and_dev, &data);
+}
+
+static int same_fd(struct irq_fd *irq, void *fd)
+{
+	return(irq->fd == *((int *) fd));
+}
+
+void free_irq_by_fd(int fd)
+{
+	free_irq_by_cb(same_fd, &fd);
+}
+
+static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+{
+	struct irq_fd *irq;
+	int i = 0;
+	int fdi;
+
+	for(irq=active_fds; irq != NULL; irq = irq->next){
+		if((irq->fd == fd) && (irq->irq == irqnum)) break;
+		i++;
+	}
+	if(irq == NULL){
+		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
+		goto out;
+	}
+	fdi = os_get_pollfd(i);
+	if((fdi != -1) && (fdi != fd)){
+		printk("find_irq_by_fd - mismatch between active_fds and "
+		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
+		       fdi, fd);
+		irq = NULL;
+		goto out;
+	}
+	*index_out = i;
+ out:
+	return(irq);
+}
+
+void reactivate_fd(int fd, int irqnum)
+{
+	struct irq_fd *irq;
+	unsigned long flags;
+	int i;
+
+	flags = irq_lock();
+	irq = find_irq_by_fd(fd, irqnum, &i);
+	if(irq == NULL){
+		irq_unlock(flags);
+		return;
+	}
+	os_set_pollfd(i, irq->fd);
+	irq_unlock(flags);
+
+	/* This calls activate_fd, so it has to be outside the critical
+	 * section.
+	 */
+	maybe_sigio_broken(fd, irq->type);
+}
+
+void deactivate_fd(int fd, int irqnum)
+{
+	struct irq_fd *irq;
+	unsigned long flags;
+	int i;
+
+	flags = irq_lock();
+	irq = find_irq_by_fd(fd, irqnum, &i);
+	if(irq == NULL)
+		goto out;
+	os_set_pollfd(i, -1);
+ out:
+	irq_unlock(flags);
+}
+
+int deactivate_all_fds(void)
+{
+	struct irq_fd *irq;
+	int err;
+
+	for(irq=active_fds;irq != NULL;irq = irq->next){
+		err = os_clear_fd_async(irq->fd);
+		if(err)
+			return(err);
+	}
+	/* If there is a signal already queued, after unblocking ignore it */
+	os_set_ioignore();
+
+	return(0);
+}
+
+void forward_interrupts(int pid)
+{
+	struct irq_fd *irq;
+	unsigned long flags;
+	int err;
+
+	flags = irq_lock();
+	for(irq=active_fds;irq != NULL;irq = irq->next){
+		err = os_set_owner(irq->fd, pid);
+		if(err < 0){
+			/* XXX Just remove the irq rather than
+			 * print out an infinite stream of these
+			 */
+			printk("Failed to forward %d to pid %d, err = %d\n",
+			       irq->fd, pid, -err);
+		}
+
+		irq->pid = pid;
+	}
+	irq_unlock(flags);
+}
+
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
deleted file mode 100644
index 483e79c..0000000
--- a/arch/um/kernel/irq_user.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <signal.h>
-#include <string.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include "user_util.h"
-#include "kern_util.h"
-#include "user.h"
-#include "process.h"
-#include "sigio.h"
-#include "irq_user.h"
-#include "os.h"
-#include "misc_constants.h"
-
-struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
-
-extern void free_irqs(void);
-
-void sigio_handler(int sig, union uml_pt_regs *regs)
-{
-	struct irq_fd *irq_fd;
-	int n;
-
-	if(smp_sigio_handler()) return;
-	while(1){
-		n = os_waiting_for_events(active_fds);
-		if (n <= 0) {
-			if(n == -EINTR) continue;
-			else break;
- 		}
-
-		for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
-			if(irq_fd->current_events != 0){
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
-			}
-		}
-	}
-
-	free_irqs();
-}
-
-static void maybe_sigio_broken(int fd, int type)
-{
-	if(os_isatty(fd)){
-		if((type == IRQ_WRITE) && !pty_output_sigio){
-			write_sigio_workaround();
-			add_sigio_fd(fd, 0);
-		}
-		else if((type == IRQ_READ) && !pty_close_sigio){
-			write_sigio_workaround();
-			add_sigio_fd(fd, 1);			
-		}
-	}
-}
-
-
-int activate_fd(int irq, int fd, int type, void *dev_id)
-{
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
-	unsigned long flags;
-	int pid, events, err, n;
-
-	pid = os_getpid();
-	err = os_set_fd_async(fd, pid);
-	if(err < 0)
-		goto out;
-
-	new_fd = um_kmalloc(sizeof(*new_fd));
-	err = -ENOMEM;
-	if(new_fd == NULL)
-		goto out;
-
-	if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .pid  		= pid,
-				     .events 		= events,
-				     .current_events 	= 0 } );
-
-	/* Critical section - locked by a spinlock because this stuff can
-	 * be changed from interrupt handlers.  The stuff above is done
-	 * outside the lock because it allocates memory.
-	 */
-
-	/* Actually, it only looks like it can be called from interrupt
-	 * context.  The culprit is reactivate_fd, which calls
-	 * maybe_sigio_broken, which calls write_sigio_workaround,
-	 * which calls activate_fd.  However, write_sigio_workaround should
-	 * only be called once, at boot time.  That would make it clear that
-	 * this is called only from process context, and can be locked with
-	 * a semaphore.
-	 */
-	flags = irq_lock();
-	for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
-		if((irq_fd->fd == fd) && (irq_fd->type == type)){
-			printk("Registering fd %d twice\n", fd);
-			printk("Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk("Ids : 0x%x, 0x%x\n", irq_fd->id, dev_id);
-			goto out_unlock;
-		}
-	}
-
-	/*-------------*/
-	if(type == IRQ_WRITE)
-		fd = -1;
-
-	tmp_pfd = NULL;
-	n = 0;
-
-	while(1){
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
-
-		/* n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
-		 */
-		irq_unlock(flags);
-		if (tmp_pfd != NULL) {
-			kfree(tmp_pfd);
-			tmp_pfd = NULL;
-		}
-
-		tmp_pfd = um_kmalloc(n);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
-
-		flags = irq_lock();
-	}
-	/*-------------*/
-
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
-
-	irq_unlock(flags);
-
-	/* This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, type);
-
-	return(0);
-
- out_unlock:
-	irq_unlock(flags);
- out_kfree:
-	kfree(new_fd);
- out:
-	return(err);
-}
-
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
-{
-	unsigned long flags;
-
-	flags = irq_lock();
- 	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	irq_unlock(flags);
-}
-
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
-
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
-{
-	struct irq_and_dev *data = d;
-
-	return((irq->irq == data->irq) && (irq->id == data->dev));
-}
-
-void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
-
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
-
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return(irq->fd == *((int *) fd));
-}
-
-void free_irq_by_fd(int fd)
-{
-	free_irq_by_cb(same_fd, &fd);
-}
-
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
-{
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
-
-	for(irq=active_fds; irq != NULL; irq = irq->next){
-		if((irq->fd == fd) && (irq->irq == irqnum)) break;
-		i++;
-	}
-	if(irq == NULL){
-		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if((fdi != -1) && (fdi != fd)){
-		printk("find_irq_by_fd - mismatch between active_fds and "
-		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
-	}
-	*index_out = i;
- out:
-	return(irq);
-}
-
-void reactivate_fd(int fd, int irqnum)
-{
-	struct irq_fd *irq;
-	unsigned long flags;
-	int i;
-
-	flags = irq_lock();
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if(irq == NULL){
-		irq_unlock(flags);
-		return;
-	}
-	os_set_pollfd(i, irq->fd);
-	irq_unlock(flags);
-
-	/* This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, irq->type);
-}
-
-void deactivate_fd(int fd, int irqnum)
-{
-	struct irq_fd *irq;
-	unsigned long flags;
-	int i;
-
-	flags = irq_lock();
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if(irq == NULL)
-		goto out;
-	os_set_pollfd(i, -1);
- out:
-	irq_unlock(flags);
-}
-
-int deactivate_all_fds(void)
-{
-	struct irq_fd *irq;
-	int err;
-
-	for(irq=active_fds;irq != NULL;irq = irq->next){
-		err = os_clear_fd_async(irq->fd);
-		if(err)
-			return(err);
-	}
-	/* If there is a signal already queued, after unblocking ignore it */
-	os_set_ioignore();
-
-	return(0);
-}
-
-void forward_interrupts(int pid)
-{
-	struct irq_fd *irq;
-	unsigned long flags;
-	int err;
-
-	flags = irq_lock();
-	for(irq=active_fds;irq != NULL;irq = irq->next){
-		err = os_set_owner(irq->fd, pid);
-		if(err < 0){
-			/* XXX Just remove the irq rather than
-			 * print out an infinite stream of these
-			 */
-			printk("Failed to forward %d to pid %d, err = %d\n",
-			       irq->fd, pid, -err);
-		}
-
-		irq->pid = pid;
-	}
-	irq_unlock(flags);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
-- 
cgit v0.10.2


From 8e367065eea04a6fde5cb8f3854164af99c45693 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:32 -0800
Subject: [PATCH] uml: move SIGIO startup code to os-Linux/start_up.c

The serial UML OS-abstraction layer patch (um/kernel dir).

This moves all startup code from sigio_user.c file under os-Linux dir

Signed-off-by: Gennady Sharapov <Gennady.V.Sharapov@intel.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index efae3e6..6d865d4 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -161,6 +161,7 @@ extern int os_lock_file(int fd, int excl);
 /* start_up.c */
 extern void os_early_checks(void);
 extern int can_do_skas(void);
+extern void os_check_bugs(void);
 
 /* Make sure they are clear when running in TT mode. Required by
  * SEGV_MAYBE_FIXABLE */
diff --git a/arch/um/include/sigio.h b/arch/um/include/sigio.h
index 37d76e2..1432fcf 100644
--- a/arch/um/include/sigio.h
+++ b/arch/um/include/sigio.h
@@ -8,7 +8,6 @@
 
 extern int write_sigio_irq(int fd);
 extern int register_sigio_fd(int fd);
-extern int read_sigio_fd(int fd);
 extern int add_sigio_fd(int fd, int read);
 extern int ignore_sigio_fd(int fd);
 extern void sigio_lock(void);
diff --git a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c
index 2299884..1c1300f 100644
--- a/arch/um/kernel/sigio_kern.c
+++ b/arch/um/kernel/sigio_kern.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
@@ -12,13 +12,16 @@
 #include "sigio.h"
 #include "irq_user.h"
 #include "irq_kern.h"
+#include "os.h"
 
 /* Protected by sigio_lock() called from write_sigio_workaround */
 static int sigio_irq_fd = -1;
 
 static irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused)
 {
-	read_sigio_fd(sigio_irq_fd);
+	char c;
+
+	os_read_file(sigio_irq_fd, &c, sizeof(c));
 	reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
 	return(IRQ_HANDLED);
 }
@@ -51,6 +54,9 @@ void sigio_unlock(void)
 	spin_unlock(&sigio_spinlock);
 }
 
+extern void sigio_cleanup(void);
+__uml_exitcall(sigio_cleanup);
+
 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c
index f7b18e1..9bef941 100644
--- a/arch/um/kernel/sigio_user.c
+++ b/arch/um/kernel/sigio_user.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -20,127 +20,6 @@
 #include "sigio.h"
 #include "os.h"
 
-/* Changed during early boot */
-int pty_output_sigio = 0;
-int pty_close_sigio = 0;
-
-/* Used as a flag during SIGIO testing early in boot */
-static volatile int got_sigio = 0;
-
-void __init handler(int sig)
-{
-	got_sigio = 1;
-}
-
-struct openpty_arg {
-	int master;
-	int slave;
-	int err;
-};
-
-static void openpty_cb(void *arg)
-{
-	struct openpty_arg *info = arg;
-
-	info->err = 0;
-	if(openpty(&info->master, &info->slave, NULL, NULL, NULL))
-		info->err = -errno;
-}
-
-void __init check_one_sigio(void (*proc)(int, int))
-{
-	struct sigaction old, new;
-	struct openpty_arg pty = { .master = -1, .slave = -1 };
-	int master, slave, err;
-
-	initial_thread_cb(openpty_cb, &pty);
-	if(pty.err){
-		printk("openpty failed, errno = %d\n", -pty.err);
-		return;
-	}
-
-	master = pty.master;
-	slave = pty.slave;
-
-	if((master == -1) || (slave == -1)){
-		printk("openpty failed to allocate a pty\n");
-		return;
-	}
-
-	/* Not now, but complain so we now where we failed. */
-	err = raw(master);
-	if (err < 0)
-		panic("check_sigio : __raw failed, errno = %d\n", -err);
-
-	err = os_sigio_async(master, slave);
-	if(err < 0)
-		panic("tty_fds : sigio_async failed, err = %d\n", -err);
-
-	if(sigaction(SIGIO, NULL, &old) < 0)
-		panic("check_sigio : sigaction 1 failed, errno = %d\n", errno);
-	new = old;
-	new.sa_handler = handler;
-	if(sigaction(SIGIO, &new, NULL) < 0)
-		panic("check_sigio : sigaction 2 failed, errno = %d\n", errno);
-
-	got_sigio = 0;
-	(*proc)(master, slave);
-		
-	os_close_file(master);
-	os_close_file(slave);
-
-	if(sigaction(SIGIO, &old, NULL) < 0)
-		panic("check_sigio : sigaction 3 failed, errno = %d\n", errno);
-}
-
-static void tty_output(int master, int slave)
-{
-	int n;
-	char buf[512];
-
-	printk("Checking that host ptys support output SIGIO...");
-
-	memset(buf, 0, sizeof(buf));
-
-	while(os_write_file(master, buf, sizeof(buf)) > 0) ;
-	if(errno != EAGAIN)
-		panic("check_sigio : write failed, errno = %d\n", errno);
-	while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ;
-
-	if (got_sigio) {
-		printk("Yes\n");
-		pty_output_sigio = 1;
-	} else if (n == -EAGAIN) {
-		printk("No, enabling workaround\n");
-	} else {
-		panic("check_sigio : read failed, err = %d\n", n);
-	}
-}
-
-static void tty_close(int master, int slave)
-{
-	printk("Checking that host ptys support SIGIO on close...");
-
-	os_close_file(slave);
-	if(got_sigio){
-		printk("Yes\n");
-		pty_close_sigio = 1;
-	}
-	else printk("No, enabling workaround\n");
-}
-
-void __init check_sigio(void)
-{
-	if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) &&
-	   (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){
-		printk("No pseudo-terminals available - skipping pty SIGIO "
-		       "check\n");
-		return;
-	}
-	check_one_sigio(tty_output);
-	check_one_sigio(tty_close);
-}
-
 /* Protected by sigio_lock(), also used by sigio_cleanup, which is an 
  * exitcall.
  */
@@ -267,10 +146,10 @@ static void update_thread(void)
 	if(write_sigio_pid != -1) 
 		os_kill_process(write_sigio_pid, 1);
 	write_sigio_pid = -1;
-	os_close_file(sigio_private[0]);
-	os_close_file(sigio_private[1]);
-	os_close_file(write_sigio_fds[0]);
-	os_close_file(write_sigio_fds[1]);
+	close(sigio_private[0]);
+	close(sigio_private[1]);
+	close(write_sigio_fds[0]);
+	close(write_sigio_fds[1]);
 	/* Critical section end */
 	set_signals(flags);
 }
@@ -428,39 +307,18 @@ void write_sigio_workaround(void)
  out_free:
 	kfree(p);
  out_close2:
-	os_close_file(l_sigio_private[0]);
-	os_close_file(l_sigio_private[1]);
+	close(l_sigio_private[0]);
+	close(l_sigio_private[1]);
  out_close1:
-	os_close_file(l_write_sigio_fds[0]);
-	os_close_file(l_write_sigio_fds[1]);
+	close(l_write_sigio_fds[0]);
+	close(l_write_sigio_fds[1]);
 	return;
 }
 
-int read_sigio_fd(int fd)
-{
-	int n;
-	char c;
-
-	n = os_read_file(fd, &c, sizeof(c));
-	if(n != sizeof(c)){
-		if(n < 0) {
-			printk("read_sigio_fd - read failed, err = %d\n", -n);
-			return(n);
-		}
-		else {
-			printk("read_sigio_fd - short read, bytes = %d\n", n);
-			return(-EIO);
-		}
-	}
-	return(n);
-}
-
-static void sigio_cleanup(void)
+void sigio_cleanup(void)
 {
 	if (write_sigio_pid != -1) {
 		os_kill_process(write_sigio_pid, 1);
 		write_sigio_pid = -1;
 	}
 }
-
-__uml_exitcall(sigio_cleanup);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0c5b9dc..bb1c872 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -487,8 +487,7 @@ void __init setup_arch(char **cmdline_p)
 void __init check_bugs(void)
 {
 	arch_check_bugs();
-	check_sigio();
-	check_devanon();
+ 	os_check_bugs();
 }
 
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 829d6b0..3275313 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -3,6 +3,7 @@
  * Licensed under the GPL
  */
 
+#include <pty.h>
 #include <stdio.h>
 #include <stddef.h>
 #include <stdarg.h>
@@ -539,3 +540,130 @@ int __init parse_iomem(char *str, int *add)
 	return(1);
 }
 
+
+/* Changed during early boot */
+int pty_output_sigio = 0;
+int pty_close_sigio = 0;
+
+/* Used as a flag during SIGIO testing early in boot */
+static volatile int got_sigio = 0;
+
+static void __init handler(int sig)
+{
+	got_sigio = 1;
+}
+
+struct openpty_arg {
+	int master;
+	int slave;
+	int err;
+};
+
+static void openpty_cb(void *arg)
+{
+	struct openpty_arg *info = arg;
+
+	info->err = 0;
+	if(openpty(&info->master, &info->slave, NULL, NULL, NULL))
+		info->err = -errno;
+}
+
+static void __init check_one_sigio(void (*proc)(int, int))
+{
+	struct sigaction old, new;
+	struct openpty_arg pty = { .master = -1, .slave = -1 };
+	int master, slave, err;
+
+	initial_thread_cb(openpty_cb, &pty);
+	if(pty.err){
+		printk("openpty failed, errno = %d\n", -pty.err);
+		return;
+	}
+
+	master = pty.master;
+	slave = pty.slave;
+
+	if((master == -1) || (slave == -1)){
+		printk("openpty failed to allocate a pty\n");
+		return;
+	}
+
+	/* Not now, but complain so we now where we failed. */
+	err = raw(master);
+	if (err < 0)
+		panic("check_sigio : __raw failed, errno = %d\n", -err);
+
+	err = os_sigio_async(master, slave);
+	if(err < 0)
+		panic("tty_fds : sigio_async failed, err = %d\n", -err);
+
+	if(sigaction(SIGIO, NULL, &old) < 0)
+		panic("check_sigio : sigaction 1 failed, errno = %d\n", errno);
+	new = old;
+	new.sa_handler = handler;
+	if(sigaction(SIGIO, &new, NULL) < 0)
+		panic("check_sigio : sigaction 2 failed, errno = %d\n", errno);
+
+	got_sigio = 0;
+	(*proc)(master, slave);
+
+	close(master);
+	close(slave);
+
+	if(sigaction(SIGIO, &old, NULL) < 0)
+		panic("check_sigio : sigaction 3 failed, errno = %d\n", errno);
+}
+
+static void tty_output(int master, int slave)
+{
+	int n;
+	char buf[512];
+
+	printk("Checking that host ptys support output SIGIO...");
+
+	memset(buf, 0, sizeof(buf));
+
+	while(os_write_file(master, buf, sizeof(buf)) > 0) ;
+	if(errno != EAGAIN)
+		panic("check_sigio : write failed, errno = %d\n", errno);
+	while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ;
+
+	if(got_sigio){
+		printk("Yes\n");
+		pty_output_sigio = 1;
+	}
+	else if(n == -EAGAIN) printk("No, enabling workaround\n");
+	else panic("check_sigio : read failed, err = %d\n", n);
+}
+
+static void tty_close(int master, int slave)
+{
+	printk("Checking that host ptys support SIGIO on close...");
+
+	close(slave);
+	if(got_sigio){
+		printk("Yes\n");
+		pty_close_sigio = 1;
+	}
+	else printk("No, enabling workaround\n");
+}
+
+void __init check_sigio(void)
+{
+	if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) &&
+	   (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){
+		printk("No pseudo-terminals available - skipping pty SIGIO "
+		       "check\n");
+		return;
+	}
+	check_one_sigio(tty_output);
+	check_one_sigio(tty_close);
+}
+
+void os_check_bugs(void)
+{
+	check_ptrace();
+	check_sigio();
+	check_devanon();
+}
+
-- 
cgit v0.10.2


From f206aabb035318ac4bafbf0b87798335de3634df Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:33 -0800
Subject: [PATCH] uml: move sigio_user.c to os-Linux/sigio.c

The serial UML OS-abstraction layer patch (um/kernel dir).

This moves sigio_user.c to os-Linux dir

Signed-off-by: Gennady Sharapov <Gennady.V.Sharapov@intel.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 6d865d4..53dee8d 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -311,4 +311,9 @@ extern void os_set_pollfd(int i, int fd);
 extern void os_set_ioignore(void);
 extern void init_irq_signals(int on_sigstack);
 
+/* sigio.c */
+extern void write_sigio_workaround(void);
+extern int add_sigio_fd(int fd, int read);
+extern int ignore_sigio_fd(int fd);
+
 #endif
diff --git a/arch/um/include/sigio.h b/arch/um/include/sigio.h
index 1432fcf..fe99ea1 100644
--- a/arch/um/include/sigio.h
+++ b/arch/um/include/sigio.h
@@ -8,8 +8,6 @@
 
 extern int write_sigio_irq(int fd);
 extern int register_sigio_fd(int fd);
-extern int add_sigio_fd(int fd, int read);
-extern int ignore_sigio_fd(int fd);
 extern void sigio_lock(void);
 extern void sigio_unlock(void);
 
diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h
index a6f1f17..992a7e1 100644
--- a/arch/um/include/user_util.h
+++ b/arch/um/include/user_util.h
@@ -58,7 +58,6 @@ extern int attach(int pid);
 extern void kill_child_dead(int pid);
 extern int cont(int pid);
 extern void check_sigio(void);
-extern void write_sigio_workaround(void);
 extern void arch_check_bugs(void);
 extern int cpu_feature(char *what, char *buf, int len);
 extern int arch_handle_signal(int sig, union uml_pt_regs *regs);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 8458c30..ac5afaa 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -8,7 +8,7 @@ clean-files :=
 
 obj-y = config.o exec_kern.o exitcode.o \
 	init_task.o irq.o ksyms.o mem.o physmem.o \
-	process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \
+	process_kern.o ptrace.o reboot.o resource.o sigio_kern.o \
 	signal_kern.o smp.o syscall_kern.o sysrq.o \
 	time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o
 
diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c
deleted file mode 100644
index 9bef941..0000000
--- a/arch/um/kernel/sigio_user.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <unistd.h>
-#include <stdlib.h>
-#include <termios.h>
-#include <pty.h>
-#include <signal.h>
-#include <errno.h>
-#include <string.h>
-#include <sched.h>
-#include <sys/socket.h>
-#include <sys/poll.h>
-#include "init.h"
-#include "user.h"
-#include "kern_util.h"
-#include "user_util.h"
-#include "sigio.h"
-#include "os.h"
-
-/* Protected by sigio_lock(), also used by sigio_cleanup, which is an 
- * exitcall.
- */
-static int write_sigio_pid = -1;
-
-/* These arrays are initialized before the sigio thread is started, and
- * the descriptors closed after it is killed.  So, it can't see them change.
- * On the UML side, they are changed under the sigio_lock.
- */
-static int write_sigio_fds[2] = { -1, -1 };
-static int sigio_private[2] = { -1, -1 };
-
-struct pollfds {
-	struct pollfd *poll;
-	int size;
-	int used;
-};
-
-/* Protected by sigio_lock().  Used by the sigio thread, but the UML thread
- * synchronizes with it.
- */
-struct pollfds current_poll = {
-	.poll  		= NULL,
-	.size 		= 0,
-	.used 		= 0
-};
-
-struct pollfds next_poll = {
-	.poll  		= NULL,
-	.size 		= 0,
-	.used 		= 0
-};
-
-static int write_sigio_thread(void *unused)
-{
-	struct pollfds *fds, tmp;
-	struct pollfd *p;
-	int i, n, respond_fd;
-	char c;
-
-        signal(SIGWINCH, SIG_IGN);
-	fds = &current_poll;
-	while(1){
-		n = poll(fds->poll, fds->used, -1);
-		if(n < 0){
-			if(errno == EINTR) continue;
-			printk("write_sigio_thread : poll returned %d, "
-			       "errno = %d\n", n, errno);
-		}
-		for(i = 0; i < fds->used; i++){
-			p = &fds->poll[i];
-			if(p->revents == 0) continue;
-			if(p->fd == sigio_private[1]){
-				n = os_read_file(sigio_private[1], &c, sizeof(c));
-				if(n != sizeof(c))
-					printk("write_sigio_thread : "
-					       "read failed, err = %d\n", -n);
-				tmp = current_poll;
-				current_poll = next_poll;
-				next_poll = tmp;
-				respond_fd = sigio_private[1];
-			}
-			else {
-				respond_fd = write_sigio_fds[1];
-				fds->used--;
-				memmove(&fds->poll[i], &fds->poll[i + 1],
-					(fds->used - i) * sizeof(*fds->poll));
-			}
-
-			n = os_write_file(respond_fd, &c, sizeof(c));
-			if(n != sizeof(c))
-				printk("write_sigio_thread : write failed, "
-				       "err = %d\n", -n);
-		}
-	}
-
-	return 0;
-}
-
-static int need_poll(int n)
-{
-	if(n <= next_poll.size){
-		next_poll.used = n;
-		return(0);
-	}
-	kfree(next_poll.poll);
-	next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd));
-	if(next_poll.poll == NULL){
-		printk("need_poll : failed to allocate new pollfds\n");
-		next_poll.size = 0;
-		next_poll.used = 0;
-		return(-1);
-	}
-	next_poll.size = n;
-	next_poll.used = n;
-	return(0);
-}
-
-/* Must be called with sigio_lock held, because it's needed by the marked
- * critical section. */
-static void update_thread(void)
-{
-	unsigned long flags;
-	int n;
-	char c;
-
-	flags = set_signals(0);
-	n = os_write_file(sigio_private[0], &c, sizeof(c));
-	if(n != sizeof(c)){
-		printk("update_thread : write failed, err = %d\n", -n);
-		goto fail;
-	}
-
-	n = os_read_file(sigio_private[0], &c, sizeof(c));
-	if(n != sizeof(c)){
-		printk("update_thread : read failed, err = %d\n", -n);
-		goto fail;
-	}
-
-	set_signals(flags);
-	return;
- fail:
-	/* Critical section start */
-	if(write_sigio_pid != -1) 
-		os_kill_process(write_sigio_pid, 1);
-	write_sigio_pid = -1;
-	close(sigio_private[0]);
-	close(sigio_private[1]);
-	close(write_sigio_fds[0]);
-	close(write_sigio_fds[1]);
-	/* Critical section end */
-	set_signals(flags);
-}
-
-int add_sigio_fd(int fd, int read)
-{
-	int err = 0, i, n, events;
-
-	sigio_lock();
-	for(i = 0; i < current_poll.used; i++){
-		if(current_poll.poll[i].fd == fd) 
-			goto out;
-	}
-
-	n = current_poll.used + 1;
-	err = need_poll(n);
-	if(err) 
-		goto out;
-
-	for(i = 0; i < current_poll.used; i++)
-		next_poll.poll[i] = current_poll.poll[i];
-
-	if(read) events = POLLIN;
-	else events = POLLOUT;
-
-	next_poll.poll[n - 1] = ((struct pollfd) { .fd  	= fd,
-						   .events 	= events,
-						   .revents 	= 0 });
-	update_thread();
- out:
-	sigio_unlock();
-	return(err);
-}
-
-int ignore_sigio_fd(int fd)
-{
-	struct pollfd *p;
-	int err = 0, i, n = 0;
-
-	sigio_lock();
-	for(i = 0; i < current_poll.used; i++){
-		if(current_poll.poll[i].fd == fd) break;
-	}
-	if(i == current_poll.used)
-		goto out;
-	
-	err = need_poll(current_poll.used - 1);
-	if(err)
-		goto out;
-
-	for(i = 0; i < current_poll.used; i++){
-		p = &current_poll.poll[i];
-		if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i];
-	}
-	if(n == i){
-		printk("ignore_sigio_fd : fd %d not found\n", fd);
-		err = -1;
-		goto out;
-	}
-
-	update_thread();
- out:
-	sigio_unlock();
-	return(err);
-}
-
-static struct pollfd* setup_initial_poll(int fd)
-{
-	struct pollfd *p;
-
-	p = um_kmalloc(sizeof(struct pollfd));
-	if (p == NULL) {
-		printk("setup_initial_poll : failed to allocate poll\n");
-		return NULL;
-	}
-	*p = ((struct pollfd) { .fd  	= fd,
-				.events 	= POLLIN,
-				.revents 	= 0 });
-	return p;
-}
-
-void write_sigio_workaround(void)
-{
-	unsigned long stack;
-	struct pollfd *p;
-	int err;
-	int l_write_sigio_fds[2];
-	int l_sigio_private[2];
-	int l_write_sigio_pid;
-
-	/* We call this *tons* of times - and most ones we must just fail. */
-	sigio_lock();
-	l_write_sigio_pid = write_sigio_pid;
-	sigio_unlock();
-
-	if (l_write_sigio_pid != -1)
-		return;
-
-	err = os_pipe(l_write_sigio_fds, 1, 1);
-	if(err < 0){
-		printk("write_sigio_workaround - os_pipe 1 failed, "
-		       "err = %d\n", -err);
-		return;
-	}
-	err = os_pipe(l_sigio_private, 1, 1);
-	if(err < 0){
-		printk("write_sigio_workaround - os_pipe 1 failed, "
-		       "err = %d\n", -err);
-		goto out_close1;
-	}
-
-	p = setup_initial_poll(l_sigio_private[1]);
-	if(!p)
-		goto out_close2;
-
-	sigio_lock();
-
-	/* Did we race? Don't try to optimize this, please, it's not so likely
-	 * to happen, and no more than once at the boot. */
-	if(write_sigio_pid != -1)
-		goto out_unlock;
-
-	write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
-					    CLONE_FILES | CLONE_VM, &stack, 0);
-
-	if (write_sigio_pid < 0)
-		goto out_clear;
-
-	if (write_sigio_irq(l_write_sigio_fds[0]))
-		goto out_kill;
-
-	/* Success, finally. */
-	memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
-	memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
-
-	current_poll = ((struct pollfds) { .poll 	= p,
-					   .used 	= 1,
-					   .size 	= 1 });
-
-	sigio_unlock();
-	return;
-
- out_kill:
-	l_write_sigio_pid = write_sigio_pid;
-	write_sigio_pid = -1;
-	sigio_unlock();
-	/* Going to call waitpid, avoid holding the lock. */
-	os_kill_process(l_write_sigio_pid, 1);
-	goto out_free;
-
- out_clear:
-	write_sigio_pid = -1;
- out_unlock:
-	sigio_unlock();
- out_free:
-	kfree(p);
- out_close2:
-	close(l_sigio_private[0]);
-	close(l_sigio_private[1]);
- out_close1:
-	close(l_write_sigio_fds[0]);
-	close(l_write_sigio_fds[1]);
-	return;
-}
-
-void sigio_cleanup(void)
-{
-	if (write_sigio_pid != -1) {
-		os_kill_process(write_sigio_pid, 1);
-		write_sigio_pid = -1;
-	}
-}
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 73d3d83..c3d56c2 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -3,14 +3,15 @@
 # Licensed under the GPL
 #
 
-obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o signal.o \
-	start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \
-	util.o drivers/ sys-$(SUBARCH)/
+obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o sigio.o \
+	signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \
+	user_syms.o util.o drivers/ sys-$(SUBARCH)/
 
 obj-$(CONFIG_MODE_SKAS) += skas/
 
 USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \
-	signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o
+	sigio.o signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \
+	util.o
 
 elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
 CFLAGS_elf_aux.o += -I$(objtree)/arch/um
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
new file mode 100644
index 0000000..7604c40
--- /dev/null
+++ b/arch/um/os-Linux/sigio.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <pty.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include "init.h"
+#include "user.h"
+#include "kern_util.h"
+#include "user_util.h"
+#include "sigio.h"
+#include "os.h"
+
+/* Protected by sigio_lock(), also used by sigio_cleanup, which is an
+ * exitcall.
+ */
+static int write_sigio_pid = -1;
+
+/* These arrays are initialized before the sigio thread is started, and
+ * the descriptors closed after it is killed.  So, it can't see them change.
+ * On the UML side, they are changed under the sigio_lock.
+ */
+static int write_sigio_fds[2] = { -1, -1 };
+static int sigio_private[2] = { -1, -1 };
+
+struct pollfds {
+	struct pollfd *poll;
+	int size;
+	int used;
+};
+
+/* Protected by sigio_lock().  Used by the sigio thread, but the UML thread
+ * synchronizes with it.
+ */
+struct pollfds current_poll = {
+	.poll  		= NULL,
+	.size 		= 0,
+	.used 		= 0
+};
+
+struct pollfds next_poll = {
+	.poll  		= NULL,
+	.size 		= 0,
+	.used 		= 0
+};
+
+static int write_sigio_thread(void *unused)
+{
+	struct pollfds *fds, tmp;
+	struct pollfd *p;
+	int i, n, respond_fd;
+	char c;
+
+        signal(SIGWINCH, SIG_IGN);
+	fds = &current_poll;
+	while(1){
+		n = poll(fds->poll, fds->used, -1);
+		if(n < 0){
+			if(errno == EINTR) continue;
+			printk("write_sigio_thread : poll returned %d, "
+			       "errno = %d\n", n, errno);
+		}
+		for(i = 0; i < fds->used; i++){
+			p = &fds->poll[i];
+			if(p->revents == 0) continue;
+			if(p->fd == sigio_private[1]){
+				n = os_read_file(sigio_private[1], &c, sizeof(c));
+				if(n != sizeof(c))
+					printk("write_sigio_thread : "
+					       "read failed, err = %d\n", -n);
+				tmp = current_poll;
+				current_poll = next_poll;
+				next_poll = tmp;
+				respond_fd = sigio_private[1];
+			}
+			else {
+				respond_fd = write_sigio_fds[1];
+				fds->used--;
+				memmove(&fds->poll[i], &fds->poll[i + 1],
+					(fds->used - i) * sizeof(*fds->poll));
+			}
+
+			n = os_write_file(respond_fd, &c, sizeof(c));
+			if(n != sizeof(c))
+				printk("write_sigio_thread : write failed, "
+				       "err = %d\n", -n);
+		}
+	}
+
+	return 0;
+}
+
+static int need_poll(int n)
+{
+	if(n <= next_poll.size){
+		next_poll.used = n;
+		return(0);
+	}
+	kfree(next_poll.poll);
+	next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd));
+	if(next_poll.poll == NULL){
+		printk("need_poll : failed to allocate new pollfds\n");
+		next_poll.size = 0;
+		next_poll.used = 0;
+		return(-1);
+	}
+	next_poll.size = n;
+	next_poll.used = n;
+	return(0);
+}
+
+/* Must be called with sigio_lock held, because it's needed by the marked
+ * critical section. */
+static void update_thread(void)
+{
+	unsigned long flags;
+	int n;
+	char c;
+
+	flags = set_signals(0);
+	n = os_write_file(sigio_private[0], &c, sizeof(c));
+	if(n != sizeof(c)){
+		printk("update_thread : write failed, err = %d\n", -n);
+		goto fail;
+	}
+
+	n = os_read_file(sigio_private[0], &c, sizeof(c));
+	if(n != sizeof(c)){
+		printk("update_thread : read failed, err = %d\n", -n);
+		goto fail;
+	}
+
+	set_signals(flags);
+	return;
+ fail:
+	/* Critical section start */
+	if(write_sigio_pid != -1)
+		os_kill_process(write_sigio_pid, 1);
+	write_sigio_pid = -1;
+	close(sigio_private[0]);
+	close(sigio_private[1]);
+	close(write_sigio_fds[0]);
+	close(write_sigio_fds[1]);
+	/* Critical section end */
+	set_signals(flags);
+}
+
+int add_sigio_fd(int fd, int read)
+{
+	int err = 0, i, n, events;
+
+	sigio_lock();
+	for(i = 0; i < current_poll.used; i++){
+		if(current_poll.poll[i].fd == fd)
+			goto out;
+	}
+
+	n = current_poll.used + 1;
+	err = need_poll(n);
+	if(err)
+		goto out;
+
+	for(i = 0; i < current_poll.used; i++)
+		next_poll.poll[i] = current_poll.poll[i];
+
+	if(read) events = POLLIN;
+	else events = POLLOUT;
+
+	next_poll.poll[n - 1] = ((struct pollfd) { .fd  	= fd,
+						   .events 	= events,
+						   .revents 	= 0 });
+	update_thread();
+ out:
+	sigio_unlock();
+	return(err);
+}
+
+int ignore_sigio_fd(int fd)
+{
+	struct pollfd *p;
+	int err = 0, i, n = 0;
+
+	sigio_lock();
+	for(i = 0; i < current_poll.used; i++){
+		if(current_poll.poll[i].fd == fd) break;
+	}
+	if(i == current_poll.used)
+		goto out;
+
+	err = need_poll(current_poll.used - 1);
+	if(err)
+		goto out;
+
+	for(i = 0; i < current_poll.used; i++){
+		p = &current_poll.poll[i];
+		if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i];
+	}
+	if(n == i){
+		printk("ignore_sigio_fd : fd %d not found\n", fd);
+		err = -1;
+		goto out;
+	}
+
+	update_thread();
+ out:
+	sigio_unlock();
+	return(err);
+}
+
+static struct pollfd *setup_initial_poll(int fd)
+{
+	struct pollfd *p;
+
+	p = um_kmalloc(sizeof(struct pollfd));
+	if (p == NULL) {
+		printk("setup_initial_poll : failed to allocate poll\n");
+		return NULL;
+	}
+	*p = ((struct pollfd) { .fd  	= fd,
+				.events 	= POLLIN,
+				.revents 	= 0 });
+	return p;
+}
+
+void write_sigio_workaround(void)
+{
+	unsigned long stack;
+	struct pollfd *p;
+	int err;
+	int l_write_sigio_fds[2];
+	int l_sigio_private[2];
+	int l_write_sigio_pid;
+
+	/* We call this *tons* of times - and most ones we must just fail. */
+	sigio_lock();
+	l_write_sigio_pid = write_sigio_pid;
+	sigio_unlock();
+
+	if (l_write_sigio_pid != -1)
+		return;
+
+	err = os_pipe(l_write_sigio_fds, 1, 1);
+	if(err < 0){
+		printk("write_sigio_workaround - os_pipe 1 failed, "
+		       "err = %d\n", -err);
+		return;
+	}
+	err = os_pipe(l_sigio_private, 1, 1);
+	if(err < 0){
+		printk("write_sigio_workaround - os_pipe 2 failed, "
+		       "err = %d\n", -err);
+		goto out_close1;
+	}
+
+	p = setup_initial_poll(l_sigio_private[1]);
+	if(!p)
+		goto out_close2;
+
+	sigio_lock();
+
+	/* Did we race? Don't try to optimize this, please, it's not so likely
+	 * to happen, and no more than once at the boot. */
+	if(write_sigio_pid != -1)
+		goto out_unlock;
+
+	write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
+					    CLONE_FILES | CLONE_VM, &stack, 0);
+
+	if (write_sigio_pid < 0)
+		goto out_clear;
+
+	if (write_sigio_irq(l_write_sigio_fds[0]))
+		goto out_kill;
+
+	/* Success, finally. */
+	memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
+	memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
+
+	current_poll = ((struct pollfds) { .poll 	= p,
+					   .used 	= 1,
+					   .size 	= 1 });
+
+	sigio_unlock();
+	return;
+
+ out_kill:
+	l_write_sigio_pid = write_sigio_pid;
+	write_sigio_pid = -1;
+	sigio_unlock();
+	/* Going to call waitpid, avoid holding the lock. */
+	os_kill_process(l_write_sigio_pid, 1);
+	goto out_free;
+
+ out_clear:
+	write_sigio_pid = -1;
+ out_unlock:
+	sigio_unlock();
+ out_free:
+	kfree(p);
+ out_close2:
+	close(l_sigio_private[0]);
+	close(l_sigio_private[1]);
+ out_close1:
+	close(l_write_sigio_fds[0]);
+	close(l_write_sigio_fds[1]);
+	return;
+}
+
+void sigio_cleanup(void)
+{
+	if(write_sigio_pid != -1){
+		os_kill_process(write_sigio_pid, 1);
+		write_sigio_pid = -1;
+	}
+}
-- 
cgit v0.10.2


From 81efcd3300754462537ffac60336158b2f773b4e Mon Sep 17 00:00:00 2001
From: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Date: Mon, 27 Mar 2006 01:14:34 -0800
Subject: [PATCH] uml: more carefully test whether we are in a system call

For security reasons, UML in is_syscall() needs to have access to code in
vsyscall-page.  The current implementation grants this access by explicitly
allowing access to vsyscall in access_ok_skas().  With this change,
copy_from_user() may be used to read the code.  Ptrace access to vsyscall-page
for debugging already was implemented in get_user_pages() by mainline.  In
i386, copy_from_user can't access vsyscall-page, but returns EFAULT.

To make UML behave as i386 does, I changed is_syscall to use
access_process_vm(current) to read the code from vsyscall-page.  This doesn't
hurt security, but simplifies the code and prepares implementation of
stub-vmas.

Signed-off-by: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c
index e839ce6..8032a10 100644
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/um/sys-i386/ptrace.c
@@ -6,6 +6,7 @@
 #include <linux/config.h>
 #include <linux/compiler.h>
 #include "linux/sched.h"
+#include "linux/mm.h"
 #include "asm/elf.h"
 #include "asm/ptrace.h"
 #include "asm/uaccess.h"
@@ -26,9 +27,17 @@ int is_syscall(unsigned long addr)
 
 	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
 	if(n){
-		printk("is_syscall : failed to read instruction from 0x%lx\n",
-		       addr);
-		return(0);
+		/* access_process_vm() grants access to vsyscall and stub,
+		 * while copy_from_user doesn't. Maybe access_process_vm is
+		 * slow, but that doesn't matter, since it will be called only
+		 * in case of singlestepping, if copy_from_user failed.
+		 */
+		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		if(n != sizeof(instr)) {
+			printk("is_syscall : failed to read instruction from "
+			       "0x%lx\n", addr);
+			return(1);
+		}
 	}
 	/* int 0x80 or sysenter */
 	return((instr == 0x80cd) || (instr == 0x340f));
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c
index 74eee5c..147bbf0 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/um/sys-x86_64/ptrace.c
@@ -8,6 +8,7 @@
 #include <asm/ptrace.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/elf.h>
 
@@ -136,9 +137,28 @@ void arch_switch(void)
 */
 }
 
+/* XXX Mostly copied from sys-i386 */
 int is_syscall(unsigned long addr)
 {
-	panic("is_syscall");
+	unsigned short instr;
+	int n;
+
+	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
+	if(n){
+		/* access_process_vm() grants access to vsyscall and stub,
+		 * while copy_from_user doesn't. Maybe access_process_vm is
+		 * slow, but that doesn't matter, since it will be called only
+		 * in case of singlestepping, if copy_from_user failed.
+		 */
+		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		if(n != sizeof(instr)) {
+			printk("is_syscall : failed to read instruction from "
+			       "0x%lx\n", addr);
+			return(1);
+		}
+	}
+	/* sysenter */
+	return(instr == 0x050f);
 }
 
 int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu )
-- 
cgit v0.10.2


From c554f899b6747c9d8035bf91864d89b337ceb411 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:36 -0800
Subject: [PATCH] uml: move tty logging to os-Linux

The serial UML OS-abstraction layer patch (um/kernel dir).

This moves all systemcalls from tty_log.c file under os-Linux dir

Signed-off-by: Gennady Sharapov <Gennady.V.Sharapov@intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index ac5afaa..fe08971 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -15,15 +15,12 @@ obj-y = config.o exec_kern.o exitcode.o \
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_GCOV)	+= gmon_syms.o
-obj-$(CONFIG_TTY_LOG)	+= tty_log.o
 obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o
 
 obj-$(CONFIG_MODE_TT) += tt/
 obj-$(CONFIG_MODE_SKAS) += skas/
 
-user-objs-$(CONFIG_TTY_LOG) += tty_log.o
-
-USER_OBJS := $(user-objs-y) config.o tty_log.o
+USER_OBJS := config.o
 
 include arch/um/scripts/Makefile.rules
 
diff --git a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c
index c264e1c..1ca8431 100644
--- a/arch/um/kernel/exec_kern.c
+++ b/arch/um/kernel/exec_kern.c
@@ -30,8 +30,6 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 	CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp);
 }
 
-extern void log_exec(char **argv, void *tty);
-
 static long execve1(char *file, char __user * __user *argv,
 		    char __user *__user *env)
 {
diff --git a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c
deleted file mode 100644
index 9ada656..0000000
--- a/arch/um/kernel/tty_log.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and 
- * geoffrey hing <ghing@net.ohio-state.edu>
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include "init.h"
-#include "user.h"
-#include "kern_util.h"
-#include "os.h"
-
-#define TTY_LOG_DIR "./"
-
-/* Set early in boot and then unchanged */
-static char *tty_log_dir = TTY_LOG_DIR;
-static int tty_log_fd = -1;
-
-#define TTY_LOG_OPEN 1
-#define TTY_LOG_CLOSE 2
-#define TTY_LOG_WRITE 3
-#define TTY_LOG_EXEC 4
-
-#define TTY_READ 1
-#define TTY_WRITE 2
-
-struct tty_log_buf {
-	int what;
-	unsigned long tty;
-	int len;
-	int direction;
-	unsigned long sec;
-	unsigned long usec;
-};
-
-int open_tty_log(void *tty, void *current_tty)
-{
-	struct timeval tv;
-	struct tty_log_buf data;
-	char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")];
-	int fd;
-
-	gettimeofday(&tv, NULL);
-	if(tty_log_fd != -1){
-		data = ((struct tty_log_buf) { .what 	= TTY_LOG_OPEN,
-					       .tty  = (unsigned long) tty,
-					       .len  = sizeof(current_tty),
-					       .direction = 0,
-					       .sec = tv.tv_sec,
-					       .usec = tv.tv_usec } );
-		os_write_file(tty_log_fd, &data, sizeof(data));
-		os_write_file(tty_log_fd, &current_tty, data.len);
-		return(tty_log_fd);
-	}
-
-	sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, 
- 		(unsigned int) tv.tv_usec);
-
-	fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))),
-			  0644);
-	if(fd < 0){
-		printk("open_tty_log : couldn't open '%s', errno = %d\n",
-		       buf, -fd);
-	}
-	return(fd);
-}
-
-void close_tty_log(int fd, void *tty)
-{
-	struct tty_log_buf data;
-	struct timeval tv;
-
-	if(tty_log_fd != -1){
-		gettimeofday(&tv, NULL);
-		data = ((struct tty_log_buf) { .what 	= TTY_LOG_CLOSE,
-					       .tty  = (unsigned long) tty,
-					       .len  = 0,
-					       .direction = 0,
-					       .sec = tv.tv_sec,
-					       .usec = tv.tv_usec } );
-		os_write_file(tty_log_fd, &data, sizeof(data));
-		return;
-	}
-	os_close_file(fd);
-}
-
-static int log_chunk(int fd, const char *buf, int len)
-{
-	int total = 0, try, missed, n;
-	char chunk[64];
-
-	while(len > 0){
-		try = (len > sizeof(chunk)) ? sizeof(chunk) : len;
-		missed = copy_from_user_proc(chunk, (char *) buf, try);
-		try -= missed;
-		n = os_write_file(fd, chunk, try);
-		if(n != try) {
-			if(n < 0)
-				return(n);
-			return(-EIO);
-		}
-		if(missed != 0)
-			return(-EFAULT);
-
-		len -= try;
-		total += try;
-		buf += try;
-	}
-
-	return(total);
-}
-
-int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read)
-{
-	struct timeval tv;
-	struct tty_log_buf data;
-	int direction;
-
-	if(fd == tty_log_fd){
-		gettimeofday(&tv, NULL);
-		direction = is_read ? TTY_READ : TTY_WRITE;
-		data = ((struct tty_log_buf) { .what 	= TTY_LOG_WRITE,
-					       .tty  = (unsigned long) tty,
-					       .len  = len,
-					       .direction = direction,
-					       .sec = tv.tv_sec,
-					       .usec = tv.tv_usec } );
-		os_write_file(tty_log_fd, &data, sizeof(data));
-	}
-
-	return(log_chunk(fd, buf, len));
-}
-
-void log_exec(char **argv, void *tty)
-{
-	struct timeval tv;
-	struct tty_log_buf data;
-	char **ptr,*arg;
-	int len;
-
-	if(tty_log_fd == -1) return;
-
-	gettimeofday(&tv, NULL);
-
-	len = 0;
-	for(ptr = argv; ; ptr++){
-		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
-			return;
-		if(arg == NULL) break;
-		len += strlen_user_proc(arg);
-	}
-
-	data = ((struct tty_log_buf) { .what 	= TTY_LOG_EXEC,
-				       .tty  = (unsigned long) tty,
-				       .len  = len,
-				       .direction = 0,
-				       .sec = tv.tv_sec,
-				       .usec = tv.tv_usec } );
-	os_write_file(tty_log_fd, &data, sizeof(data));
-
-	for(ptr = argv; ; ptr++){
-		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
-			return;
-		if(arg == NULL) break;
-		log_chunk(tty_log_fd, arg, strlen_user_proc(arg));
-	}
-}
-
-extern void register_tty_logger(int (*opener)(void *, void *),
-				int (*writer)(int, const char *, int,
-					      void *, int),
-				void (*closer)(int, void *));
-
-static int register_logger(void)
-{
-	register_tty_logger(open_tty_log, write_tty_log, close_tty_log);
-	return(0);
-}
-
-__uml_initcall(register_logger);
-
-static int __init set_tty_log_dir(char *name, int *add)
-{
-	tty_log_dir = name;
-	return 0;
-}
-
-__uml_setup("tty_log_dir=", set_tty_log_dir,
-"tty_log_dir=<directory>\n"
-"    This is used to specify the directory where the logs of all pty\n"
-"    data from this UML machine will be written.\n\n"
-);
-
-static int __init set_tty_log_fd(char *name, int *add)
-{
-	char *end;
-
-	tty_log_fd = strtoul(name, &end, 0);
-	if((*end != '\0') || (end == name)){
-		printf("set_tty_log_fd - strtoul failed on '%s'\n", name);
-		tty_log_fd = -1;
-	}
-
-	*add = 0;
-	return 0;
-}
-
-__uml_setup("tty_log_fd=", set_tty_log_fd,
-"tty_log_fd=<fd>\n"
-"    This is used to specify a preconfigured file descriptor to which all\n"
-"    tty data will be written.  Preconfigure the descriptor with something\n"
-"    like '10>tty_log tty_log_fd=10'.\n\n"
-);
-
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index c3d56c2..1659386 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -8,10 +8,12 @@ obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o sigio.o \
 	user_syms.o util.o drivers/ sys-$(SUBARCH)/
 
 obj-$(CONFIG_MODE_SKAS) += skas/
+obj-$(CONFIG_TTY_LOG) += tty_log.o
+user-objs-$(CONFIG_TTY_LOG) += tty_log.o
 
-USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \
-	sigio.o signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \
-	util.o
+USER_OBJS := $(user-objs-y) aio.o elf_aux.o file.o helper.o irq.o main.o mem.o \
+	process.o sigio.o signal.o start_up.o time.o trap.o tt.o tty.o \
+	uaccess.o umid.o util.o
 
 elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
 CFLAGS_elf_aux.o += -I$(objtree)/arch/um
diff --git a/arch/um/os-Linux/tty_log.c b/arch/um/os-Linux/tty_log.c
new file mode 100644
index 0000000..c6ba56c
--- /dev/null
+++ b/arch/um/os-Linux/tty_log.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and
+ * geoffrey hing <ghing@net.ohio-state.edu>
+ * Licensed under the GPL
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include "init.h"
+#include "user.h"
+#include "kern_util.h"
+#include "os.h"
+
+#define TTY_LOG_DIR "./"
+
+/* Set early in boot and then unchanged */
+static char *tty_log_dir = TTY_LOG_DIR;
+static int tty_log_fd = -1;
+
+#define TTY_LOG_OPEN 1
+#define TTY_LOG_CLOSE 2
+#define TTY_LOG_WRITE 3
+#define TTY_LOG_EXEC 4
+
+#define TTY_READ 1
+#define TTY_WRITE 2
+
+struct tty_log_buf {
+	int what;
+	unsigned long tty;
+	int len;
+	int direction;
+	unsigned long sec;
+	unsigned long usec;
+};
+
+int open_tty_log(void *tty, void *current_tty)
+{
+	struct timeval tv;
+	struct tty_log_buf data;
+	char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")];
+	int fd;
+
+	gettimeofday(&tv, NULL);
+	if(tty_log_fd != -1){
+		data = ((struct tty_log_buf) { .what 	= TTY_LOG_OPEN,
+					       .tty  = (unsigned long) tty,
+					       .len  = sizeof(current_tty),
+					       .direction = 0,
+					       .sec = tv.tv_sec,
+					       .usec = tv.tv_usec } );
+		os_write_file(tty_log_fd, &data, sizeof(data));
+		os_write_file(tty_log_fd, &current_tty, data.len);
+		return(tty_log_fd);
+	}
+
+	sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec,
+ 		(unsigned int) tv.tv_usec);
+
+	fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))),
+			  0644);
+	if(fd < 0){
+		printk("open_tty_log : couldn't open '%s', errno = %d\n",
+		       buf, -fd);
+	}
+	return(fd);
+}
+
+void close_tty_log(int fd, void *tty)
+{
+	struct tty_log_buf data;
+	struct timeval tv;
+
+	if(tty_log_fd != -1){
+		gettimeofday(&tv, NULL);
+		data = ((struct tty_log_buf) { .what 	= TTY_LOG_CLOSE,
+					       .tty  = (unsigned long) tty,
+					       .len  = 0,
+					       .direction = 0,
+					       .sec = tv.tv_sec,
+					       .usec = tv.tv_usec } );
+		os_write_file(tty_log_fd, &data, sizeof(data));
+		return;
+	}
+	os_close_file(fd);
+}
+
+static int log_chunk(int fd, const char *buf, int len)
+{
+	int total = 0, try, missed, n;
+	char chunk[64];
+
+	while(len > 0){
+		try = (len > sizeof(chunk)) ? sizeof(chunk) : len;
+		missed = copy_from_user_proc(chunk, (char *) buf, try);
+		try -= missed;
+		n = os_write_file(fd, chunk, try);
+		if(n != try) {
+			if(n < 0)
+				return(n);
+			return(-EIO);
+		}
+		if(missed != 0)
+			return(-EFAULT);
+
+		len -= try;
+		total += try;
+		buf += try;
+	}
+
+	return(total);
+}
+
+int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read)
+{
+	struct timeval tv;
+	struct tty_log_buf data;
+	int direction;
+
+	if(fd == tty_log_fd){
+		gettimeofday(&tv, NULL);
+		direction = is_read ? TTY_READ : TTY_WRITE;
+		data = ((struct tty_log_buf) { .what 	= TTY_LOG_WRITE,
+					       .tty  = (unsigned long) tty,
+					       .len  = len,
+					       .direction = direction,
+					       .sec = tv.tv_sec,
+					       .usec = tv.tv_usec } );
+		os_write_file(tty_log_fd, &data, sizeof(data));
+	}
+
+	return(log_chunk(fd, buf, len));
+}
+
+void log_exec(char **argv, void *tty)
+{
+	struct timeval tv;
+	struct tty_log_buf data;
+	char **ptr,*arg;
+	int len;
+
+	if(tty_log_fd == -1) return;
+
+	gettimeofday(&tv, NULL);
+
+	len = 0;
+	for(ptr = argv; ; ptr++){
+		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
+			return;
+		if(arg == NULL) break;
+		len += strlen_user_proc(arg);
+	}
+
+	data = ((struct tty_log_buf) { .what 	= TTY_LOG_EXEC,
+				       .tty  = (unsigned long) tty,
+				       .len  = len,
+				       .direction = 0,
+				       .sec = tv.tv_sec,
+				       .usec = tv.tv_usec } );
+	os_write_file(tty_log_fd, &data, sizeof(data));
+
+	for(ptr = argv; ; ptr++){
+		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
+			return;
+		if(arg == NULL) break;
+		log_chunk(tty_log_fd, arg, strlen_user_proc(arg));
+	}
+}
+
+extern void register_tty_logger(int (*opener)(void *, void *),
+				int (*writer)(int, const char *, int,
+					      void *, int),
+				void (*closer)(int, void *));
+
+static int register_logger(void)
+{
+	register_tty_logger(open_tty_log, write_tty_log, close_tty_log);
+	return(0);
+}
+
+__uml_initcall(register_logger);
+
+static int __init set_tty_log_dir(char *name, int *add)
+{
+	tty_log_dir = name;
+	return 0;
+}
+
+__uml_setup("tty_log_dir=", set_tty_log_dir,
+"tty_log_dir=<directory>\n"
+"    This is used to specify the directory where the logs of all pty\n"
+"    data from this UML machine will be written.\n\n"
+);
+
+static int __init set_tty_log_fd(char *name, int *add)
+{
+	char *end;
+
+	tty_log_fd = strtoul(name, &end, 0);
+	if((*end != '\0') || (end == name)){
+		printf("set_tty_log_fd - strtoul failed on '%s'\n", name);
+		tty_log_fd = -1;
+	}
+
+	*add = 0;
+	return 0;
+}
+
+__uml_setup("tty_log_fd=", set_tty_log_fd,
+"tty_log_fd=<fd>\n"
+"    This is used to specify a preconfigured file descriptor to which all\n"
+"    tty data will be written.  Preconfigure the descriptor with something\n"
+"    like '10>tty_log tty_log_fd=10'.\n\n"
+);
-- 
cgit v0.10.2


From cf9165a50aa21027d2f0eefc90476ec59ae6d2b8 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:36 -0800
Subject: [PATCH] uml: oS header cleanups

This rearranges the OS declarations by moving some declarations into os.h.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 53dee8d..d3d1bc6 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -122,6 +122,7 @@ static inline struct openflags of_cloexec(struct openflags flags)
 	return(flags); 
 }
   
+/* file.c */
 extern int os_stat_file(const char *file_name, struct uml_stat *buf);
 extern int os_stat_fd(const int fd, struct uml_stat *buf);
 extern int os_access(const char *file, int mode);
@@ -157,6 +158,15 @@ extern int os_connect_socket(char *name);
 extern int os_file_type(char *file);
 extern int os_file_mode(char *file, struct openflags *mode_out);
 extern int os_lock_file(int fd, int excl);
+extern void os_flush_stdout(void);
+extern int os_stat_filesystem(char *path, long *bsize_out,
+			      long long *blocks_out, long long *bfree_out,
+			      long long *bavail_out, long long *files_out,
+			      long long *ffree_out, void *fsid_out,
+			      int fsid_size, long *namelen_out,
+			      long *spare_out);
+extern int os_change_dir(char *dir);
+extern int os_fchange_dir(int fd);
 
 /* start_up.c */
 extern void os_early_checks(void);
@@ -316,4 +326,8 @@ extern void write_sigio_workaround(void);
 extern int add_sigio_fd(int fd, int read);
 extern int ignore_sigio_fd(int fd);
 
+/* skas/trap */
+extern void sig_handler_common_skas(int sig, void *sc_ptr);
+extern void user_signal(int sig, union uml_pt_regs *regs, int pid);
+
 #endif
diff --git a/arch/um/include/skas/mode-skas.h b/arch/um/include/skas/mode-skas.h
index 260065c..8bc6916 100644
--- a/arch/um/include/skas/mode-skas.h
+++ b/arch/um/include/skas/mode-skas.h
@@ -13,7 +13,6 @@ extern unsigned long exec_fp_regs[];
 extern unsigned long exec_fpx_regs[];
 extern int have_fpx_regs;
 
-extern void sig_handler_common_skas(int sig, void *sc_ptr);
 extern void kill_off_processes_skas(void);
 
 #endif
diff --git a/arch/um/include/skas/skas.h b/arch/um/include/skas/skas.h
index 8635728..853b26f 100644
--- a/arch/um/include/skas/skas.h
+++ b/arch/um/include/skas/skas.h
@@ -17,7 +17,6 @@ extern int user_thread(unsigned long stack, int flags);
 extern void new_thread_proc(void *stack, void (*handler)(int sig));
 extern void new_thread_handler(int sig);
 extern void handle_syscall(union uml_pt_regs *regs);
-extern void user_signal(int sig, union uml_pt_regs *regs, int pid);
 extern int new_mm(unsigned long stack);
 extern void get_skas_faultinfo(int pid, struct faultinfo * fi);
 extern long execute_syscall_skas(void *r);
-- 
cgit v0.10.2


From 6c29256c5703ad7c3cf62276dbc38d802a05669e Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:37 -0800
Subject: [PATCH] uml: allow ubd devices to be shared in a cluster

This adds a 'c' option to the ubd switch which turns off host file locking so
that the device can be shared, as with a cluster.  There's also some
whitespace cleanup while I was in this file.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index fa617e0..0336575 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -71,7 +71,7 @@ struct io_thread_req {
 	int error;
 };
 
-extern int open_ubd_file(char *file, struct openflags *openflags,
+extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
 			 char **backing_file_out, int *bitmap_offset_out,
 			 unsigned long *bitmap_len_out, int *data_offset_out,
 			 int *create_cow_out);
@@ -137,7 +137,7 @@ static int fake_major = MAJOR_NR;
 
 static struct gendisk *ubd_gendisk[MAX_DEV];
 static struct gendisk *fake_gendisk[MAX_DEV];
- 
+
 #ifdef CONFIG_BLK_DEV_UBD_SYNC
 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 					 .cl = 1 })
@@ -168,6 +168,7 @@ struct ubd {
 	__u64 size;
 	struct openflags boot_openflags;
 	struct openflags openflags;
+	int shared;
 	int no_cow;
 	struct cow cow;
 	struct platform_device pdev;
@@ -189,6 +190,7 @@ struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
         .no_cow =               0, \
+	.shared =		0, \
         .cow =			DEFAULT_COW, \
 }
 
@@ -305,7 +307,7 @@ static int ubd_setup_common(char *str, int *index_out)
 		}
 		major = simple_strtoul(str, &end, 0);
 		if((*end != '\0') || (end == str)){
-			printk(KERN_ERR 
+			printk(KERN_ERR
 			       "ubd_setup : didn't parse major number\n");
 			return(1);
 		}
@@ -316,7 +318,7 @@ static int ubd_setup_common(char *str, int *index_out)
  			printk(KERN_ERR "Can't assign a fake major twice\n");
  			goto out1;
  		}
- 
+
  		fake_major = major;
 
 		printk(KERN_INFO "Setting extra ubd major number to %d\n",
@@ -351,7 +353,7 @@ static int ubd_setup_common(char *str, int *index_out)
 	if (index_out)
 		*index_out = n;
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < sizeof("rscd="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -362,11 +364,14 @@ static int ubd_setup_common(char *str, int *index_out)
 		case 'd':
 			dev->no_cow = 1;
 			break;
+		case 'c':
+			dev->shared = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
-			printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
+			printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n");
 			goto out;
 		}
 		str++;
@@ -515,7 +520,7 @@ static void ubd_handler(void)
 		spin_unlock(&ubd_io_lock);
 		return;
 	}
-        
+
 	ubd_finish(rq, req.error);
 	reactivate_fd(thread_fd, UBD_IRQ);	
 	do_ubd_request(ubd_queue);
@@ -532,7 +537,7 @@ static int io_pid = -1;
 
 void kill_io_thread(void)
 {
-	if(io_pid != -1) 
+	if(io_pid != -1)
 		os_kill_process(io_pid, 1);
 }
 
@@ -567,14 +572,15 @@ static int ubd_open_dev(struct ubd *dev)
 	create_cow = 0;
 	create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
 	back_ptr = dev->no_cow ? NULL : &dev->cow.file;
-	dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
-				&dev->cow.bitmap_offset, &dev->cow.bitmap_len, 
-				&dev->cow.data_offset, create_ptr);
+	dev->fd = open_ubd_file(dev->file, &dev->openflags, dev->shared,
+				back_ptr, &dev->cow.bitmap_offset,
+				&dev->cow.bitmap_len, &dev->cow.data_offset,
+				create_ptr);
 
 	if((dev->fd == -ENOENT) && create_cow){
-		dev->fd = create_cow_file(dev->file, dev->cow.file, 
+		dev->fd = create_cow_file(dev->file, dev->cow.file,
 					  dev->openflags, 1 << 9, PAGE_SIZE,
-					  &dev->cow.bitmap_offset, 
+					  &dev->cow.bitmap_offset,
 					  &dev->cow.bitmap_len,
 					  &dev->cow.data_offset);
 		if(dev->fd >= 0){
@@ -598,16 +604,16 @@ static int ubd_open_dev(struct ubd *dev)
 		}
 		flush_tlb_kernel_vm();
 
-		err = read_cow_bitmap(dev->fd, dev->cow.bitmap, 
-				      dev->cow.bitmap_offset, 
+		err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
+				      dev->cow.bitmap_offset,
 				      dev->cow.bitmap_len);
 		if(err < 0)
 			goto error;
 
 		flags = dev->openflags;
 		flags.w = 0;
-		err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, 
-				    NULL, NULL);
+		err = open_ubd_file(dev->cow.file, &flags, dev->shared, NULL,
+				    NULL, NULL, NULL, NULL);
 		if(err < 0) goto error;
 		dev->cow.fd = err;
 	}
@@ -685,11 +691,11 @@ static int ubd_add(int n)
 	dev->size = ROUND_BLOCK(dev->size);
 
 	err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
-	if(err) 
+	if(err)
 		goto out_close;
- 
+
 	if(fake_major != MAJOR_NR)
-		ubd_new_disk(fake_major, dev->size, n, 
+		ubd_new_disk(fake_major, dev->size, n,
 			     &fake_gendisk[n]);
 
 	/* perhaps this should also be under the "if (fake_major)" above */
@@ -854,7 +860,7 @@ int ubd_init(void)
 			return -1;
 	}
 	platform_driver_register(&ubd_driver);
-	for (i = 0; i < MAX_DEV; i++) 
+	for (i = 0; i < MAX_DEV; i++)
 		ubd_add(i);
 	return 0;
 }
@@ -872,16 +878,16 @@ int ubd_driver_init(void){
 		 * enough. So use anyway the io thread. */
 	}
 	stack = alloc_stack(0, 0);
-	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 
+	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
 				 &thread_fd);
 	if(io_pid < 0){
-		printk(KERN_ERR 
+		printk(KERN_ERR
 		       "ubd : Failed to start I/O thread (errno = %d) - "
 		       "falling back to synchronous I/O\n", -io_pid);
 		io_pid = -1;
 		return(0);
 	}
-	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 
+	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
 			     SA_INTERRUPT, "ubd", ubd_dev);
 	if(err != 0)
 		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
@@ -978,7 +984,7 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 	if(req->op == UBD_READ) {
 		for(i = 0; i < req->length >> 9; i++){
 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
-				ubd_set_bit(i, (unsigned char *) 
+				ubd_set_bit(i, (unsigned char *)
 					    &req->sector_mask);
                 }
 	}
@@ -999,7 +1005,7 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
 
 	/* This should be impossible now */
 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
-		printk("Write attempted on readonly ubd device %s\n", 
+		printk("Write attempted on readonly ubd device %s\n",
 		       disk->disk_name);
 		end_request(req, 0);
 		return(1);
@@ -1182,7 +1188,7 @@ int read_cow_bitmap(int fd, void *buf, int offset, int len)
 	return(0);
 }
 
-int open_ubd_file(char *file, struct openflags *openflags,
+int open_ubd_file(char *file, struct openflags *openflags, int shared,
 		  char **backing_file_out, int *bitmap_offset_out,
 		  unsigned long *bitmap_len_out, int *data_offset_out,
 		  int *create_cow_out)
@@ -1206,10 +1212,14 @@ int open_ubd_file(char *file, struct openflags *openflags,
 			return fd;
         }
 
-	err = os_lock_file(fd, openflags->w);
-	if(err < 0){
-		printk("Failed to lock '%s', err = %d\n", file, -err);
-		goto out_close;
+	if(shared)
+		printk("Not locking \"%s\" on the host\n", file);
+	else {
+		err = os_lock_file(fd, openflags->w);
+		if(err < 0){
+			printk("Failed to lock '%s', err = %d\n", file, -err);
+			goto out_close;
+		}
 	}
 
 	/* Succesful return case! */
@@ -1260,7 +1270,7 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
 	int err, fd;
 
 	flags.c = 1;
-	fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
+	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 	if(fd < 0){
 		err = fd;
 		printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
-- 
cgit v0.10.2


From 98c18238f146a9038098244d60a7d2b1f67ee790 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:38 -0800
Subject: [PATCH] uml: fix segfault on signal delivery

This fixes a process segfault where a signal was being delivered such that a
new stack page needed to be allocated to hold the signal frame.  This was
tripping some logic in the page fault handler which wouldn't allocate the page
if the faulting address was more that 32 bytes lower than the current stack
pointer.  Since a signal frame is greater than 32 bytes, this exercised that
case.

It's fixed by updating the SP in the pt_regs before starting to copy the
signal frame.  Since those are the registers that will be copied on to the
stack, we have to be careful to put the original SP, not the new one which
points to the signal frame, on the stack.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 7cd1a82..33a40f5 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -58,7 +58,7 @@ static int copy_sc_from_user_skas(struct pt_regs *regs,
 }
 
 int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
-                         struct pt_regs *regs)
+                         struct pt_regs *regs, unsigned long sp)
 {
   	struct sigcontext sc;
 	unsigned long fpregs[HOST_FP_SIZE];
@@ -72,7 +72,7 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
 	sc.edi = REGS_EDI(regs->regs.skas.regs);
 	sc.esi = REGS_ESI(regs->regs.skas.regs);
 	sc.ebp = REGS_EBP(regs->regs.skas.regs);
-	sc.esp = REGS_SP(regs->regs.skas.regs);
+	sc.esp = sp;
 	sc.ebx = REGS_EBX(regs->regs.skas.regs);
 	sc.edx = REGS_EDX(regs->regs.skas.regs);
 	sc.ecx = REGS_ECX(regs->regs.skas.regs);
@@ -132,7 +132,7 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from,
 }
 
 int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp,
-		       struct sigcontext *from, int fpsize)
+		       struct sigcontext *from, int fpsize, unsigned long sp)
 {
 	struct _fpstate *to_fp, *from_fp;
 	int err;
@@ -140,11 +140,18 @@ int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp,
 	to_fp =	(fp ? fp : (struct _fpstate *) (to + 1));
 	from_fp = from->fpstate;
 	err = copy_to_user(to, from, sizeof(*to));
+
+	/* The SP in the sigcontext is the updated one for the signal
+	 * delivery.  The sp passed in is the original, and this needs
+	 * to be restored, so we stick it in separately.
+	 */
+	err |= copy_to_user(&SC_SP(to), sp, sizeof(sp));
+
 	if(from_fp != NULL){
 		err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate));
 		err |= copy_to_user(to_fp, from_fp, fpsize);
 	}
-	return(err);
+	return err;
 }
 #endif
 
@@ -159,11 +166,11 @@ static int copy_sc_from_user(struct pt_regs *to, void __user *from)
 }
 
 static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp,
-			   struct pt_regs *from)
+			   struct pt_regs *from, unsigned long sp)
 {
 	return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs),
-					      sizeof(*fp)),
-                           copy_sc_to_user_skas(to, fp, from)));
+					      sizeof(*fp), sp),
+                           copy_sc_to_user_skas(to, fp, from, sp)));
 }
 
 static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp,
@@ -174,7 +181,7 @@ static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp,
 	err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
 	err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
 	err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
-	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs);
+	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, sp);
 	err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
 	return(err);
 }
@@ -207,6 +214,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 {
 	struct sigframe __user *frame;
 	void *restorer;
+	unsigned long save_sp = PT_REGS_SP(regs);
 	int err = 0;
 
 	stack_top &= -8UL;
@@ -218,9 +226,19 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	if(ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
+	/* Update SP now because the page fault handler refuses to extend
+	 * the stack if the faulting address is too far below the current
+	 * SP, which frame now certainly is.  If there's an error, the original
+	 * value is restored on the way out.
+	 * When writing the sigcontext to the stack, we have to write the
+	 * original value, so that's passed to copy_sc_to_user, which does
+	 * the right thing with it.
+	 */
+	PT_REGS_SP(regs) = (unsigned long) frame;
+
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= copy_sc_to_user(&frame->sc, NULL, regs);
+	err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp);
 	err |= __put_user(mask->sig[0], &frame->sc.oldmask);
 	if (_NSIG_WORDS > 1)
 		err |= __copy_to_user(&frame->extramask, &mask->sig[1],
@@ -238,7 +256,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
 
 	if(err)
-		return(err);
+		goto err;
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
@@ -248,7 +266,11 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
-	return(0);
+	return 0;
+
+err:
+	PT_REGS_SP(regs) = save_sp;
+	return err;
 }
 
 int setup_signal_stack_si(unsigned long stack_top, int sig,
@@ -257,6 +279,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 {
 	struct rt_sigframe __user *frame;
 	void *restorer;
+	unsigned long save_sp = PT_REGS_SP(regs);
 	int err = 0;
 
 	stack_top &= -8UL;
@@ -268,13 +291,16 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	if(ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
+	/* See comment above about why this is here */
+	PT_REGS_SP(regs) = (unsigned long) frame;
+
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, info);
 	err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
-				     PT_REGS_SP(regs));
+				     save_sp);
 
 	/*
 	 * This is movl $,%eax ; int $0x80
@@ -288,9 +314,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
 
 	if(err)
-		return(err);
+		goto err;
 
-	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
 	PT_REGS_EAX(regs) = (unsigned long) sig;
 	PT_REGS_EDX(regs) = (unsigned long) &frame->info;
@@ -298,7 +323,11 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
-	return(0);
+	return 0;
+
+err:
+	PT_REGS_SP(regs) = save_sp;
+	return err;
 }
 
 long sys_sigreturn(struct pt_regs regs)
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index fe1d065..e75c4e1 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -55,7 +55,8 @@ static int copy_sc_from_user_skas(struct pt_regs *regs,
 }
 
 int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
-                        struct pt_regs *regs, unsigned long mask)
+			 struct pt_regs *regs, unsigned long mask,
+			 unsigned long sp)
 {
         struct faultinfo * fi = &current->thread.arch.faultinfo;
 	int err = 0;
@@ -70,7 +71,11 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
 	err |= PUTREG(regs, RDI, to, rdi);
 	err |= PUTREG(regs, RSI, to, rsi);
 	err |= PUTREG(regs, RBP, to, rbp);
-	err |= PUTREG(regs, RSP, to, rsp);
+        /* Must use orignal RSP, which is passed in, rather than what's in
+         * the pt_regs, because that's already been updated to point at the
+         * signal frame.
+         */
+	err |= __put_user(sp, &to->rsp);
 	err |= PUTREG(regs, RBX, to, rbx);
 	err |= PUTREG(regs, RDX, to, rdx);
 	err |= PUTREG(regs, RCX, to, rcx);
@@ -102,7 +107,7 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
 
 #ifdef CONFIG_MODE_TT
 int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from,
-                        int fpsize)
+			 int fpsize)
 {
 	struct _fpstate *to_fp, *from_fp;
 	unsigned long sigs;
@@ -120,7 +125,7 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from,
 }
 
 int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp,
-                      struct sigcontext *from, int fpsize)
+		       struct sigcontext *from, int fpsize, unsigned long sp)
 {
 	struct _fpstate *to_fp, *from_fp;
 	int err;
@@ -128,11 +133,17 @@ int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp,
 	to_fp = (fp ? fp : (struct _fpstate *) (to + 1));
 	from_fp = from->fpstate;
 	err = copy_to_user(to, from, sizeof(*to));
+	/* The SP in the sigcontext is the updated one for the signal
+	 * delivery.  The sp passed in is the original, and this needs
+	 * to be restored, so we stick it in separately.
+	 */
+	err |= copy_to_user(&SC_SP(to), sp, sizeof(sp));
+
 	if(from_fp != NULL){
 		err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate));
 		err |= copy_to_user(to_fp, from_fp, fpsize);
 	}
-	return(err);
+	return err;
 }
 
 #endif
@@ -148,11 +159,12 @@ static int copy_sc_from_user(struct pt_regs *to, void __user *from)
 }
 
 static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp,
-                          struct pt_regs *from, unsigned long mask)
+			   struct pt_regs *from, unsigned long mask,
+			   unsigned long sp)
 {
        return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs),
-                                             sizeof(*fp)),
-                          copy_sc_to_user_skas(to, fp, from, mask)));
+                                             sizeof(*fp), sp),
+                          copy_sc_to_user_skas(to, fp, from, mask, sp)));
 }
 
 struct rt_sigframe
@@ -170,6 +182,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 {
 	struct rt_sigframe __user *frame;
 	struct _fpstate __user *fp = NULL;
+	unsigned long save_sp = PT_REGS_RSP(regs);
 	int err = 0;
 	struct task_struct *me = current;
 
@@ -193,14 +206,25 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 			goto out;
 	}
 
+	/* Update SP now because the page fault handler refuses to extend
+	 * the stack if the faulting address is too far below the current
+	 * SP, which frame now certainly is.  If there's an error, the original
+	 * value is restored on the way out.
+	 * When writing the sigcontext to the stack, we have to write the
+	 * original value, so that's passed to copy_sc_to_user, which does
+	 * the right thing with it.
+	 */
+	PT_REGS_RSP(regs) = (unsigned long) frame;
+
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
+	err |= __put_user(sas_ss_flags(save_sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= copy_sc_to_user(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+	err |= copy_sc_to_user(&frame->uc.uc_mcontext, fp, regs, set->sig[0],
+		save_sp);
 	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
 	if (sizeof(*set) == 16) {
 		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
@@ -217,10 +241,10 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
 	else
 		/* could use a vstub here */
-		goto out;
+		goto restore_sp;
 
 	if (err)
-		goto out;
+		goto restore_sp;
 
 	/* Set up registers for signal handler */
 	{
@@ -238,10 +262,12 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	PT_REGS_RSI(regs) = (unsigned long) &frame->info;
 	PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
 	PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
-
-	PT_REGS_RSP(regs) = (unsigned long) frame;
  out:
-	return(err);
+	return err;
+
+restore_sp:
+	PT_REGS_RSP(regs) = save_sp;
+	return err;
 }
 
 long sys_rt_sigreturn(struct pt_regs *regs)
-- 
cgit v0.10.2


From 1fbbd6844e6a84e5d166ab475dc298d3b89fa4bf Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:39 -0800
Subject: [PATCH] uml: prevent umid theft

Behavior when booting two UMLs with the same umid was broken.  The second one
would steal the umid.  This fixes that, making the second UML take a random
umid instead.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index ecf107a..198e591 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -143,8 +143,10 @@ static int not_dead_yet(char *dir)
 		goto out_close;
 	}
 
-	if((kill(p, 0) == 0) || (errno != ESRCH))
+	if((kill(p, 0) == 0) || (errno != ESRCH)){
+		printk("umid \"%s\" is already in use by pid %d\n", umid, p);
 		return 1;
+	}
 
 	err = actually_do_remove(dir);
 	if(err)
@@ -234,33 +236,44 @@ int __init make_umid(void)
 	err = mkdir(tmp, 0777);
 	if(err < 0){
 		err = -errno;
-		if(errno != EEXIST)
+		if(err != -EEXIST)
 			goto err;
 
-		if(not_dead_yet(tmp) < 0)
+		/* 1   -> this umid is already in use
+		 * < 0 -> we couldn't remove the umid directory
+		 * In either case, we can't use this umid, so return -EEXIST.
+		 */
+		if(not_dead_yet(tmp) != 0)
 			goto err;
 
 		err = mkdir(tmp, 0777);
 	}
-	if(err < 0){
-		printk("Failed to create '%s' - err = %d\n", umid, err);
-		goto err_rmdir;
+	if(err){
+		err = -errno;
+		printk("Failed to create '%s' - err = %d\n", umid, -errno);
+		goto err;
 	}
 
 	umid_setup = 1;
 
 	create_pid_file();
 
-	return 0;
-
- err_rmdir:
-	rmdir(tmp);
+	err = 0;
  err:
 	return err;
 }
 
 static int __init make_umid_init(void)
 {
+	if(!make_umid())
+		return 0;
+
+	/* If initializing with the given umid failed, then try again with
+	 * a random one.
+	 */
+	printk("Failed to initialize umid \"%s\", trying with a random umid\n",
+	       umid);
+	*umid = '\0';
 	make_umid();
 
 	return 0;
-- 
cgit v0.10.2


From 5f4e8fd08f3993bc31a7dd91767fdb4da4fe6278 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:40 -0800
Subject: [PATCH] uml: fix thread startup race

This fixes a race in the starting of write_sigio_thread.  Previously, some of
the data needed by the thread was initialized after the clone.  If the thread
ran immediately, it would see the uninitialized data, including an empty
pollfds, which would cause it to hang.

We move the data initialization to before the clone, and adjust the error
paths and cleanup accordingly.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 7604c40..9ba9429 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -29,8 +29,10 @@ static int write_sigio_pid = -1;
  * the descriptors closed after it is killed.  So, it can't see them change.
  * On the UML side, they are changed under the sigio_lock.
  */
-static int write_sigio_fds[2] = { -1, -1 };
-static int sigio_private[2] = { -1, -1 };
+#define SIGIO_FDS_INIT {-1, -1}
+
+static int write_sigio_fds[2] = SIGIO_FDS_INIT;
+static int sigio_private[2] = SIGIO_FDS_INIT;
 
 struct pollfds {
 	struct pollfd *poll;
@@ -270,49 +272,46 @@ void write_sigio_workaround(void)
 	/* Did we race? Don't try to optimize this, please, it's not so likely
 	 * to happen, and no more than once at the boot. */
 	if(write_sigio_pid != -1)
-		goto out_unlock;
-
-	write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
-					    CLONE_FILES | CLONE_VM, &stack, 0);
+		goto out_free;
 
-	if (write_sigio_pid < 0)
-		goto out_clear;
+	current_poll = ((struct pollfds) { .poll 	= p,
+					   .used 	= 1,
+					   .size 	= 1 });
 
 	if (write_sigio_irq(l_write_sigio_fds[0]))
-		goto out_kill;
+		goto out_clear_poll;
 
-	/* Success, finally. */
 	memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
 	memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
 
-	current_poll = ((struct pollfds) { .poll 	= p,
-					   .used 	= 1,
-					   .size 	= 1 });
+	write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
+					    CLONE_FILES | CLONE_VM, &stack, 0);
 
-	sigio_unlock();
-	return;
+	if (write_sigio_pid < 0)
+		goto out_clear;
 
- out_kill:
-	l_write_sigio_pid = write_sigio_pid;
-	write_sigio_pid = -1;
 	sigio_unlock();
-	/* Going to call waitpid, avoid holding the lock. */
-	os_kill_process(l_write_sigio_pid, 1);
-	goto out_free;
+	return;
 
- out_clear:
+out_clear:
 	write_sigio_pid = -1;
- out_unlock:
-	sigio_unlock();
- out_free:
+	write_sigio_fds[0] = -1;
+	write_sigio_fds[1] = -1;
+	sigio_private[0] = -1;
+	sigio_private[1] = -1;
+out_clear_poll:
+	current_poll = ((struct pollfds) { .poll	= NULL,
+					   .size	= 0,
+					   .used	= 0 });
+out_free:
 	kfree(p);
- out_close2:
+	sigio_unlock();
+out_close2:
 	close(l_sigio_private[0]);
 	close(l_sigio_private[1]);
- out_close1:
+out_close1:
 	close(l_write_sigio_fds[0]);
 	close(l_write_sigio_fds[1]);
-	return;
 }
 
 void sigio_cleanup(void)
-- 
cgit v0.10.2


From 86c79cbcee7d617c5aaf9b4f7e6cc093397d3451 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 27 Mar 2006 01:14:41 -0800
Subject: [PATCH] uml: fix hostfs stack corruption

Noted by Oleg Drokin:
We initialized an extra slot of struct kstatfs.spare, sometimes
causing stack corruption.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Oleg Drokin <green@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b97809d..23b7cee 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -360,7 +360,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 	spare_out[2] = buf.f_spare[2];
 	spare_out[3] = buf.f_spare[3];
 	spare_out[4] = buf.f_spare[4];
-	spare_out[5] = buf.f_spare[5];
 	return(0);
 }
 
-- 
cgit v0.10.2


From 718c604a28e35848754a65b839e4877ec34b2fca Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:42 -0800
Subject: [PATCH] autofs4: lookup white space cleanup

Whitespace and formating changes to lookup code.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 62d8d4a..2c676bd 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -296,20 +296,20 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 {
 	struct super_block *sb = mnt->mnt_sb;
 	struct autofs_sb_info *sbi = autofs4_sbi(sb);
-	struct autofs_info *de_info = autofs4_dentry_ino(dentry);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status = 0;
 
 	/* Block on any pending expiry here; invalidate the dentry
            when expiration is done to trigger mount request with a new
            dentry */
-	if (de_info && (de_info->flags & AUTOFS_INF_EXPIRING)) {
+	if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 		DPRINTK("waiting for expire %p name=%.*s",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
-		
+
 		DPRINTK("expire done status=%d", status);
-		
+
 		/*
 		 * If the directory still exists the mount request must
 		 * continue otherwise it can't be followed at the right
@@ -323,18 +323,21 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 	DPRINTK("dentry=%p %.*s ino=%p",
 		 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 
-	/* Wait for a pending mount, triggering one if there isn't one already */
+	/*
+	 * Wait for a pending mount, triggering one if there
+	 * isn't one already
+	 */
 	if (dentry->d_inode == NULL) {
 		DPRINTK("waiting for mount name=%.*s",
 			 dentry->d_name.len, dentry->d_name.name);
 
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-		 
+
 		DPRINTK("mount done status=%d", status);
 
 		if (status && dentry->d_inode)
 			return 0; /* Try to get the kernel to invalidate this dentry */
-		
+
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
 			dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
@@ -367,8 +370,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 		}
 	}
 
-	/* We don't update the usages for the autofs daemon itself, this
-	   is necessary for recursive autofs mounts */
+	/*
+	 * We don't update the usages for the autofs daemon itself, this
+	 * is necessary for recursive autofs mounts
+	 */
 	if (!autofs4_oz_mode(sbi))
 		autofs4_update_usage(mnt, dentry);
 
@@ -384,9 +389,9 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
  * yet completely filled in, and revalidate has to delay such
  * lookups..
  */
-static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
+static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode * dir = dentry->d_parent->d_inode;
+	struct inode *dir = dentry->d_parent->d_inode;
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	int oz_mode = autofs4_oz_mode(sbi);
 	int flags = nd ? nd->flags : 0;
@@ -462,12 +467,13 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("name = %.*s",
 		dentry->d_name.len, dentry->d_name.name);
 
+	/* File name too long to exist */
 	if (dentry->d_name.len > NAME_MAX)
-		return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
+		return ERR_PTR(-ENAMETOOLONG);
 
 	sbi = autofs4_sbi(dir->i_sb);
-
 	oz_mode = autofs4_oz_mode(sbi);
+
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, process_group(current), sbi->catatonic, oz_mode);
 
@@ -519,7 +525,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	 * doesn't do the right thing for all system calls, but it should
 	 * be OK for the operations we permit from an autofs.
 	 */
-	if ( dentry->d_inode && d_unhashed(dentry) )
+	if (dentry->d_inode && d_unhashed(dentry))
 		return ERR_PTR(-ENOENT);
 
 	return NULL;
-- 
cgit v0.10.2


From f360ce3be466d50de153b001b24561ca7593042b Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:43 -0800
Subject: [PATCH] autofs4: use libfs routines for readdir

Change readdir routines to use the cursor based routines in libfs.c.  This
removes reliance on old readdir code from 2.4 and should improve efficiency of
readdir in autofs4.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2c676bd..af9a4c6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -30,7 +30,6 @@ static int autofs4_dir_close(struct inode *inode, struct file *file);
 static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
-static int autofs4_dcache_readdir(struct file *, void *, filldir_t);
 
 struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
@@ -82,7 +81,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 
 	DPRINTK("needs_reghost = %d", sbi->needs_reghost);
 
-	return autofs4_dcache_readdir(file, dirent, filldir);
+	return dcache_readdir(file, dirent, filldir);
 }
 
 /* Update usage from here to top of tree, so that scan of
@@ -103,75 +102,21 @@ static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
 	spin_unlock(&dcache_lock);
 }
 
-/*
- * From 2.4 kernel readdir.c
- */
-static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
-{
-	int i;
-	struct dentry *dentry = filp->f_dentry;
-
-	i = filp->f_pos;
-	switch (i) {
-		case 0:
-			if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, DT_DIR) < 0)
-				break;
-			i++;
-			filp->f_pos++;
-			/* fallthrough */
-		case 1:
-			if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino, DT_DIR) < 0)
-				break;
-			i++;
-			filp->f_pos++;
-			/* fallthrough */
-		default: {
-			struct list_head *list;
-			int j = i-2;
-
-			spin_lock(&dcache_lock);
-			list = dentry->d_subdirs.next;
-
-			for (;;) {
-				if (list == &dentry->d_subdirs) {
-					spin_unlock(&dcache_lock);
-					return 0;
-				}
-				if (!j)
-					break;
-				j--;
-				list = list->next;
-			}
-
-			while(1) {
-				struct dentry *de = list_entry(list,
-						struct dentry, d_u.d_child);
-
-				if (!d_unhashed(de) && de->d_inode) {
-					spin_unlock(&dcache_lock);
-					if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
-						break;
-					spin_lock(&dcache_lock);
-				}
-				filp->f_pos++;
-				list = list->next;
-				if (list != &dentry->d_subdirs)
-					continue;
-				spin_unlock(&dcache_lock);
-				break;
-			}
-		}
-	}
-	return 0;
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct vfsmount *mnt = file->f_vfsmnt;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor;
 	int status;
 
+	status = dcache_dir_open(inode, file);
+	if (status)
+		goto out;
+
+	cursor = file->private_data;
+	cursor->d_fsdata = NULL;
+
 	DPRINTK("file=%p dentry=%p %.*s",
 		file, dentry, dentry->d_name.len, dentry->d_name.name);
 
@@ -180,12 +125,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 
 	if (autofs4_ispending(dentry)) {
 		DPRINTK("dentry busy");
-		return -EBUSY;
+		dcache_dir_close(inode, file);
+		status = -EBUSY;
+		goto out;
 	}
 
+	status = -ENOENT;
 	if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) {
 		struct nameidata nd;
-		int empty;
+		int empty, ret;
 
 		/* In case there are stale directory dentrys from a failed mount */
 		spin_lock(&dcache_lock);
@@ -195,13 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		if (!empty)
 			d_invalidate(dentry);
 
-		nd.dentry = dentry;
-		nd.mnt = mnt;
 		nd.flags = LOOKUP_DIRECTORY;
-		status = (dentry->d_op->d_revalidate)(dentry, &nd);
+		ret = (dentry->d_op->d_revalidate)(dentry, &nd);
 
-		if (!status)
-			return -ENOENT;
+		if (!ret) {
+			dcache_dir_close(inode, file);
+			goto out;
+		}
 	}
 
 	if (d_mountpoint(dentry)) {
@@ -212,25 +160,29 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) {
 			dput(fp_dentry);
 			mntput(fp_mnt);
-			return -ENOENT;
+			dcache_dir_close(inode, file);
+			goto out;
 		}
 
 		fp = dentry_open(fp_dentry, fp_mnt, file->f_flags);
 		status = PTR_ERR(fp);
 		if (IS_ERR(fp)) {
-			file->private_data = NULL;
-			return status;
+			dcache_dir_close(inode, file);
+			goto out;
 		}
-		file->private_data = fp;
+		cursor->d_fsdata = fp;
 	}
-out:
 	return 0;
+out:
+	return status;
 }
 
 static int autofs4_dir_close(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor = file->private_data;
+	int status = 0;
 
 	DPRINTK("file=%p dentry=%p %.*s",
 		file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -240,26 +192,28 @@ static int autofs4_dir_close(struct inode *inode, struct file *file)
 
 	if (autofs4_ispending(dentry)) {
 		DPRINTK("dentry busy");
-		return -EBUSY;
+		status = -EBUSY;
+		goto out;
 	}
 
 	if (d_mountpoint(dentry)) {
-		struct file *fp = file->private_data;
-
-		if (!fp)
-			return -ENOENT;
-
+		struct file *fp = cursor->d_fsdata;
+		if (!fp) {
+			status = -ENOENT;
+			goto out;
+		}
 		filp_close(fp, current->files);
-		file->private_data = NULL;
 	}
 out:
-	return 0;
+	dcache_dir_close(inode, file);
+	return status;
 }
 
 static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
 	struct dentry *dentry = file->f_dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct dentry *cursor = file->private_data;
 	int status;
 
 	DPRINTK("file=%p dentry=%p %.*s",
@@ -274,7 +228,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi
 	}
 
 	if (d_mountpoint(dentry)) {
-		struct file *fp = file->private_data;
+		struct file *fp = cursor->d_fsdata;
 
 		if (!fp)
 			return -ENOENT;
@@ -289,7 +243,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi
 		return status;
 	}
 out:
-	return autofs4_dcache_readdir(file, dirent, filldir);
+	return dcache_readdir(file, dirent, filldir);
 }
 
 static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
-- 
cgit v0.10.2


From 2d753e62b87ab2fc72bb4ff5153791d32ff9c08e Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:44 -0800
Subject: [PATCH] autofs4: can't mount due to mount point dir not empty

Addresse a problem where stale dentrys stop mounts from happening.

When a mount point directory is pre-created and a non-existent entry within it
is requested a dentry ends up being created within the mount point directory
which stops future mounts.  The problem is solved by ignoring negative,
unhashed dentrys in the mount point d_subdirs list.

Additionally the apparent cacheing of -ENOENT returns from requests is
removed.  The test on d_time is a tautology and d_time is not initialised and
has an unexpected value.  In short it doesn't do what it's meant to.

The cacheing of failed requests to the daemon is important and will be
followed up later.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f54c5b2..eea2593 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -41,14 +41,6 @@
 
 #define AUTOFS_SUPER_MAGIC 0x0187
 
-/*
- * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
- * kernel will keep the negative response cached for up to the time given
- * here, although the time can be shorter if the kernel throws the dcache
- * entry away.  This probably should be settable from user space.
- */
-#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ)	/* 1 minute */
-
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index af9a4c6..c7ff357 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -294,14 +294,13 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
-			dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 1;
+			return 0;
 		} else if (status) {
 			/* Return a negative dentry, but leave it "pending" */
-			return 1;
+			return 0;
 		}
 	/* Trigger mount for path component or follow link */
 	} else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
@@ -360,13 +359,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	/* Negative dentry.. invalidate if "old" */
 	if (dentry->d_inode == NULL)
-		return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
+		return 0;
 
 	/* Check for a non-mountpoint directory with no contents */
 	spin_lock(&dcache_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && 
-	    list_empty(&dentry->d_subdirs)) {
+	    simple_empty_nolock(dentry)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
-- 
cgit v0.10.2


From 1f5f2c3059ae8cc23cb3303daf324b06ba79517a Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:44 -0800
Subject: [PATCH] autofs4: expire code readability cleanup

Change the names of the boolean functions autofs4_check_mount and
autofs4_check_tree to autofs4_mount_busy and autofs4_tree_busy respectively
and alters their return codes to suit in order to aid code readabilty.

A couple of white space cleanups are included as well.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index dc39589..bcc17f5 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -16,7 +16,7 @@
 
 static unsigned long now;
 
-/* Check if a dentry can be expired return 1 if it can else return 0 */
+/* Check if a dentry can be expired */
 static inline int autofs4_can_expire(struct dentry *dentry,
 					unsigned long timeout, int do_now)
 {
@@ -41,14 +41,13 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 		     attempts if expire fails the first time */
 		ino->last_used = now;
 	}
-
 	return 1;
 }
 
-/* Check a mount point for busyness return 1 if not busy, otherwise */
-static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
+/* Check a mount point for busyness */
+static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
-	int status = 0;
+	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
@@ -65,7 +64,7 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
 
 	/* The big question */
 	if (may_umount_tree(mnt) == 0)
-		status = 1;
+		status = 0;
 done:
 	DPRINTK("returning = %d", status);
 	mntput(mnt);
@@ -75,30 +74,29 @@ done:
 
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
- * Return 1 if the tree is busy or 0 otherwise
  */
-static int autofs4_check_tree(struct vfsmount *mnt,
-	       		      struct dentry *top,
-			      unsigned long timeout,
-			      int do_now)
+static int autofs4_tree_busy(struct vfsmount *mnt,
+	       		     struct dentry *top,
+			     unsigned long timeout,
+			     int do_now)
 {
 	struct dentry *this_parent = top;
 	struct list_head *next;
 
-	DPRINTK("parent %p %.*s",
+	DPRINTK("top %p %.*s",
 		top, (int)top->d_name.len, top->d_name.name);
 
 	/* Negative dentry - give up */
 	if (!simple_positive(top))
-		return 0;
+		return 1;
 
 	/* Timeout of a tree mount is determined by its top dentry */
 	if (!autofs4_can_expire(top, timeout, do_now))
-		return 0;
+		return 1;
 
 	/* Is someone visiting anywhere in the tree ? */
 	if (may_umount_tree(mnt))
-		return 0;
+		return 1;
 
 	spin_lock(&dcache_lock);
 repeat:
@@ -126,9 +124,9 @@ resume:
 
 		if (d_mountpoint(dentry)) {
 			/* First busy => tree busy */
-			if (!autofs4_check_mount(mnt, dentry)) {
+			if (autofs4_mount_busy(mnt, dentry)) {
 				dput(dentry);
-				return 0;
+				return 1;
 			}
 		}
 
@@ -144,7 +142,7 @@ resume:
 	}
 	spin_unlock(&dcache_lock);
 
-	return 1;
+	return 0;
 }
 
 static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
@@ -188,7 +186,7 @@ resume:
 				goto cont;
 
 			/* Can we umount this guy */
-			if (autofs4_check_mount(mnt, dentry))
+			if (!autofs4_mount_busy(mnt, dentry))
 				return dentry;
 
 		}
@@ -241,7 +239,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
-		if ( !simple_positive(dentry) ) {
+		if (!simple_positive(dentry)) {
 			next = next->next;
 			continue;
 		}
@@ -259,21 +257,21 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 				goto next;
 
 			/* Can we umount this guy */
-			if (autofs4_check_mount(mnt, dentry)) {
+			if (!autofs4_mount_busy(mnt, dentry)) {
 				expired = dentry;
 				break;
 			}
 			goto next;
 		}
 
-		if ( simple_empty(dentry) )
+		if (simple_empty(dentry))
 			goto next;
 
 		/* Case 2: tree mount, expire iff entire tree is not busy */
 		if (!exp_leaves) {
 			/* Lock the tree as we must expire as a whole */
 			spin_lock(&sbi->fs_lock);
-			if (autofs4_check_tree(mnt, dentry, timeout, do_now)) {
+			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
 				struct autofs_info *inf = autofs4_dentry_ino(dentry);
 
 				/* Set this flag early to catch sys_chdir and the like */
@@ -297,7 +295,7 @@ next:
 		next = next->next;
 	}
 
-	if ( expired ) {
+	if (expired) {
 		DPRINTK("returning %p %.*s",
 			expired, (int)expired->d_name.len, expired->d_name.name);
 		spin_lock(&dcache_lock);
@@ -352,16 +350,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		return -EFAULT;
 
 	if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) {
-		struct autofs_info *de_info = autofs4_dentry_ino(dentry);
+		struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
 		/* This is synchronous because it makes the daemon a
                    little easier */
-		de_info->flags |= AUTOFS_INF_EXPIRING;
+		ino->flags |= AUTOFS_INF_EXPIRING;
 		ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
-		de_info->flags &= ~AUTOFS_INF_EXPIRING;
+		ino->flags &= ~AUTOFS_INF_EXPIRING;
 		dput(dentry);
 	}
-		
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 1ce12bad85863478619688c0c7363f93a9e5edb8 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:45 -0800
Subject: [PATCH] autofs4: simplify expire tree traversal

Simplify the expire tree traversal code by using a function from namespace.c
to calculate the next entry in the top down tree traversals carried out during
the expire operation.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index bcc17f5..165fe9e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -72,6 +72,27 @@ done:
 	return status;
 }
 
+/*
+ * Calculate next entry in top down tree traversal.
+ * From next_mnt in namespace.c - elegant.
+ */
+static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+{
+	struct list_head *next = p->d_subdirs.next;
+
+	if (next == &p->d_subdirs) {
+		while (1) {
+			if (p == root)
+				return NULL;
+			next = p->d_u.d_child.next;
+			if (next != &p->d_parent->d_subdirs)
+				break;
+			p = p->d_parent;
+		}
+	}
+	return list_entry(next, struct dentry, d_u.d_child);
+}
+
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  */
@@ -80,8 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
-	struct dentry *this_parent = top;
-	struct list_head *next;
+	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
 		top, (int)top->d_name.len, top->d_name.name);
@@ -99,49 +119,28 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		return 1;
 
 	spin_lock(&dcache_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
+	for (p = top; p; p = next_dentry(p, top)) {
 		/* Negative dentry - give up */
-		if (!simple_positive(dentry)) {
-			next = next->next;
+		if (!simple_positive(p))
 			continue;
-		}
 
 		DPRINTK("dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
-
-		if (!simple_empty_nolock(dentry)) {
-			this_parent = dentry;
-			goto repeat;
-		}
+			p, (int) p->d_name.len, p->d_name.name);
 
-		dentry = dget(dentry);
+		p = dget(p);
 		spin_unlock(&dcache_lock);
 
-		if (d_mountpoint(dentry)) {
+		if (d_mountpoint(p)) {
 			/* First busy => tree busy */
-			if (autofs4_mount_busy(mnt, dentry)) {
-				dput(dentry);
+			if (autofs4_mount_busy(mnt, p)) {
+				dput(p);
 				return 1;
 			}
 		}
-
-		dput(dentry);
+		dput(p);
 		spin_lock(&dcache_lock);
-		next = next->next;
-	}
-
-	if (this_parent != top) {
-		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
-		goto resume;
 	}
 	spin_unlock(&dcache_lock);
-
 	return 0;
 }
 
@@ -150,59 +149,38 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 					   unsigned long timeout,
 					   int do_now)
 {
-	struct dentry *this_parent = parent;
-	struct list_head *next;
+	struct dentry *p;
 
 	DPRINTK("parent %p %.*s",
 		parent, (int)parent->d_name.len, parent->d_name.name);
 
 	spin_lock(&dcache_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
+	for (p = parent; p; p = next_dentry(p, parent)) {
 		/* Negative dentry - give up */
-		if (!simple_positive(dentry)) {
-			next = next->next;
+		if (!simple_positive(p))
 			continue;
-		}
 
 		DPRINTK("dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
+			p, (int) p->d_name.len, p->d_name.name);
 
-		if (!list_empty(&dentry->d_subdirs)) {
-			this_parent = dentry;
-			goto repeat;
-		}
-
-		dentry = dget(dentry);
+		p = dget(p);
 		spin_unlock(&dcache_lock);
 
-		if (d_mountpoint(dentry)) {
+		if (d_mountpoint(p)) {
 			/* Can we expire this guy */
-			if (!autofs4_can_expire(dentry, timeout, do_now))
+			if (!autofs4_can_expire(p, timeout, do_now))
 				goto cont;
 
 			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, dentry))
-				return dentry;
+			if (!autofs4_mount_busy(mnt, p))
+				return p;
 
 		}
 cont:
-		dput(dentry);
+		dput(p);
 		spin_lock(&dcache_lock);
-		next = next->next;
-	}
-
-	if (this_parent != parent) {
-		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
-		goto resume;
 	}
 	spin_unlock(&dcache_lock);
-
 	return NULL;
 }
 
-- 
cgit v0.10.2


From 1aff3c8b0511b5bb54acf7859e0c6ec9ae7287a9 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:46 -0800
Subject: [PATCH] autofs4: fix false negative return from expire

Fix the case where an expire returns busy on a tree mount when it is in fact
not busy.  This case was overlooked when the patch to prevent the expiring
away of "scaffolding" directories for tree mounts was applied.

The problem arises when a tree of mounts is a member of a map with other keys.
 The current logic will not expire the tree if any other mount in the map is
busy.  The solution is to maintain a "minimum" use count for each autofs
dentry and compare this to the actual dentry usage count during expire.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index eea2593..00da71d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -55,6 +55,7 @@ struct autofs_info {
 
 	struct autofs_sb_info *sbi;
 	unsigned long last_used;
+	atomic_t count;
 
 	mode_t	mode;
 	size_t	size;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 165fe9e..053d92a 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -101,6 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
+	struct autofs_info *ino;
 	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
@@ -110,14 +111,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	if (!simple_positive(top))
 		return 1;
 
-	/* Timeout of a tree mount is determined by its top dentry */
-	if (!autofs4_can_expire(top, timeout, do_now))
-		return 1;
-
-	/* Is someone visiting anywhere in the tree ? */
-	if (may_umount_tree(mnt))
-		return 1;
-
 	spin_lock(&dcache_lock);
 	for (p = top; p; p = next_dentry(p, top)) {
 		/* Negative dentry - give up */
@@ -130,17 +123,40 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		p = dget(p);
 		spin_unlock(&dcache_lock);
 
+		/*
+		 * Is someone visiting anywhere in the subtree ?
+		 * If there's no mount we need to check the usage
+		 * count for the autofs dentry.
+		 */
+		ino = autofs4_dentry_ino(p);
 		if (d_mountpoint(p)) {
-			/* First busy => tree busy */
 			if (autofs4_mount_busy(mnt, p)) {
 				dput(p);
 				return 1;
 			}
+		} else {
+			unsigned int ino_count = atomic_read(&ino->count);
+
+			/* allow for dget above and top is already dgot */
+			if (p == top)
+				ino_count += 2;
+			else
+				ino_count++;
+
+			if (atomic_read(&p->d_count) > ino_count) {
+				dput(p);
+				return 1;
+			}
 		}
 		dput(p);
 		spin_lock(&dcache_lock);
 	}
 	spin_unlock(&dcache_lock);
+
+	/* Timeout of a tree mount is ultimately determined by its top dentry */
+	if (!autofs4_can_expire(top, timeout, do_now))
+		return 1;
+
 	return 0;
 }
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 1ad98d4..2335b1d 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -46,6 +46,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 	ino->size = 0;
 
 	ino->last_used = jiffies;
+	atomic_set(&ino->count, 0);
 
 	ino->sbi = sbi;
 
@@ -64,10 +65,19 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 
 void autofs4_free_ino(struct autofs_info *ino)
 {
+	struct autofs_info *p_ino;
+
 	if (ino->dentry) {
 		ino->dentry->d_fsdata = NULL;
-		if (ino->dentry->d_inode)
+		if (ino->dentry->d_inode) {
+			struct dentry *parent = ino->dentry->d_parent;
+			if (atomic_dec_and_test(&ino->count)) {
+				p_ino = autofs4_dentry_ino(parent);
+				if (p_ino && parent != ino->dentry)
+					atomic_dec(&p_ino->count);
+			}
 			dput(ino->dentry);
+		}
 		ino->dentry = NULL;
 	}
 	if (ino->free)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c7ff357..d196712 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -490,6 +490,7 @@ static int autofs4_dir_symlink(struct inode *dir,
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	struct inode *inode;
 	char *cp;
 
@@ -523,6 +524,10 @@ static int autofs4_dir_symlink(struct inode *dir,
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
+	atomic_inc(&ino->count);
+	p_ino = autofs4_dentry_ino(dentry->d_parent);
+	if (p_ino && dentry->d_parent != dentry)
+		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 
 	dir->i_mtime = CURRENT_TIME;
@@ -549,11 +554,17 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	
 	/* This allows root to remove symlinks */
 	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
 		return -EACCES;
 
+	if (atomic_dec_and_test(&ino->count)) {
+		p_ino = autofs4_dentry_ino(dentry->d_parent);
+		if (p_ino && dentry->d_parent != dentry)
+			atomic_dec(&p_ino->count);
+	}
 	dput(ino->dentry);
 
 	dentry->d_inode->i_size = 0;
@@ -570,6 +581,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
@@ -584,8 +596,12 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
+	if (atomic_dec_and_test(&ino->count)) {
+		p_ino = autofs4_dentry_ino(dentry->d_parent);
+		if (p_ino && dentry->d_parent != dentry)
+			atomic_dec(&p_ino->count);
+	}
 	dput(ino->dentry);
-
 	dentry->d_inode->i_size = 0;
 	dentry->d_inode->i_nlink = 0;
 
@@ -599,6 +615,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	struct autofs_info *p_ino;
 	struct inode *inode;
 
 	if ( !autofs4_oz_mode(sbi) )
@@ -621,6 +638,10 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
+	atomic_inc(&ino->count);
+	p_ino = autofs4_dentry_ino(dentry->d_parent);
+	if (p_ino && dentry->d_parent != dentry)
+		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 	dir->i_nlink++;
 	dir->i_mtime = CURRENT_TIME;
-- 
cgit v0.10.2


From e0a7aae94030b878601eb67686b696de4a3764f0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:47 -0800
Subject: [PATCH] autofs4: expire mounts that hold no (extra) references only

Alter the expire semantics that define how "busyness" is determined.
Currently a last_used counter is updated on every revalidate from processes
other than the mount owner process group.

This patch changes that so that an expire candidate is busy only if it has a
reference count greater than the expected minimum, such as when there is an
open file or working directory in use.

This method is the only way that busyness can be established for direct mounts
within the new implementation.  For consistency the expire semantic is made
the same for all mounts.

A side effect of the patch is that mounts which remain mounted unessessarily
in the presence of some GUI programs that scan the filesystem should now
expire.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 053d92a..6ae2fc8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -47,6 +47,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 /* Check a mount point for busyness */
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
+	struct dentry *top = dentry;
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
@@ -62,9 +63,14 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	if (is_autofs4_dentry(dentry))
 		goto done;
 
-	/* The big question */
-	if (may_umount_tree(mnt) == 0)
-		status = 0;
+	/* Update the expiry counter if fs is busy */
+	if (may_umount_tree(mnt)) {
+		struct autofs_info *ino = autofs4_dentry_ino(top);
+		ino->last_used = jiffies;
+		goto done;
+	}
+
+	status = 0;
 done:
 	DPRINTK("returning = %d", status);
 	mntput(mnt);
@@ -101,7 +107,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			     unsigned long timeout,
 			     int do_now)
 {
-	struct autofs_info *ino;
+	struct autofs_info *top_ino = autofs4_dentry_ino(top);
 	struct dentry *p;
 
 	DPRINTK("top %p %.*s",
@@ -127,14 +133,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 		 * Is someone visiting anywhere in the subtree ?
 		 * If there's no mount we need to check the usage
 		 * count for the autofs dentry.
+		 * If the fs is busy update the expiry counter.
 		 */
-		ino = autofs4_dentry_ino(p);
 		if (d_mountpoint(p)) {
 			if (autofs4_mount_busy(mnt, p)) {
+				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
 		} else {
+			struct autofs_info *ino = autofs4_dentry_ino(p);
 			unsigned int ino_count = atomic_read(&ino->count);
 
 			/* allow for dget above and top is already dgot */
@@ -144,6 +152,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 				ino_count++;
 
 			if (atomic_read(&p->d_count) > ino_count) {
+				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
@@ -183,14 +192,13 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 		spin_unlock(&dcache_lock);
 
 		if (d_mountpoint(p)) {
-			/* Can we expire this guy */
-			if (!autofs4_can_expire(p, timeout, do_now))
+			/* Can we umount this guy */
+			if (autofs4_mount_busy(mnt, p))
 				goto cont;
 
-			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, p))
+			/* Can we expire this guy */
+			if (autofs4_can_expire(p, timeout, do_now))
 				return p;
-
 		}
 cont:
 		dput(p);
@@ -246,12 +254,12 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-			/* Can we expire this guy */
-			if (!autofs4_can_expire(dentry, timeout, do_now))
+			/* Can we umount this guy */
+			if (autofs4_mount_busy(mnt, dentry))
 				goto next;
 
-			/* Can we umount this guy */
-			if (!autofs4_mount_busy(mnt, dentry)) {
+			/* Can we expire this guy */
+			if (autofs4_can_expire(dentry, timeout, do_now)) {
 				expired = dentry;
 				break;
 			}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d196712..3a4a5b4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -330,6 +330,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 	if (!autofs4_oz_mode(sbi))
 		autofs4_update_usage(mnt, dentry);
 
+	/* Initialize expiry counter after successful mount */
+	if (ino)
+		ino->last_used = jiffies;
+
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 	spin_unlock(&dentry->d_lock);
-- 
cgit v0.10.2


From 862b110f0132401e422783bf68521ade3dc67578 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:48 -0800
Subject: [PATCH] autofs4: remove update_atime unused function

Remove the update of i_atime from autofs4 in favour of having VFS update it.
i_atime is never used for expire in autofs4.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3a4a5b4..72dca33 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -84,24 +84,6 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 	return dcache_readdir(file, dirent, filldir);
 }
 
-/* Update usage from here to top of tree, so that scan of
-   top-level directories will give a useful result */
-static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
-{
-	struct dentry *top = dentry->d_sb->s_root;
-
-	spin_lock(&dcache_lock);
-	for(; dentry != top; dentry = dentry->d_parent) {
-		struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
-		if (ino) {
-			touch_atime(mnt, dentry);
-			ino->last_used = jiffies;
-		}
-	}
-	spin_unlock(&dcache_lock);
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_dentry;
@@ -246,10 +228,9 @@ out:
 	return dcache_readdir(file, dirent, filldir);
 }
 
-static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
+static int try_to_fill_dentry(struct dentry *dentry, int flags)
 {
-	struct super_block *sb = mnt->mnt_sb;
-	struct autofs_sb_info *sbi = autofs4_sbi(sb);
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status = 0;
 
@@ -323,13 +304,6 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f
 		}
 	}
 
-	/*
-	 * We don't update the usages for the autofs daemon itself, this
-	 * is necessary for recursive autofs mounts
-	 */
-	if (!autofs4_oz_mode(sbi))
-		autofs4_update_usage(mnt, dentry);
-
 	/* Initialize expiry counter after successful mount */
 	if (ino)
 		ino->last_used = jiffies;
@@ -357,7 +331,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* Pending dentry */
 	if (autofs4_ispending(dentry)) {
 		if (!oz_mode)
-			status = try_to_fill_dentry(nd->mnt, dentry, flags);
+			status = try_to_fill_dentry(dentry, flags);
 		return status;
 	}
 
@@ -374,15 +348,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
 		if (!oz_mode)
-			status = try_to_fill_dentry(nd->mnt, dentry, flags);
+			status = try_to_fill_dentry(dentry, flags);
 		return status;
 	}
 	spin_unlock(&dcache_lock);
 
-	/* Update the usage list */
-	if (!oz_mode)
-		autofs4_update_usage(nd->mnt, dentry);
-
 	return 1;
 }
 
-- 
cgit v0.10.2


From d7c4a5f1080a61fd4a3a00ba5f741986f8fb71f0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:49 -0800
Subject: [PATCH] autofs4: add a show mount options for proc filesystem

Add show_options method to display autofs4 mount options in the proc
filesystem.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 00da71d..d82a019 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -87,11 +87,14 @@ struct autofs_wait_queue {
 struct autofs_sb_info {
 	u32 magic;
 	struct dentry *root;
+	int pipefd;
 	struct file *pipe;
 	pid_t oz_pgrp;
 	int catatonic;
 	int version;
 	int sub_version;
+	int min_proto;
+	int max_proto;
 	unsigned long exp_timeout;
 	int reghost_enabled;
 	int needs_reghost;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2335b1d..d9a71da 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/parser.h>
 #include <linux/bitops.h>
@@ -163,9 +164,26 @@ static void autofs4_put_super(struct super_block *sb)
 	DPRINTK("shutting down");
 }
 
+static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
+
+	if (!sbi)
+		return 0;
+
+	seq_printf(m, ",fd=%d", sbi->pipefd);
+	seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+	seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
+	seq_printf(m, ",minproto=%d", sbi->min_proto);
+	seq_printf(m, ",maxproto=%d", sbi->max_proto);
+
+	return 0;
+}
+
 static struct super_operations autofs4_sops = {
 	.put_super	= autofs4_put_super,
 	.statfs		= simple_statfs,
+	.show_options	= autofs4_show_options,
 };
 
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
@@ -261,7 +279,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	int pipefd;
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
-	int minproto, maxproto;
 
 	sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if ( !sbi )
@@ -273,12 +290,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	s->s_fs_info = sbi;
 	sbi->magic = AUTOFS_SBI_MAGIC;
 	sbi->root = NULL;
+	sbi->pipefd = -1;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
 	sbi->oz_pgrp = process_group(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
+	sbi->min_proto = 0;
+	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
@@ -311,22 +331,26 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (parse_options(data, &pipefd,
 			  &root_inode->i_uid, &root_inode->i_gid,
 			  &sbi->oz_pgrp,
-			  &minproto, &maxproto)) {
+			  &sbi->min_proto, &sbi->max_proto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
 	/* Couldn't this be tested earlier? */
-	if (maxproto < AUTOFS_MIN_PROTO_VERSION ||
-	    minproto > AUTOFS_MAX_PROTO_VERSION) {
+	if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
+	    sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
 		printk("autofs: kernel does not match daemon version "
 		       "daemon (%d, %d) kernel (%d, %d)\n",
-			minproto, maxproto,
+			sbi->min_proto, sbi->max_proto,
 			AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
 		goto fail_dput;
 	}
 
-	sbi->version = maxproto > AUTOFS_MAX_PROTO_VERSION ? AUTOFS_MAX_PROTO_VERSION : maxproto;
+	/* Establish highest kernel protocol version */
+	if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
+		sbi->version = AUTOFS_MAX_PROTO_VERSION;
+	else
+		sbi->version = sbi->max_proto;
 	sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
 
 	DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
@@ -339,6 +363,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if ( !pipe->f_op || !pipe->f_op->write )
 		goto fail_fput;
 	sbi->pipe = pipe;
+	sbi->pipefd = pipefd;
 
 	/*
 	 * Take a reference to the root dentry so we get a chance to
-- 
cgit v0.10.2


From e77fbddf77c071a5735a4bc63a30649bd64baf14 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:49 -0800
Subject: [PATCH] autofs4: white space cleanup for waitq.c

Whitespace and formating changes to waitq code.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index be78e93..b0bb9d4 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -33,7 +33,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 	sbi->catatonic = 1;
 	wq = sbi->queues;
 	sbi->queues = NULL;	/* Erase all wait queues */
-	while ( wq ) {
+	while (wq) {
 		nwq = wq->next;
 		wq->status = -ENOENT; /* Magic is gone - report failure */
 		kfree(wq->name);
@@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 		fput(sbi->pipe);	/* Close the pipe */
 		sbi->pipe = NULL;
 	}
-
 	shrink_dcache_sb(sbi->sb);
 }
 
@@ -165,7 +164,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	int len, status;
 
 	/* In catatonic mode, we don't wait for nobody */
-	if ( sbi->catatonic )
+	if (sbi->catatonic)
 		return -ENOENT;
 	
 	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
@@ -190,7 +189,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			break;
 	}
 
-	if ( !wq ) {
+	if (!wq) {
 		/* Can't wait for an expire if there's no mount */
 		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
 			kfree(name);
@@ -200,7 +199,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
-		if ( !wq ) {
+		if (!wq) {
 			kfree(name);
 			mutex_unlock(&sbi->wq_mutex);
 			return -ENOMEM;
@@ -240,14 +239,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 
 	/* wq->name is NULL if and only if the lock is already released */
 
-	if ( sbi->catatonic ) {
+	if (sbi->catatonic) {
 		/* We might have slept, so check again for catatonic mode */
 		wq->status = -ENOENT;
 		kfree(wq->name);
 		wq->name = NULL;
 	}
 
-	if ( wq->name ) {
+	if (wq->name) {
 		/* Block all but "shutdown" signals while waiting */
 		sigset_t oldset;
 		unsigned long irqflags;
@@ -283,12 +282,12 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
-	for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) {
-		if ( wq->wait_queue_token == wait_queue_token )
+	for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) {
+		if (wq->wait_queue_token == wait_queue_token)
 			break;
 	}
 
-	if ( !wq ) {
+	if (!wq) {
 		mutex_unlock(&sbi->wq_mutex);
 		return -EINVAL;
 	}
-- 
cgit v0.10.2


From 90a59c7cf5dd68b41ffab61dd30eba1ef5746e66 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:50 -0800
Subject: [PATCH] autofs4: rename simple_empty_nolock function

Rename the function simple_empty_nolock to __simple_empty in line with kernel
naming conventions.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d82a019..bc1b054 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -201,7 +201,7 @@ static inline int simple_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
-static inline int simple_empty_nolock(struct dentry *dentry)
+static inline int __simple_empty(struct dentry *dentry)
 {
 	struct dentry *child;
 	int ret = 0;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 72dca33..dcd4802 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -343,7 +343,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	spin_lock(&dcache_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && 
-	    simple_empty_nolock(dentry)) {
+	    __simple_empty(dentry)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dcache_lock);
-- 
cgit v0.10.2


From e3474a8eb38e48dea6690d1fabd75f3c7fd2f93f Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:51 -0800
Subject: [PATCH] autofs4: change may_umount* functions to boolean

Change the functions may_umount and may_umount_tree to boolean functions to
aid code readability.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 5ccfcf2..3fded38 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -92,7 +92,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 			;
 		dput(dentry);
 
-		if ( may_umount(mnt) == 0 ) {
+		if ( may_umount(mnt) ) {
 			mntput(mnt);
 			DPRINTK(("autofs: signaling expire on %s\n", ent->name));
 			return ent; /* Expirable! */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 6ae2fc8..02a218f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -64,7 +64,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		goto done;
 
 	/* Update the expiry counter if fs is busy */
-	if (may_umount_tree(mnt)) {
+	if (!may_umount_tree(mnt)) {
 		struct autofs_info *ino = autofs4_dentry_ino(top);
 		ino->last_used = jiffies;
 		goto done;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index dcd4802..26eb1f0 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -699,7 +699,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
 {
 	int status = 0;
 
-	if (may_umount(mnt) == 0)
+	if (may_umount(mnt))
 		status = 1;
 
 	DPRINTK("returning %d", status);
diff --git a/fs/namespace.c b/fs/namespace.c
index e069a4c..bf478ad 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -459,9 +459,9 @@ int may_umount_tree(struct vfsmount *mnt)
 	spin_unlock(&vfsmount_lock);
 
 	if (actual_refs > minimum_refs)
-		return -EBUSY;
+		return 0;
 
-	return 0;
+	return 1;
 }
 
 EXPORT_SYMBOL(may_umount_tree);
@@ -481,10 +481,10 @@ EXPORT_SYMBOL(may_umount_tree);
  */
 int may_umount(struct vfsmount *mnt)
 {
-	int ret = 0;
+	int ret = 1;
 	spin_lock(&vfsmount_lock);
 	if (propagate_mount_busy(mnt, 2))
-		ret = -EBUSY;
+		ret = 0;
 	spin_unlock(&vfsmount_lock);
 	return ret;
 }
-- 
cgit v0.10.2


From f75ba3ade8a4599d67040a9493d75a864e7b329c Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:52 -0800
Subject: [PATCH] autofs4: increase module version

Update autofs4 version.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 9343c89..d998ddc 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	4
 
-#define AUTOFS_PROTO_SUBVERSION		7
+#define AUTOFS_PROTO_SUBVERSION		10
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
-- 
cgit v0.10.2


From 051d381259eb57d6074d02a6ba6e90e744f1a29f Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:53 -0800
Subject: [PATCH] autofs4: nameidata needs to be up to date for follow_link

In order to be able to trigger a mount using the follow_link inode method the
nameidata struct that is passed in needs to have the vfsmount of the autofs
trigger not its parent.

During a path walk if an autofs trigger is mounted on a dentry, when the
follow_link method is called, the nameidata struct contains the vfsmount and
mountpoint dentry of the parent mount while the dentry that is passed in is
the root of the autofs trigger mount.  I believe it is impossible to get the
vfsmount of the trigger mount, within the follow_link method, when only the
parent vfsmount and the root dentry of the trigger mount are known.

This patch updates the nameidata struct on entry to __do_follow_link if it
detects that it is out of date.  It moves the path_to_nameidata to above
__do_follow_link to facilitate calling it from there.  The dput_path is moved
as well as that seemed sensible.  No changes are made to these two functions.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/namei.c b/fs/namei.c
index 98dc2e1..22f6e8d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -546,6 +546,22 @@ struct path {
 	struct dentry *dentry;
 };
 
+static inline void dput_path(struct path *path, struct nameidata *nd)
+{
+	dput(path->dentry);
+	if (path->mnt != nd->mnt)
+		mntput(path->mnt);
+}
+
+static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
+{
+	dput(nd->dentry);
+	if (nd->mnt != path->mnt)
+		mntput(nd->mnt);
+	nd->mnt = path->mnt;
+	nd->dentry = path->dentry;
+}
+
 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
 {
 	int error;
@@ -555,8 +571,11 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 	touch_atime(path->mnt, dentry);
 	nd_set_link(nd, NULL);
 
-	if (path->mnt == nd->mnt)
-		mntget(path->mnt);
+	if (path->mnt != nd->mnt) {
+		path_to_nameidata(path, nd);
+		dget(dentry);
+	}
+	mntget(path->mnt);
 	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(cookie);
 	if (!IS_ERR(cookie)) {
@@ -573,22 +592,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 	return error;
 }
 
-static inline void dput_path(struct path *path, struct nameidata *nd)
-{
-	dput(path->dentry);
-	if (path->mnt != nd->mnt)
-		mntput(path->mnt);
-}
-
-static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
-{
-	dput(nd->dentry);
-	if (nd->mnt != path->mnt)
-		mntput(nd->mnt);
-	nd->mnt = path->mnt;
-	nd->dentry = path->dentry;
-}
-
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
-- 
cgit v0.10.2


From 34ca959cfc15cf09ad4da4f31ab034691e51af78 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:54 -0800
Subject: [PATCH] autofs4: add v5 follow_link mount trigger method

This patch adds a follow_link inode method for the root of an autofs direct
mount trigger.  It also adds the corresponding mount options and updates the
show_mount method.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index bc1b054..ed388a1 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -3,6 +3,7 @@
  * linux/fs/autofs/autofs_i.h
  *
  *   Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
+ *   Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -84,6 +85,10 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
+#define AUTOFS_TYP_INDIRECT     0x0001
+#define AUTOFS_TYP_DIRECT       0x0002
+#define AUTOFS_TYP_OFFSET       0x0004
+
 struct autofs_sb_info {
 	u32 magic;
 	struct dentry *root;
@@ -96,6 +101,7 @@ struct autofs_sb_info {
 	int min_proto;
 	int max_proto;
 	unsigned long exp_timeout;
+	unsigned int type;
 	int reghost_enabled;
 	int needs_reghost;
 	struct super_block *sb;
@@ -162,6 +168,8 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 extern struct inode_operations autofs4_symlink_inode_operations;
 extern struct inode_operations autofs4_dir_inode_operations;
 extern struct inode_operations autofs4_root_inode_operations;
+extern struct inode_operations autofs4_indirect_root_inode_operations;
+extern struct inode_operations autofs4_direct_root_inode_operations;
 extern struct file_operations autofs4_dir_operations;
 extern struct file_operations autofs4_root_operations;
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d9a71da..3801bed 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -3,6 +3,7 @@
  * linux/fs/autofs/inode.c
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ *  Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -177,6 +178,13 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
+	if (sbi->type & AUTOFS_TYP_OFFSET)
+		seq_printf(m, ",offset");
+	else if (sbi->type & AUTOFS_TYP_DIRECT)
+		seq_printf(m, ",direct");
+	else
+		seq_printf(m, ",indirect");
+
 	return 0;
 }
 
@@ -186,7 +194,8 @@ static struct super_operations autofs4_sops = {
 	.show_options	= autofs4_show_options,
 };
 
-enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
+enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
+	Opt_indirect, Opt_direct, Opt_offset};
 
 static match_table_t tokens = {
 	{Opt_fd, "fd=%u"},
@@ -195,11 +204,15 @@ static match_table_t tokens = {
 	{Opt_pgrp, "pgrp=%u"},
 	{Opt_minproto, "minproto=%u"},
 	{Opt_maxproto, "maxproto=%u"},
+	{Opt_indirect, "indirect"},
+	{Opt_direct, "direct"},
+	{Opt_offset, "offset"},
 	{Opt_err, NULL}
 };
 
 static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
-			 pid_t *pgrp, int *minproto, int *maxproto)
+			 pid_t *pgrp, unsigned int *type,
+			 int *minproto, int *maxproto)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
@@ -253,6 +266,15 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 				return 1;
 			*maxproto = option;
 			break;
+		case Opt_indirect:
+			*type = AUTOFS_TYP_INDIRECT;
+			break;
+		case Opt_direct:
+			*type = AUTOFS_TYP_DIRECT;
+			break;
+		case Opt_offset:
+			*type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET;
+			break;
 		default:
 			return 1;
 		}
@@ -271,6 +293,11 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
 	return ino;
 }
 
+void autofs4_dentry_release(struct dentry *);
+static struct dentry_operations autofs4_sb_dentry_operations = {
+	.d_release      = autofs4_dentry_release,
+};
+
 int autofs4_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct inode * root_inode;
@@ -297,6 +324,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
+	sbi->type = 0;
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
@@ -315,27 +343,31 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (!ino)
 		goto fail_free;
 	root_inode = autofs4_get_inode(s, ino);
-	kfree(ino);
 	if (!root_inode)
-		goto fail_free;
+		goto fail_ino;
 
-	root_inode->i_op = &autofs4_root_inode_operations;
-	root_inode->i_fop = &autofs4_root_operations;
 	root = d_alloc_root(root_inode);
-	pipe = NULL;
-
 	if (!root)
 		goto fail_iput;
+	pipe = NULL;
+
+	root->d_op = &autofs4_sb_dentry_operations;
+	root->d_fsdata = ino;
 
 	/* Can this call block? */
 	if (parse_options(data, &pipefd,
 			  &root_inode->i_uid, &root_inode->i_gid,
-			  &sbi->oz_pgrp,
+			  &sbi->oz_pgrp, &sbi->type,
 			  &sbi->min_proto, &sbi->max_proto)) {
 		printk("autofs: called with bogus options\n");
 		goto fail_dput;
 	}
 
+	root_inode->i_fop = &autofs4_root_operations;
+	root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ?
+			&autofs4_direct_root_inode_operations :
+			&autofs4_indirect_root_inode_operations;
+
 	/* Couldn't this be tested earlier? */
 	if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
 	    sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
@@ -391,6 +423,8 @@ fail_dput:
 fail_iput:
 	printk("autofs: get root dentry failed\n");
 	iput(root_inode);
+fail_ino:
+	kfree(ino);
 fail_free:
 	kfree(sbi);
 fail_unlock:
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 26eb1f0..3f00485 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -4,7 +4,7 @@
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
  *  Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -30,6 +30,7 @@ static int autofs4_dir_close(struct inode *inode, struct file *file);
 static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
+static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
 struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
@@ -46,7 +47,7 @@ struct file_operations autofs4_dir_operations = {
 	.readdir	= autofs4_dir_readdir,
 };
 
-struct inode_operations autofs4_root_inode_operations = {
+struct inode_operations autofs4_indirect_root_inode_operations = {
 	.lookup		= autofs4_lookup,
 	.unlink		= autofs4_dir_unlink,
 	.symlink	= autofs4_dir_symlink,
@@ -54,6 +55,11 @@ struct inode_operations autofs4_root_inode_operations = {
 	.rmdir		= autofs4_dir_rmdir,
 };
 
+struct inode_operations autofs4_direct_root_inode_operations = {
+	.lookup		= autofs4_lookup,
+	.follow_link	= autofs4_follow_link,
+};
+
 struct inode_operations autofs4_dir_inode_operations = {
 	.lookup		= autofs4_lookup,
 	.unlink		= autofs4_dir_unlink,
@@ -252,7 +258,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		 */
 		status = d_invalidate(dentry);
 		if (status != -EBUSY)
-			return 0;
+			return -ENOENT;
 	}
 
 	DPRINTK("dentry=%p %.*s ino=%p",
@@ -271,17 +277,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 		DPRINTK("mount done status=%d", status);
 
 		if (status && dentry->d_inode)
-			return 0; /* Try to get the kernel to invalidate this dentry */
+			return status; /* Try to get the kernel to invalidate this dentry */
 
 		/* Turn this into a real negative dentry? */
 		if (status == -ENOENT) {
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 0;
+			return status;
 		} else if (status) {
 			/* Return a negative dentry, but leave it "pending" */
-			return 0;
+			return status;
 		}
 	/* Trigger mount for path component or follow link */
 	} else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) ||
@@ -300,7 +306,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 			spin_unlock(&dentry->d_lock);
-			return 0;
+			return status;
 		}
 	}
 
@@ -311,7 +317,41 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
 	spin_unlock(&dentry->d_lock);
-	return 1;
+	return status;
+}
+
+/* For autofs direct mounts the follow link triggers the mount */
+static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	int oz_mode = autofs4_oz_mode(sbi);
+	unsigned int lookup_type;
+	int status;
+
+	DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
+		dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
+		nd->flags);
+
+	/* If it's our master or we shouldn't trigger a mount we're done */
+	lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY);
+	if (oz_mode || !lookup_type)
+		goto done;
+
+	status = try_to_fill_dentry(dentry, 0);
+	if (status)
+		goto out_error;
+
+	if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
+		status = -ENOENT;
+		goto out_error;
+	}
+
+done:
+	return NULL;
+
+out_error:
+	path_release(nd);
+	return ERR_PTR(status);
 }
 
 /*
@@ -326,13 +366,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	int oz_mode = autofs4_oz_mode(sbi);
 	int flags = nd ? nd->flags : 0;
-	int status = 1;
+	int status = 0;
 
 	/* Pending dentry */
 	if (autofs4_ispending(dentry)) {
 		if (!oz_mode)
 			status = try_to_fill_dentry(dentry, flags);
-		return status;
+		return !status;
 	}
 
 	/* Negative dentry.. invalidate if "old" */
@@ -349,14 +389,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		spin_unlock(&dcache_lock);
 		if (!oz_mode)
 			status = try_to_fill_dentry(dentry, flags);
-		return status;
+		return !status;
 	}
 	spin_unlock(&dcache_lock);
 
 	return 1;
 }
 
-static void autofs4_dentry_release(struct dentry *de)
+void autofs4_dentry_release(struct dentry *de)
 {
 	struct autofs_info *inf;
 
-- 
cgit v0.10.2


From 3a15e2ab5d6e79a79291734ac24f33d51c0ae389 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:55 -0800
Subject: [PATCH] autofs4: add v5 expire logic

This patch adds expire logic for autofs direct mounts.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 02a218f..8fd92ea 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -4,7 +4,7 @@
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
  *  Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -99,6 +99,39 @@ static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
 	return list_entry(next, struct dentry, d_u.d_child);
 }
 
+/*
+ * Check a direct mount point for busyness.
+ * Direct mounts have similar expiry semantics to tree mounts.
+ * The tree is not busy iff no mountpoints are busy and there are no
+ * autofs submounts.
+ */
+static int autofs4_direct_busy(struct vfsmount *mnt,
+				struct dentry *top,
+				unsigned long timeout,
+				int do_now)
+{
+	DPRINTK("top %p %.*s",
+		top, (int) top->d_name.len, top->d_name.name);
+
+	/* Not a mountpoint - give up */
+	if (!d_mountpoint(top))
+		return 1;
+
+	/* If it's busy update the expiry counters */
+	if (!may_umount_tree(mnt)) {
+		struct autofs_info *ino = autofs4_dentry_ino(top);
+		if (ino)
+			ino->last_used = jiffies;
+		return 1;
+	}
+
+	/* Timeout of a direct mount is determined by its top dentry */
+	if (!autofs4_can_expire(top, timeout, do_now))
+		return 1;
+
+	return 0;
+}
+
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  */
@@ -208,16 +241,48 @@ cont:
 	return NULL;
 }
 
+/* Check if we can expire a direct mount (possibly a tree) */
+static struct dentry *autofs4_expire_direct(struct super_block *sb,
+					    struct vfsmount *mnt,
+					    struct autofs_sb_info *sbi,
+					    int how)
+{
+	unsigned long timeout;
+	struct dentry *root = dget(sb->s_root);
+	int do_now = how & AUTOFS_EXP_IMMEDIATE;
+
+	if (!sbi->exp_timeout || !root)
+		return NULL;
+
+	now = jiffies;
+	timeout = sbi->exp_timeout;
+
+	/* Lock the tree as we must expire as a whole */
+	spin_lock(&sbi->fs_lock);
+	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
+		struct autofs_info *ino = autofs4_dentry_ino(root);
+
+		/* Set this flag early to catch sys_chdir and the like */
+		ino->flags |= AUTOFS_INF_EXPIRING;
+		spin_unlock(&sbi->fs_lock);
+		return root;
+	}
+	spin_unlock(&sbi->fs_lock);
+	dput(root);
+
+	return NULL;
+}
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
  *  - it is unused by any user process
  *  - it has been unused for exp_timeout time
  */
-static struct dentry *autofs4_expire(struct super_block *sb,
-				     struct vfsmount *mnt,
-				     struct autofs_sb_info *sbi,
-				     int how)
+static struct dentry *autofs4_expire_indirect(struct super_block *sb,
+					      struct vfsmount *mnt,
+					      struct autofs_sb_info *sbi,
+					      int how)
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
@@ -249,7 +314,12 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 		dentry = dget(dentry);
 		spin_unlock(&dcache_lock);
 
-		/* Case 1: indirect mount or top level direct mount */
+		/*
+		 * Case 1: (i) indirect mount or top level pseudo direct mount
+		 *	   (autofs-4.1).
+		 *	   (ii) indirect mount with offset mount, check the "/"
+		 *	   offset (autofs-5.0+).
+		 */
 		if (d_mountpoint(dentry)) {
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
@@ -283,7 +353,10 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 				break;
 			}
 			spin_unlock(&sbi->fs_lock);
-		/* Case 3: direct mount, expire individual leaves */
+		/*
+		 * Case 3: pseudo direct mount, expire individual leaves
+		 *	   (autofs-4.1).
+		 */
 		} else {
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
 			if (expired) {
@@ -325,7 +398,7 @@ int autofs4_expire_run(struct super_block *sb,
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = autofs_ptype_expire;
 
-	if ((dentry = autofs4_expire(sb, mnt, sbi, 0)) == NULL)
+	if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL)
 		return -EAGAIN;
 
 	pkt.len = dentry->d_name.len;
@@ -351,7 +424,12 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) {
+	if (sbi->type & AUTOFS_TYP_DIRECT)
+		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
+	else
+		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
+
+	if (dentry) {
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
 		/* This is synchronous because it makes the daemon a
-- 
cgit v0.10.2


From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:55 -0800
Subject: [PATCH] autofs4: add new packet type for v5 communications

This patch define a new autofs packet for autofs v5 and updates the waitq.c
functions to handle the additional packet type.

Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ed388a1..37c8d90 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -77,6 +77,12 @@ struct autofs_wait_queue {
 	int hash;
 	int len;
 	char *name;
+	u32 dev;
+	u64 ino;
+	uid_t uid;
+	gid_t gid;
+	pid_t pid;
+	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
 	atomic_t notified;
@@ -180,13 +186,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info
 
 /* Queue management functions */
 
-enum autofs_notify
-{
-	NFY_NONE,
-	NFY_MOUNT,
-	NFY_EXPIRE
-};
-
 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
@@ -204,6 +203,16 @@ static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **de
 	return res;
 }
 
+static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
+{
+	return new_encode_dev(sbi->sb->s_dev);
+}
+
+static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
+{
+	return sbi->sb->s_root->d_inode->i_ino;
+}
+
 static inline int simple_positive(struct dentry *dentry)
 {
 	return dentry->d_inode && !d_unhashed(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index b0bb9d4..12da2c9 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -3,7 +3,7 @@
  * linux/fs/autofs/waitq.c
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *  Copyright 2001-2003 Ian Kent <raven@themaw.net>
+ *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
@@ -97,7 +97,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = type;
-	if (type == autofs_ptype_missing) {
+	switch (type) {
+	/* Kernel protocol v4 missing and expire packets */
+	case autofs_ptype_missing:
+	{
 		struct autofs_packet_missing *mp = &pkt.missing;
 
 		pktsz = sizeof(*mp);
@@ -106,7 +109,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		mp->len = wq->len;
 		memcpy(mp->name, wq->name, wq->len);
 		mp->name[wq->len] = '\0';
-	} else if (type == autofs_ptype_expire_multi) {
+		break;
+	}
+	case autofs_ptype_expire_multi:
+	{
 		struct autofs_packet_expire_multi *ep = &pkt.expire_multi;
 
 		pktsz = sizeof(*ep);
@@ -115,7 +121,34 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		ep->len = wq->len;
 		memcpy(ep->name, wq->name, wq->len);
 		ep->name[wq->len] = '\0';
-	} else {
+		break;
+	}
+	/*
+	 * Kernel protocol v5 packet for handling indirect and direct
+	 * mount missing and expire requests
+	 */
+	case autofs_ptype_missing_indirect:
+	case autofs_ptype_expire_indirect:
+	case autofs_ptype_missing_direct:
+	case autofs_ptype_expire_direct:
+	{
+		struct autofs_v5_packet *packet = &pkt.v5_packet;
+
+		pktsz = sizeof(*packet);
+
+		packet->wait_queue_token = wq->wait_queue_token;
+		packet->len = wq->len;
+		memcpy(packet->name, wq->name, wq->len);
+		packet->name[wq->len] = '\0';
+		packet->dev = wq->dev;
+		packet->ino = wq->ino;
+		packet->uid = wq->uid;
+		packet->gid = wq->gid;
+		packet->pid = wq->pid;
+		packet->tgid = wq->tgid;
+		break;
+	}
+	default:
 		printk("autofs4_notify_daemon: bad type %d!\n", type);
 		return;
 	}
@@ -161,7 +194,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 {
 	struct autofs_wait_queue *wq;
 	char *name;
-	int len, status;
+	unsigned int len = 0;
+	unsigned int hash = 0;
+	int status;
 
 	/* In catatonic mode, we don't wait for nobody */
 	if (sbi->catatonic)
@@ -171,11 +206,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	if (!name)
 		return -ENOMEM;
 
-	len = autofs4_getpath(sbi, dentry, &name);
-	if (!len) {
-		kfree(name);
-		return -ENOENT;
+	/* If this is a direct mount request create a dummy name */
+	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT))
+		len = sprintf(name, "%p", dentry);
+	else {
+		len = autofs4_getpath(sbi, dentry, &name);
+		if (!len) {
+			kfree(name);
+			return -ENOENT;
+		}
 	}
+	hash = full_name_hash(name, len);
 
 	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
 		kfree(name);
@@ -211,9 +252,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->next = sbi->queues;
 		sbi->queues = wq;
 		init_waitqueue_head(&wq->queue);
-		wq->hash = dentry->d_name.hash;
+		wq->hash = hash;
 		wq->name = name;
 		wq->len = len;
+		wq->dev = autofs4_get_dev(sbi);
+		wq->ino = autofs4_get_ino(sbi);
+		wq->uid = current->uid;
+		wq->gid = current->gid;
+		wq->pid = current->pid;
+		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
 		atomic_set(&wq->notified, 1);
@@ -227,8 +274,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	}
 
 	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
-		int type = (notify == NFY_MOUNT ?
-			autofs_ptype_missing : autofs_ptype_expire_multi);
+		int type;
+
+		if (sbi->version < 5) {
+			if (notify == NFY_MOUNT)
+				type = autofs_ptype_missing;
+			else
+				type = autofs_ptype_expire_multi;
+		} else {
+			if (notify == NFY_MOUNT)
+				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+					autofs_ptype_missing_direct :
+					 autofs_ptype_missing_indirect;
+			else
+				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+					autofs_ptype_expire_direct :
+					autofs_ptype_expire_indirect;
+		}
 
 		DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index d998ddc..0a6bc52 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -19,18 +19,37 @@
 #undef AUTOFS_MIN_PROTO_VERSION
 #undef AUTOFS_MAX_PROTO_VERSION
 
-#define AUTOFS_PROTO_VERSION		4
+#define AUTOFS_PROTO_VERSION		5
 #define AUTOFS_MIN_PROTO_VERSION	3
-#define AUTOFS_MAX_PROTO_VERSION	4
+#define AUTOFS_MAX_PROTO_VERSION	5
 
-#define AUTOFS_PROTO_SUBVERSION		10
+#define AUTOFS_PROTO_SUBVERSION		0
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
 #define AUTOFS_EXP_LEAVES		2
 
-/* New message type */
-#define autofs_ptype_expire_multi	2	/* Expire entry (umount request) */
+/* Daemon notification packet types */
+enum autofs_notify {
+	NFY_NONE,
+	NFY_MOUNT,
+	NFY_EXPIRE
+};
+
+/* Kernel protocol version 4 packet types */
+
+/* Expire entry (umount request) */
+#define autofs_ptype_expire_multi	2
+
+/* Kernel protocol version 5 packet types */
+
+/* Indirect mount missing and expire requests. */
+#define autofs_ptype_missing_indirect	3
+#define autofs_ptype_expire_indirect	4
+
+/* Direct mount missing and expire requests */
+#define autofs_ptype_missing_direct	5
+#define autofs_ptype_expire_direct	6
 
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
@@ -40,14 +59,36 @@ struct autofs_packet_expire_multi {
 	char name[NAME_MAX+1];
 };
 
+/* autofs v5 common packet struct */
+struct autofs_v5_packet {
+	struct autofs_packet_hdr hdr;
+	autofs_wqt_t wait_queue_token;
+	__u32 dev;
+	__u64 ino;
+	__u32 uid;
+	__u32 gid;
+	__u32 pid;
+	__u32 tgid;
+	__u32 len;
+	char name[NAME_MAX+1];
+};
+
+typedef struct autofs_v5_packet autofs_packet_missing_indirect_t;
+typedef struct autofs_v5_packet autofs_packet_expire_indirect_t;
+typedef struct autofs_v5_packet autofs_packet_missing_direct_t;
+typedef struct autofs_v5_packet autofs_packet_expire_direct_t;
+
 union autofs_packet_union {
 	struct autofs_packet_hdr hdr;
 	struct autofs_packet_missing missing;
 	struct autofs_packet_expire expire;
 	struct autofs_packet_expire_multi expire_multi;
+	struct autofs_v5_packet v5_packet;
 };
 
 #define AUTOFS_IOC_EXPIRE_MULTI		_IOW(0x93,0x66,int)
+#define AUTOFS_IOC_EXPIRE_INDIRECT	AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_EXPIRE_DIRECT	AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_PROTOSUBVER		_IOR(0x93,0x67,int)
 #define AUTOFS_IOC_ASKREGHOST           _IOR(0x93,0x68,int)
 #define AUTOFS_IOC_TOGGLEREGHOST        _IOR(0x93,0x69,int)
-- 
cgit v0.10.2


From 44d53eb041d901620b1090590a549a705767fd10 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:56 -0800
Subject: [PATCH] autofs4: change AUTOFS_TYP_* AUTOFS_TYPE_*

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 37c8d90..ff6239d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -91,9 +91,9 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
-#define AUTOFS_TYP_INDIRECT     0x0001
-#define AUTOFS_TYP_DIRECT       0x0002
-#define AUTOFS_TYP_OFFSET       0x0004
+#define AUTOFS_TYPE_INDIRECT     0x0001
+#define AUTOFS_TYPE_DIRECT       0x0002
+#define AUTOFS_TYPE_OFFSET       0x0004
 
 struct autofs_sb_info {
 	u32 magic;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8fd92ea..08e3321 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -424,7 +424,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if (sbi->type & AUTOFS_TYP_DIRECT)
+	if (sbi->type & AUTOFS_TYPE_DIRECT)
 		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3801bed..9438889 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -178,9 +178,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
-	if (sbi->type & AUTOFS_TYP_OFFSET)
+	if (sbi->type & AUTOFS_TYPE_OFFSET)
 		seq_printf(m, ",offset");
-	else if (sbi->type & AUTOFS_TYP_DIRECT)
+	else if (sbi->type & AUTOFS_TYPE_DIRECT)
 		seq_printf(m, ",direct");
 	else
 		seq_printf(m, ",indirect");
@@ -267,13 +267,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 			*maxproto = option;
 			break;
 		case Opt_indirect:
-			*type = AUTOFS_TYP_INDIRECT;
+			*type = AUTOFS_TYPE_INDIRECT;
 			break;
 		case Opt_direct:
-			*type = AUTOFS_TYP_DIRECT;
+			*type = AUTOFS_TYPE_DIRECT;
 			break;
 		case Opt_offset:
-			*type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET;
+			*type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET;
 			break;
 		default:
 			return 1;
@@ -364,7 +364,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	}
 
 	root_inode->i_fop = &autofs4_root_operations;
-	root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ?
+	root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ?
 			&autofs4_direct_root_inode_operations :
 			&autofs4_indirect_root_inode_operations;
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 12da2c9..894d746 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -207,7 +207,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -ENOMEM;
 
 	/* If this is a direct mount request create a dummy name */
-	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT))
+	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
 		len = sprintf(name, "%p", dentry);
 	else {
 		len = autofs4_getpath(sbi, dentry, &name);
@@ -283,11 +283,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 				type = autofs_ptype_expire_multi;
 		} else {
 			if (notify == NFY_MOUNT)
-				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
 					autofs_ptype_missing_direct :
 					 autofs_ptype_missing_indirect;
 			else
-				type = (sbi->type & AUTOFS_TYP_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
 					autofs_ptype_expire_direct :
 					autofs_ptype_expire_indirect;
 		}
-- 
cgit v0.10.2


From 3370c74b2e147169d5bba6665e03d3281f5795ed Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 27 Mar 2006 01:14:57 -0800
Subject: [PATCH] Remove redundant check from autofs4_put_super

We have to have a valid sbi here, or we'd have oopsed already.  (There's a
dereference of sbi->catatonic a few lines above)

Coverity #740

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 9438889..4eddee4e 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -157,8 +157,7 @@ static void autofs4_put_super(struct super_block *sb)
 		autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
 	/* Clean up and release dangling references */
-	if (sbi)
-		autofs4_force_release(sbi);
+	autofs4_force_release(sbi);
 
 	kfree(sbi);
 
-- 
cgit v0.10.2


From 871f94344cea36b2ce91231f442f9f9298529712 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:58 -0800
Subject: [PATCH] autofs4: follow_link missing functionality

This functionality is also need for operation of autofs v5 direct mounts.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 08e3321..b8ce026 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -113,10 +113,6 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
 	DPRINTK("top %p %.*s",
 		top, (int) top->d_name.len, top->d_name.name);
 
-	/* Not a mountpoint - give up */
-	if (!d_mountpoint(top))
-		return 1;
-
 	/* If it's busy update the expiry counters */
 	if (!may_umount_tree(mnt)) {
 		struct autofs_info *ino = autofs4_dentry_ino(top);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3f00485..c8fe43a 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -57,6 +57,9 @@ struct inode_operations autofs4_indirect_root_inode_operations = {
 
 struct inode_operations autofs4_direct_root_inode_operations = {
 	.lookup		= autofs4_lookup,
+	.unlink		= autofs4_dir_unlink,
+	.mkdir		= autofs4_dir_mkdir,
+	.rmdir		= autofs4_dir_rmdir,
 	.follow_link	= autofs4_follow_link,
 };
 
@@ -337,15 +340,50 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (oz_mode || !lookup_type)
 		goto done;
 
-	status = try_to_fill_dentry(dentry, 0);
-	if (status)
-		goto out_error;
+	/*
+	 * If a request is pending wait for it.
+	 * If it's a mount then it won't be expired till at least
+	 * a liitle later and if it's an expire then we might need
+	 * to mount it again.
+	 */
+	if (autofs4_ispending(dentry)) {
+		DPRINTK("waiting for active request %p name=%.*s",
+			dentry, dentry->d_name.len, dentry->d_name.name);
 
-	if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
-		status = -ENOENT;
-		goto out_error;
+		status = autofs4_wait(sbi, dentry, NFY_NONE);
+
+		DPRINTK("request done status=%d", status);
 	}
 
+	/*
+	 * If the dentry contains directories then it is an
+	 * autofs multi-mount with no root mount offset. So
+	 * don't try to mount it again.
+	 */
+	spin_lock(&dcache_lock);
+	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);
+
+		status = try_to_fill_dentry(dentry, 0);
+		if (status)
+			goto out_error;
+
+		/*
+		 * The mount succeeded but if there is no root mount
+		 * it must be an autofs multi-mount with no root offset
+		 * so we don't need to follow the mount.
+		 */
+		if (d_mountpoint(dentry)) {
+			if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) {
+				status = -ENOENT;
+				goto out_error;
+			}
+		}
+
+		goto done;
+	}
+	spin_unlock(&dcache_lock);
+
 done:
 	return NULL;
 
-- 
cgit v0.10.2


From 3e7b19198003fc25b11838e709f17d4fa173b2d7 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 27 Mar 2006 01:14:59 -0800
Subject: [PATCH] autofs4: atomic var underflow

Fix accidental underflow of the atomic counter.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ff6239d..617fd7b 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -85,7 +85,7 @@ struct autofs_wait_queue {
 	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
-	atomic_t notified;
+	atomic_t notify;
 	atomic_t wait_ctr;
 };
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 894d746..142ab6a 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -263,7 +263,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->tgid = current->tgid;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
-		atomic_set(&wq->notified, 1);
+		atomic_set(&wq->notify, 1);
 		mutex_unlock(&sbi->wq_mutex);
 	} else {
 		atomic_inc(&wq->wait_ctr);
@@ -273,9 +273,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
 	}
 
-	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
+	if (notify != NFY_NONE && atomic_read(&wq->notify)) {
 		int type;
 
+		atomic_dec(&wq->notify);
+
 		if (sbi->version < 5) {
 			if (notify == NFY_MOUNT)
 				type = autofs_ptype_missing;
-- 
cgit v0.10.2


From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:14:59 -0800
Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache'

The 'auth_domain's are simply handles on internal data structures.  They do
not cache information from user-space, and forcing them into the mold of a
'cache' misrepresents their true nature and causes confusion.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 417ec02..ac09977 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -242,7 +242,7 @@ static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
 
 static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
 {
-	cache_get(&item->ek_client->h);
+	kref_get(&item->ek_client->ref);
 	new->ek_client = item->ek_client;
 	new->ek_fsidtype = item->ek_fsidtype;
 	new->ek_fsid[0] = item->ek_fsid[0];
@@ -474,7 +474,7 @@ static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
 }
 static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
 {
-	cache_get(&item->ex_client->h);
+	kref_get(&item->ex_client->ref);
 	new->ex_client = item->ex_client;
 	new->ex_dentry = dget(item->ex_dentry);
 	new->ex_mnt = mntget(item->ex_mnt);
@@ -1129,7 +1129,6 @@ exp_delclient(struct nfsctl_client *ncp)
 	 */
 	if (dom) {
 		err = auth_unix_forget_old(dom);
-		dom->h.expiry_time = get_seconds();
 		auth_domain_put(dom);
 	}
 
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index c119ce7..2fe2087 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -45,9 +45,10 @@ struct svc_rqst;		/* forward decl */
  * of ip addresses to the given client.
  */
 struct auth_domain {
-	struct	cache_head	h;
+	struct kref		ref;
+	struct hlist_node	hash;
 	char			*name;
-	int			flavour;
+	struct auth_ops		*flavour;
 };
 
 /*
@@ -86,6 +87,9 @@ struct auth_domain {
  *
  * domain_release()
  *   This call releases a domain.
+ * set_client()
+ *   Givens a pending request (struct svc_rqst), finds and assigns
+ *   an appropriate 'auth_domain' as the client.
  */
 struct auth_ops {
 	char *	name;
@@ -117,7 +121,7 @@ extern void	svc_auth_unregister(rpc_authflavor_t flavor);
 extern struct auth_domain *unix_domain_find(char *name);
 extern void auth_domain_put(struct auth_domain *item);
 extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom);
-extern struct auth_domain *auth_domain_lookup(struct auth_domain *item, int set);
+extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
 extern struct auth_domain *auth_domain_find(char *name);
 extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
 extern int auth_unix_forget_old(struct auth_domain *dom);
@@ -160,8 +164,6 @@ static inline unsigned long hash_mem(char *buf, int length, int bits)
 	return hash >> (BITS_PER_LONG - bits);
 }
 
-extern struct cache_detail auth_domain_cache, ip_map_cache;
-
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 23632d8..6b073c2 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -645,6 +645,8 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc)
 	return auth_domain_find(name);
 }
 
+static struct auth_ops svcauthops_gss;
+
 int
 svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
 {
@@ -655,20 +657,18 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		goto out;
-	cache_init(&new->h.h);
+	kref_init(&new->h.ref);
 	new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
 	if (!new->h.name)
 		goto out_free_dom;
 	strcpy(new->h.name, name);
-	new->h.flavour = RPC_AUTH_GSS;
+	new->h.flavour = &svcauthops_gss;
 	new->pseudoflavor = pseudoflavor;
-	new->h.h.expiry_time = NEVER;
 
-	test = auth_domain_lookup(&new->h, 1);
-	if (test == &new->h) {
-		BUG_ON(atomic_dec_and_test(&new->h.h.refcnt));
-	} else { /* XXX Duplicate registration? */
+	test = auth_domain_lookup(name, &new->h);
+	if (test != &new->h) { /* XXX Duplicate registration? */
 		auth_domain_put(&new->h);
+		/* dangling ref-count... */
 		goto out;
 	}
 	return 0;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 9f73732..4040119 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -142,6 +142,7 @@ EXPORT_SYMBOL(nlm_debug);
 
 extern int register_rpc_pipefs(void);
 extern void unregister_rpc_pipefs(void);
+extern struct cache_detail ip_map_cache;
 
 static int __init
 init_sunrpc(void)
@@ -158,7 +159,6 @@ init_sunrpc(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_init();
 #endif
-	cache_register(&auth_domain_cache);
 	cache_register(&ip_map_cache);
 out:
 	return err;
@@ -169,8 +169,6 @@ cleanup_sunrpc(void)
 {
 	unregister_rpc_pipefs();
 	rpc_destroy_mempool();
-	if (cache_unregister(&auth_domain_cache))
-		printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n");
 	if (cache_unregister(&ip_map_cache))
 		printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n");
 #ifdef RPC_DEBUG
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index dda4f0c..5b28c61 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -106,112 +106,56 @@ svc_auth_unregister(rpc_authflavor_t flavor)
 EXPORT_SYMBOL(svc_auth_unregister);
 
 /**************************************************
- * cache for domain name to auth_domain
- * Entries are only added by flavours which will normally
- * have a structure that 'inherits' from auth_domain.
- * e.g. when an IP -> domainname is given to  auth_unix,
- * and the domain name doesn't exist, it will create a
- * auth_unix_domain and add it to this hash table.
- * If it finds the name does exist, but isn't AUTH_UNIX,
- * it will complain.
+ * 'auth_domains' are stored in a hash table indexed by name.
+ * When the last reference to an 'auth_domain' is dropped,
+ * the object is unhashed and freed.
+ * If auth_domain_lookup fails to find an entry, it will return
+ * it's second argument 'new'.  If this is non-null, it will
+ * have been atomically linked into the table.
  */
 
-/*
- * Auth auth_domain cache is somewhat different to other caches,
- * largely because the entries are possibly of different types:
- * each auth flavour has it's own type.
- * One consequence of this that DefineCacheLookup cannot
- * allocate a new structure as it cannot know the size.
- * Notice that the "INIT" code fragment is quite different
- * from other caches.  When auth_domain_lookup might be
- * creating a new domain, the new domain is passed in
- * complete and it is used as-is rather than being copied into
- * another structure.
- */
 #define	DN_HASHBITS	6
 #define	DN_HASHMAX	(1<<DN_HASHBITS)
 #define	DN_HASHMASK	(DN_HASHMAX-1)
 
-static struct cache_head	*auth_domain_table[DN_HASHMAX];
-
-static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd)
-{
-	struct auth_domain *dom = container_of(item, struct auth_domain, h);
-	if (cache_put(item,cd))
-		authtab[dom->flavour]->domain_release(dom);
-}
-
-
-struct cache_detail auth_domain_cache = {
-	.owner		= THIS_MODULE,
-	.hash_size	= DN_HASHMAX,
-	.hash_table	= auth_domain_table,
-	.name		= "auth.domain",
-	.cache_put	= auth_domain_drop,
-};
+static struct hlist_head	auth_domain_table[DN_HASHMAX];
+static spinlock_t	auth_domain_lock = SPIN_LOCK_UNLOCKED;
 
 void auth_domain_put(struct auth_domain *dom)
 {
-	auth_domain_drop(&dom->h, &auth_domain_cache);
-}
-
-static inline int auth_domain_hash(struct auth_domain *item)
-{
-	return hash_str(item->name, DN_HASHBITS);
-}
-static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item)
-{
-	return strcmp(tmp->name, item->name) == 0;
+	if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) {
+		hlist_del(&dom->hash);
+		dom->flavour->domain_release(dom);
+	}
 }
 
 struct auth_domain *
-auth_domain_lookup(struct auth_domain *item, int set)
+auth_domain_lookup(char *name, struct auth_domain *new)
 {
-	struct auth_domain *tmp = NULL;
-	struct cache_head **hp, **head;
-	head = &auth_domain_cache.hash_table[auth_domain_hash(item)];
-
-	if (set)
-		write_lock(&auth_domain_cache.hash_lock);
-	else
-		read_lock(&auth_domain_cache.hash_lock);
-	for (hp=head; *hp != NULL; hp = &tmp->h.next) {
-		tmp = container_of(*hp, struct auth_domain, h);
-		if (!auth_domain_match(tmp, item))
-			continue;
-		if (!set) {
-			cache_get(&tmp->h);
-			goto out_noset;
+	struct auth_domain *hp;
+	struct hlist_head *head;
+	struct hlist_node *np;
+
+	head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
+
+	spin_lock(&auth_domain_lock);
+
+	hlist_for_each_entry(hp, np, head, hash) {
+		if (strcmp(hp->name, name)==0) {
+			kref_get(&hp->ref);
+			spin_unlock(&auth_domain_lock);
+			return hp;
 		}
-		*hp = tmp->h.next;
-		tmp->h.next = NULL;
-		auth_domain_drop(&tmp->h, &auth_domain_cache);
-		goto out_set;
 	}
-	/* Didn't find anything */
-	if (!set)
-		goto out_nada;
-	auth_domain_cache.entries++;
-out_set:
-	item->h.next = *head;
-	*head = &item->h;
-	cache_get(&item->h);
-	write_unlock(&auth_domain_cache.hash_lock);
-	cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time);
-	cache_get(&item->h);
-	return item;
-out_nada:
-	tmp = NULL;
-out_noset:
-	read_unlock(&auth_domain_cache.hash_lock);
-	return tmp;
+	if (new) {
+		hlist_add_head(&new->hash, head);
+		kref_get(&new->ref);
+	}
+	spin_unlock(&auth_domain_lock);
+	return new;
 }
 
 struct auth_domain *auth_domain_find(char *name)
 {
-	struct auth_domain *rv, ad;
-
-	ad.name = name;
-	rv = auth_domain_lookup(&ad, 0);
-	return rv;
+	return auth_domain_lookup(name, NULL);
 }
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3e6c694..17e8b2a 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -27,41 +27,35 @@ struct unix_domain {
 	/* other stuff later */
 };
 
+extern struct auth_ops svcauth_unix;
+
 struct auth_domain *unix_domain_find(char *name)
 {
-	struct auth_domain *rv, ud;
-	struct unix_domain *new;
-
-	ud.name = name;
-	
-	rv = auth_domain_lookup(&ud, 0);
-
- foundit:
-	if (rv && rv->flavour != RPC_AUTH_UNIX) {
-		auth_domain_put(rv);
-		return NULL;
-	}
-	if (rv)
-		return rv;
-
-	new = kmalloc(sizeof(*new), GFP_KERNEL);
-	if (new == NULL)
-		return NULL;
-	cache_init(&new->h.h);
-	new->h.name = kstrdup(name, GFP_KERNEL);
-	new->h.flavour = RPC_AUTH_UNIX;
-	new->addr_changes = 0;
-	new->h.h.expiry_time = NEVER;
-
-	rv = auth_domain_lookup(&new->h, 2);
-	if (rv == &new->h) {
-		if (atomic_dec_and_test(&new->h.h.refcnt)) BUG();
-	} else {
-		auth_domain_put(&new->h);
-		goto foundit;
+	struct auth_domain *rv;
+	struct unix_domain *new = NULL;
+
+	rv = auth_domain_lookup(name, NULL);
+	while(1) {
+		if (rv != &new->h) {
+			if (new) auth_domain_put(&new->h);
+			return rv;
+		}
+		if (rv && rv->flavour != &svcauth_unix) {
+			auth_domain_put(rv);
+			return NULL;
+		}
+		if (rv)
+			return rv;
+
+		new = kmalloc(sizeof(*new), GFP_KERNEL);
+		if (new == NULL)
+			return NULL;
+		kref_init(&new->h.ref);
+		new->h.name = kstrdup(name, GFP_KERNEL);
+		new->h.flavour = &svcauth_unix;
+		new->addr_changes = 0;
+		rv = auth_domain_lookup(name, &new->h);
 	}
-
-	return rv;
 }
 
 static void svcauth_unix_domain_release(struct auth_domain *dom)
@@ -130,7 +124,7 @@ static inline void ip_map_init(struct ip_map *new, struct ip_map *item)
 }
 static inline void ip_map_update(struct ip_map *new, struct ip_map *item)
 {
-	cache_get(&item->m_client->h.h);
+	kref_get(&item->m_client->h.ref);
 	new->m_client = item->m_client;
 	new->m_add_change = item->m_add_change;
 }
@@ -272,7 +266,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
 	struct unix_domain *udom;
 	struct ip_map ip, *ipmp;
 
-	if (dom->flavour != RPC_AUTH_UNIX)
+	if (dom->flavour != &svcauth_unix)
 		return -EINVAL;
 	udom = container_of(dom, struct unix_domain, h);
 	strcpy(ip.m_class, "nfsd");
@@ -295,7 +289,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
 {
 	struct unix_domain *udom;
 	
-	if (dom->flavour != RPC_AUTH_UNIX)
+	if (dom->flavour != &svcauth_unix)
 		return -EINVAL;
 	udom = container_of(dom, struct unix_domain, h);
 	udom->addr_changes++;
@@ -323,7 +317,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
 		rv = NULL;
 	} else {
 		rv = &ipm->m_client->h;
-		cache_get(&rv->h);
+		kref_get(&rv->ref);
 	}
 	ip_map_put(&ipm->h, &ip_map_cache);
 	return rv;
@@ -332,7 +326,6 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
 void svcauth_unix_purge(void)
 {
 	cache_purge(&ip_map_cache);
-	cache_purge(&auth_domain_cache);
 }
 
 static int
@@ -361,7 +354,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 			return SVC_DENIED;
 		case 0:
 			rqstp->rq_client = &ipm->m_client->h;
-			cache_get(&rqstp->rq_client->h);
+			kref_get(&rqstp->rq_client->ref);
 			ip_map_put(&ipm->h, &ip_map_cache);
 			break;
 	}
-- 
cgit v0.10.2


From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:00 -0800
Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export

Current svc_expkey holds a pointer to the svc_export structure, so updates to
that structure have to be in-place, which is a wart on the whole cache
infrastruct.  So we break that linkage and just do a second lookup.

If this became a performance issue, it would be possible to put a direct link
back in which was only used conditionally.  i.e.  when an object is replaced
in the cache, we set a flag in the old object.  When dereferencing the link
from svc_expkey, if the flag is set, we drop the reference and do a fresh
lookup.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ac09977..587829e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -73,8 +73,10 @@ void expkey_put(struct cache_head *item, struct cache_detail *cd)
 	if (cache_put(item, cd)) {
 		struct svc_expkey *key = container_of(item, struct svc_expkey, h);
 		if (test_bit(CACHE_VALID, &item->flags) &&
-		    !test_bit(CACHE_NEGATIVE, &item->flags))
-			exp_put(key->ek_export);
+		    !test_bit(CACHE_NEGATIVE, &item->flags)) {
+			dput(key->ek_dentry);
+			mntput(key->ek_mnt);
+		}
 		auth_domain_put(key->ek_client);
 		kfree(key);
 	}
@@ -164,26 +166,18 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	} else {
 		struct nameidata nd;
 		struct svc_expkey *ek;
-		struct svc_export *exp;
 		err = path_lookup(buf, 0, &nd);
 		if (err)
 			goto out;
 
 		dprintk("Found the path %s\n", buf);
-		exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
-
-		err = -ENOENT;
-		if (!exp)
-			goto out_nd;
-		key.ek_export = exp;
-		dprintk("And found export\n");
+		key.ek_mnt = nd.mnt;
+		key.ek_dentry = nd.dentry;
 		
 		ek = svc_expkey_lookup(&key, 1);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
-		exp_put(exp);
 		err = 0;
-	out_nd:
 		path_release(&nd);
 	}
 	cache_flush();
@@ -214,7 +208,7 @@ static int expkey_show(struct seq_file *m,
 	if (test_bit(CACHE_VALID, &h->flags) && 
 	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
 		seq_printf(m, " ");
-		seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n");
+		seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n");
 	}
 	seq_printf(m, "\n");
 	return 0;
@@ -252,8 +246,8 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it
 
 static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
 {
-	cache_get(&item->ek_export->h);
-	new->ek_export = item->ek_export;
+	new->ek_mnt = mntget(item->ek_mnt);
+	new->ek_dentry = dget(item->ek_dentry);
 }
 
 static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
@@ -519,7 +513,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.ek_client = clp;
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
-	key.ek_export = exp;
+	key.ek_mnt = exp->ex_mnt;
+	key.ek_dentry = exp->ex_dentry;
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
@@ -741,8 +736,8 @@ exp_export(struct nfsctl_export *nxp)
 	if ((nxp->ex_flags & NFSEXP_FSID) &&
 	    (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
 	    !IS_ERR(fsid_key) &&
-	    fsid_key->ek_export &&
-	    fsid_key->ek_export != exp)
+	    fsid_key->ek_mnt &&
+	    (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) )
 		goto finish;
 
 	if (exp) {
@@ -912,6 +907,24 @@ out:
 	return err;
 }
 
+struct svc_export *
+exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
+	 struct cache_req *reqp)
+{
+	struct svc_export *exp;
+	struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
+	if (!ek || IS_ERR(ek))
+		return ERR_PTR(PTR_ERR(ek));
+
+	exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
+	expkey_put(&ek->h, &svc_expkey_cache);
+
+	if (!exp || IS_ERR(exp))
+		return ERR_PTR(PTR_ERR(exp));
+	return exp;
+}
+
+
 /*
  * Called when we need the filehandle for the root of the pseudofs,
  * for a given NFSv4 client.   The root is defined to be the
@@ -922,6 +935,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	       struct cache_req *creq)
 {
 	struct svc_expkey *fsid_key;
+	struct svc_export *exp;
 	int rv;
 	u32 fsidv[2];
 
@@ -933,8 +947,14 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	if (!fsid_key || IS_ERR(fsid_key))
 		return nfserr_perm;
 
-	rv = fh_compose(fhp, fsid_key->ek_export, 
-			  fsid_key->ek_export->ex_dentry, NULL);
+	exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
+	if (exp == NULL)
+		rv = nfserr_perm;
+	else if (IS_ERR(exp))
+		rv = nfserrno(PTR_ERR(exp));
+	else
+		rv = fh_compose(fhp, exp,
+				fsid_key->ek_dentry, NULL);
 	expkey_put(&fsid_key->h, &svc_expkey_cache);
 	return rv;
 }
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 6bad476..d52e0b7 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -67,7 +67,8 @@ struct svc_expkey {
 	int			ek_fsidtype;
 	u32			ek_fsid[3];
 
-	struct svc_export *	ek_export;
+	struct vfsmount *	ek_mnt;
+	struct dentry *		ek_dentry;
 };
 
 #define EX_SECURE(exp)		(!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
@@ -114,22 +115,9 @@ static inline void exp_get(struct svc_export *exp)
 {
 	cache_get(&exp->h);
 }
-static inline struct svc_export *
+extern struct svc_export *
 exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
-	 struct cache_req *reqp)
-{
-	struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
-	if (ek && !IS_ERR(ek)) {
-		struct svc_export *exp = ek->ek_export;
-		int err;
-		exp_get(exp);
-		expkey_put(&ek->h, &svc_expkey_cache);
-		if ((err = cache_check(&svc_export_cache, &exp->h, reqp)))
-			exp = ERR_PTR(err);
-		return exp;
-	} else
-		return ERR_PTR(PTR_ERR(ek));
-}
+	 struct cache_req *reqp);
 
 #endif /* __KERNEL__ */
 
-- 
cgit v0.10.2


From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:01 -0800
Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches

These were an unnecessary wart.  Also only have one 'DefineSimpleCache..'
instead of two.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 587829e..c591761 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -250,7 +250,7 @@ static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *
 	new->ek_dentry = dget(item->ek_dentry);
 }
 
-static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
+static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
 
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
@@ -482,7 +482,7 @@ static inline void svc_export_update(struct svc_export *new, struct svc_export *
 	new->ex_fsid = item->ex_fsid;
 }
 
-static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
+static DefineSimpleCacheLookup(svc_export, svc_export)
 
 
 struct svc_expkey *
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 1336965..dea690a 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -76,12 +76,6 @@ struct ent {
 	char              authname[IDMAP_NAMESZ];
 };
 
-#define DefineSimpleCacheLookupMap(STRUCT, FUNC)			\
-        DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
-        (struct STRUCT *item, int set), /*no setup */,			\
-	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
-	STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
-
 /* Common entry handling */
 
 #define ENT_HASHBITS          8
@@ -264,7 +258,7 @@ out:
 	return error;
 }
 
-static DefineSimpleCacheLookupMap(ent, idtoname);
+static DefineSimpleCacheLookup(ent, idtoname);
 
 /*
  * Name -> ID cache
@@ -390,7 +384,7 @@ out:
 	return (error);
 }
 
-static DefineSimpleCacheLookupMap(ent, nametoid);
+static DefineSimpleCacheLookup(ent, nametoid);
 
 /*
  * Exported API
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index c4e3ea7..405ac14 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -133,14 +133,11 @@ struct cache_deferred_req {
  * If "set" == 0 :
  *    If an entry is found, it is returned
  *    If no entry is found, a new non-VALID entry is created.
- * If "set" == 1 and INPLACE == 0 :
+ * If "set" == 1 :
  *    If no entry is found a new one is inserted with data from "template"
  *    If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
  *    If a CACHE_VALID entry is found, a new entry is swapped in with data
  *       from "template"
- * If set == 1, and INPLACE == 1 :
- *    As above, except that if a CACHE_VALID entry is found, we UPDATE in place
- *       instead of swapping in a new entry.
  *
  * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
  * run but insteead CACHE_NEGATIVE is set in any new item.
@@ -159,13 +156,8 @@ struct cache_deferred_req {
  * TEST  tests if "tmp" matches "item"
  * INIT copies key information from "item" to "new"
  * UPDATE copies content information from "item" to "tmp"
- * INPLACE is true if updates can happen inplace rather than allocating a new structure
- *
- * WARNING: any substantial changes to this must be reflected in
- *   net/sunrpc/svcauth.c(auth_domain_lookup)
- *  which is a similar routine that is open-coded.
  */
-#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE)	\
+#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE)	\
 RTN *FNAME ARGS										\
 {											\
 	RTN *tmp, *new=NULL;								\
@@ -179,13 +171,13 @@ RTN *FNAME ARGS										\
 		tmp = container_of(*hp, RTN, MEMBER);					\
 		if (TEST) { /* found a match */						\
 											\
-			if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
+			if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new)	\
 				break;							\
 											\
 			if (new)							\
 				{INIT;}							\
 			if (set) {							\
-				if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
+				if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
 				{ /* need to swap in new */				\
 					RTN *t2;					\
 											\
@@ -206,7 +198,7 @@ RTN *FNAME ARGS										\
 			else read_unlock(&(DETAIL)->hash_lock);				\
 			if (set)							\
 				cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \
-			if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0);	\
+			if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0);	\
 			if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL);		\
 			return tmp;							\
 		}									\
@@ -239,10 +231,12 @@ RTN *FNAME ARGS										\
 	return NULL;									\
 }
 
-#define DefineSimpleCacheLookup(STRUCT,INPLACE)	\
-	DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */,	\
-			  & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\
-			  STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE)
+#define DefineSimpleCacheLookup(STRUCT, FUNC)				\
+        DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
+        (struct STRUCT *item, int set), /*no setup */,			\
+	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
+	STRUCT##_init(new, item), STRUCT##_update(tmp, item))
+
 
 #define cache_for_each(pos, detail, index, member) 						\
 	for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ;		\
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 6b073c2..aadb4e8 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -259,7 +259,7 @@ static struct cache_detail rsi_cache = {
 	.cache_parse    = rsi_parse,
 };
 
-static DefineSimpleCacheLookup(rsi, 0)
+static DefineSimpleCacheLookup(rsi, rsi)
 
 /*
  * The rpcsec_context cache is used to store a context that is
@@ -446,7 +446,7 @@ static struct cache_detail rsc_cache = {
 	.cache_parse	= rsc_parse,
 };
 
-static DefineSimpleCacheLookup(rsc, 0);
+static DefineSimpleCacheLookup(rsc, rsc);
 
 static struct rsc *
 gss_svc_searchbyctx(struct xdr_netobj *handle)
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 17e8b2a..7ddf068 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -258,7 +258,7 @@ struct cache_detail ip_map_cache = {
 	.cache_show	= ip_map_show,
 };
 
-static DefineSimpleCacheLookup(ip_map, 0)
+static DefineSimpleCacheLookup(ip_map, ip_map)
 
 
 int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
-- 
cgit v0.10.2


From 15a5f6bd23eddd5b3be80366f364be04fb1c1c99 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:02 -0800
Subject: [PATCH] knfsd: Create cache_lookup function instead of using a macro
 to declare one

The C++-like 'template' approach proves to be too ugly and hard to work with.

The old 'template' won't go away until all users are updated.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 405ac14..3e17a5f 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -81,6 +81,11 @@ struct cache_detail {
 					      struct cache_detail *cd,
 					      struct cache_head *h);
 
+	struct cache_head *	(*alloc)(void);
+	int			(*match)(struct cache_head *orig, struct cache_head *new);
+	void			(*init)(struct cache_head *orig, struct cache_head *new);
+	void			(*update)(struct cache_head *orig, struct cache_head *new);
+
 	/* fields below this comment are for internal use
 	 * and should not be touched by cache owners
 	 */
@@ -237,6 +242,13 @@ RTN *FNAME ARGS										\
 	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
 	STRUCT##_init(new, item), STRUCT##_update(tmp, item))
 
+extern struct cache_head *
+sunrpc_cache_lookup(struct cache_detail *detail,
+		    struct cache_head *key, int hash);
+extern struct cache_head *
+sunrpc_cache_update(struct cache_detail *detail,
+		    struct cache_head *new, struct cache_head *old, int hash);
+
 
 #define cache_for_each(pos, detail, index, member) 						\
 	for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ;		\
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 0acccfe..4449dc5 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -47,6 +47,104 @@ void cache_init(struct cache_head *h)
 	h->last_refresh = now;
 }
 
+struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
+				       struct cache_head *key, int hash)
+{
+	struct cache_head **head,  **hp;
+	struct cache_head *new = NULL;
+
+	head = &detail->hash_table[hash];
+
+	read_lock(&detail->hash_lock);
+
+	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+		struct cache_head *tmp = *hp;
+		if (detail->match(tmp, key)) {
+			cache_get(tmp);
+			read_unlock(&detail->hash_lock);
+			return tmp;
+		}
+	}
+	read_unlock(&detail->hash_lock);
+	/* Didn't find anything, insert an empty entry */
+
+	new = detail->alloc();
+	if (!new)
+		return NULL;
+	cache_init(new);
+
+	write_lock(&detail->hash_lock);
+
+	/* check if entry appeared while we slept */
+	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+		struct cache_head *tmp = *hp;
+		if (detail->match(tmp, key)) {
+			cache_get(tmp);
+			write_unlock(&detail->hash_lock);
+			detail->cache_put(new, detail);
+			return tmp;
+		}
+	}
+	detail->init(new, key);
+	new->next = *head;
+	*head = new;
+	detail->entries++;
+	cache_get(new);
+	write_unlock(&detail->hash_lock);
+
+	return new;
+}
+EXPORT_SYMBOL(sunrpc_cache_lookup);
+
+struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
+				       struct cache_head *new, struct cache_head *old, int hash)
+{
+	/* The 'old' entry is to be replaced by 'new'.
+	 * If 'old' is not VALID, we update it directly,
+	 * otherwise we need to replace it
+	 */
+	struct cache_head **head;
+	struct cache_head *tmp;
+
+	if (!test_bit(CACHE_VALID, &old->flags)) {
+		write_lock(&detail->hash_lock);
+		if (!test_bit(CACHE_VALID, &old->flags)) {
+			if (test_bit(CACHE_NEGATIVE, &new->flags))
+				set_bit(CACHE_NEGATIVE, &old->flags);
+			else
+				detail->update(old, new);
+			/* FIXME cache_fresh should come first */
+			write_unlock(&detail->hash_lock);
+			cache_fresh(detail, old, new->expiry_time);
+			return old;
+		}
+		write_unlock(&detail->hash_lock);
+	}
+	/* We need to insert a new entry */
+	tmp = detail->alloc();
+	if (!tmp) {
+		detail->cache_put(old, detail);
+		return NULL;
+	}
+	cache_init(tmp);
+	detail->init(tmp, old);
+	head = &detail->hash_table[hash];
+
+	write_lock(&detail->hash_lock);
+	if (test_bit(CACHE_NEGATIVE, &new->flags))
+		set_bit(CACHE_NEGATIVE, &tmp->flags);
+	else
+		detail->update(tmp, new);
+	tmp->next = *head;
+	*head = tmp;
+	cache_get(tmp);
+	write_unlock(&detail->hash_lock);
+	cache_fresh(detail, tmp, new->expiry_time);
+	cache_fresh(detail, old, 0);
+	detail->cache_put(old, detail);
+	return tmp;
+}
+EXPORT_SYMBOL(sunrpc_cache_update);
 
 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
 /*
-- 
cgit v0.10.2


From 1a9917c2da5b81fa536e560be0c431435e2c965b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:02 -0800
Subject: [PATCH] knfsd: Convert ip_map cache to use the new lookup routine

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 7ddf068..7e38621 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -106,28 +106,38 @@ static inline int hash_ip(unsigned long ip)
 	return (hash ^ (hash>>8)) & 0xff;
 }
 #endif
-
-static inline int ip_map_hash(struct ip_map *item)
-{
-	return hash_str(item->m_class, IP_HASHBITS) ^ 
-		hash_ip((unsigned long)item->m_addr.s_addr);
-}
-static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp)
+static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
 {
-	return strcmp(tmp->m_class, item->m_class) == 0
-		&& tmp->m_addr.s_addr == item->m_addr.s_addr;
+	struct ip_map *orig = container_of(corig, struct ip_map, h);
+	struct ip_map *new = container_of(cnew, struct ip_map, h);
+	return strcmp(orig->m_class, new->m_class) == 0
+		&& orig->m_addr.s_addr == new->m_addr.s_addr;
 }
-static inline void ip_map_init(struct ip_map *new, struct ip_map *item)
+static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct ip_map *new = container_of(cnew, struct ip_map, h);
+	struct ip_map *item = container_of(citem, struct ip_map, h);
+
 	strcpy(new->m_class, item->m_class);
 	new->m_addr.s_addr = item->m_addr.s_addr;
 }
-static inline void ip_map_update(struct ip_map *new, struct ip_map *item)
+static void update(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct ip_map *new = container_of(cnew, struct ip_map, h);
+	struct ip_map *item = container_of(citem, struct ip_map, h);
+
 	kref_get(&item->m_client->h.ref);
 	new->m_client = item->m_client;
 	new->m_add_change = item->m_add_change;
 }
+static struct cache_head *ip_map_alloc(void)
+{
+	struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i)
+		return &i->h;
+	else
+		return NULL;
+}
 
 static void ip_map_request(struct cache_detail *cd,
 				  struct cache_head *h,
@@ -148,7 +158,8 @@ static void ip_map_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-static struct ip_map *ip_map_lookup(struct ip_map *, int);
+static struct ip_map *ip_map_lookup(char *class, struct in_addr addr);
+static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
 
 static int ip_map_parse(struct cache_detail *cd,
 			  char *mesg, int mlen)
@@ -160,7 +171,11 @@ static int ip_map_parse(struct cache_detail *cd,
 	int len;
 	int b1,b2,b3,b4;
 	char c;
-	struct ip_map ipm, *ipmp;
+	char class[8];
+	struct in_addr addr;
+	int err;
+
+	struct ip_map *ipmp;
 	struct auth_domain *dom;
 	time_t expiry;
 
@@ -169,7 +184,7 @@ static int ip_map_parse(struct cache_detail *cd,
 	mesg[mlen-1] = 0;
 
 	/* class */
-	len = qword_get(&mesg, ipm.m_class, sizeof(ipm.m_class));
+	len = qword_get(&mesg, class, sizeof(class));
 	if (len <= 0) return -EINVAL;
 
 	/* ip address */
@@ -194,25 +209,22 @@ static int ip_map_parse(struct cache_detail *cd,
 	} else
 		dom = NULL;
 
-	ipm.m_addr.s_addr =
+	addr.s_addr =
 		htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
-	ipm.h.flags = 0;
-	if (dom) {
-		ipm.m_client = container_of(dom, struct unix_domain, h);
-		ipm.m_add_change = ipm.m_client->addr_changes;
+
+	ipmp = ip_map_lookup(class,addr);
+	if (ipmp) {
+		err = ip_map_update(ipmp,
+			     container_of(dom, struct unix_domain, h),
+			     expiry);
 	} else
-		set_bit(CACHE_NEGATIVE, &ipm.h.flags);
-	ipm.h.expiry_time = expiry;
+		err = -ENOMEM;
 
-	ipmp = ip_map_lookup(&ipm, 1);
-	if (ipmp)
-		ip_map_put(&ipmp->h, &ip_map_cache);
 	if (dom)
 		auth_domain_put(dom);
-	if (!ipmp)
-		return -ENOMEM;
+
 	cache_flush();
-	return 0;
+	return err;
 }
 
 static int ip_map_show(struct seq_file *m,
@@ -256,32 +268,70 @@ struct cache_detail ip_map_cache = {
 	.cache_request	= ip_map_request,
 	.cache_parse	= ip_map_parse,
 	.cache_show	= ip_map_show,
+	.match		= ip_map_match,
+	.init		= ip_map_init,
+	.update		= update,
+	.alloc		= ip_map_alloc,
 };
 
-static DefineSimpleCacheLookup(ip_map, ip_map)
+static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
+{
+	struct ip_map ip;
+	struct cache_head *ch;
+
+	strcpy(ip.m_class, class);
+	ip.m_addr = addr;
+	ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
+				 hash_str(class, IP_HASHBITS) ^
+				 hash_ip((unsigned long)addr.s_addr));
+
+	if (ch)
+		return container_of(ch, struct ip_map, h);
+	else
+		return NULL;
+}
 
+static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry)
+{
+	struct ip_map ip;
+	struct cache_head *ch;
+
+	ip.m_client = udom;
+	ip.h.flags = 0;
+	if (!udom)
+		set_bit(CACHE_NEGATIVE, &ip.h.flags);
+	else {
+		ip.m_add_change = udom->addr_changes;
+		/* if this is from the legacy set_client system call,
+		 * we need m_add_change to be one higher
+		 */
+		if (expiry == NEVER)
+			ip.m_add_change++;
+	}
+	ip.h.expiry_time = expiry;
+	ch = sunrpc_cache_update(&ip_map_cache,
+				 &ip.h, &ipm->h,
+				 hash_str(ipm->m_class, IP_HASHBITS) ^
+				 hash_ip((unsigned long)ipm->m_addr.s_addr));
+	if (!ch)
+		return -ENOMEM;
+	ip_map_put(ch, &ip_map_cache);
+	return 0;
+}
 
 int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
 {
 	struct unix_domain *udom;
-	struct ip_map ip, *ipmp;
+	struct ip_map *ipmp;
 
 	if (dom->flavour != &svcauth_unix)
 		return -EINVAL;
 	udom = container_of(dom, struct unix_domain, h);
-	strcpy(ip.m_class, "nfsd");
-	ip.m_addr = addr;
-	ip.m_client = udom;
-	ip.m_add_change = udom->addr_changes+1;
-	ip.h.flags = 0;
-	ip.h.expiry_time = NEVER;
-	
-	ipmp = ip_map_lookup(&ip, 1);
+	ipmp = ip_map_lookup("nfsd", addr);
 
-	if (ipmp) {
-		ip_map_put(&ipmp->h, &ip_map_cache);
-		return 0;
-	} else
+	if (ipmp)
+		return ip_map_update(ipmp, udom, NEVER);
+	else
 		return -ENOMEM;
 }
 
@@ -304,7 +354,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
 	strcpy(key.m_class, "nfsd");
 	key.m_addr = addr;
 
-	ipm = ip_map_lookup(&key, 0);
+	ipm = ip_map_lookup("nfsd", addr);
 
 	if (!ipm)
 		return NULL;
@@ -331,16 +381,14 @@ void svcauth_unix_purge(void)
 static int
 svcauth_unix_set_client(struct svc_rqst *rqstp)
 {
-	struct ip_map key, *ipm;
+	struct ip_map *ipm;
 
 	rqstp->rq_client = NULL;
 	if (rqstp->rq_proc == 0)
 		return SVC_OK;
 
-	strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
-	key.m_addr = rqstp->rq_addr.sin_addr;
-
-	ipm = ip_map_lookup(&key, 0);
+	ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
+			    rqstp->rq_addr.sin_addr);
 
 	if (ipm == NULL)
 		return SVC_DENIED;
-- 
cgit v0.10.2


From 4f7774c3a0558526c0faaf525581f2029480b313 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:03 -0800
Subject: [PATCH] knfsd: Use new cache_lookup for svc_export

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c591761..2ebd77d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -258,16 +258,6 @@ static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
 
 static struct cache_head *export_table[EXPORT_HASHMAX];
 
-static inline int svc_export_hash(struct svc_export *item)
-{
-	int rv;
-
-	rv = hash_ptr(item->ex_client, EXPORT_HASHBITS);
-	rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS);
-	rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS);
-	return rv;
-}
-
 void svc_export_put(struct cache_head *item, struct cache_detail *cd)
 {
 	if (cache_put(item, cd)) {
@@ -298,7 +288,8 @@ static void svc_export_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-static struct svc_export *svc_export_lookup(struct svc_export *, int);
+struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old);
+static struct svc_export *svc_export_lookup(struct svc_export *);
 
 static int check_export(struct inode *inode, int flags)
 {
@@ -411,11 +402,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		if (err) goto out;
 	}
 
-	expp = svc_export_lookup(&exp, 1);
+	expp = svc_export_lookup(&exp);
 	if (expp)
-		exp_put(expp);
-	err = 0;
+		expp = svc_export_update(&exp, expp);
+	else
+		err = -ENOMEM;
 	cache_flush();
+	if (expp == NULL)
+		err = -ENOMEM;
+	else
+		exp_put(expp);
  out:
 	if (nd.dentry)
 		path_release(&nd);
@@ -449,40 +445,95 @@ static int svc_export_show(struct seq_file *m,
 	seq_puts(m, ")\n");
 	return 0;
 }
-struct cache_detail svc_export_cache = {
-	.owner		= THIS_MODULE,
-	.hash_size	= EXPORT_HASHMAX,
-	.hash_table	= export_table,
-	.name		= "nfsd.export",
-	.cache_put	= svc_export_put,
-	.cache_request	= svc_export_request,
-	.cache_parse	= svc_export_parse,
-	.cache_show	= svc_export_show,
-};
-
-static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
+static int svc_export_match(struct cache_head *a, struct cache_head *b)
 {
-	return a->ex_client == b->ex_client &&
-		a->ex_dentry == b->ex_dentry &&
-		a->ex_mnt == b->ex_mnt;
+	struct svc_export *orig = container_of(a, struct svc_export, h);
+	struct svc_export *new = container_of(b, struct svc_export, h);
+	return orig->ex_client == new->ex_client &&
+		orig->ex_dentry == new->ex_dentry &&
+		orig->ex_mnt == new->ex_mnt;
 }
-static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
+
+static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct svc_export *new = container_of(cnew, struct svc_export, h);
+	struct svc_export *item = container_of(citem, struct svc_export, h);
+
 	kref_get(&item->ex_client->ref);
 	new->ex_client = item->ex_client;
 	new->ex_dentry = dget(item->ex_dentry);
 	new->ex_mnt = mntget(item->ex_mnt);
 }
 
-static inline void svc_export_update(struct svc_export *new, struct svc_export *item)
+static void export_update(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct svc_export *new = container_of(cnew, struct svc_export, h);
+	struct svc_export *item = container_of(citem, struct svc_export, h);
+
 	new->ex_flags = item->ex_flags;
 	new->ex_anon_uid = item->ex_anon_uid;
 	new->ex_anon_gid = item->ex_anon_gid;
 	new->ex_fsid = item->ex_fsid;
 }
 
-static DefineSimpleCacheLookup(svc_export, svc_export)
+static struct cache_head *svc_export_alloc(void)
+{
+	struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i)
+		return &i->h;
+	else
+		return NULL;
+}
+
+struct cache_detail svc_export_cache = {
+	.owner		= THIS_MODULE,
+	.hash_size	= EXPORT_HASHMAX,
+	.hash_table	= export_table,
+	.name		= "nfsd.export",
+	.cache_put	= svc_export_put,
+	.cache_request	= svc_export_request,
+	.cache_parse	= svc_export_parse,
+	.cache_show	= svc_export_show,
+	.match		= svc_export_match,
+	.init		= svc_export_init,
+	.update		= export_update,
+	.alloc		= svc_export_alloc,
+};
+
+static struct svc_export *
+svc_export_lookup(struct svc_export *exp)
+{
+	struct cache_head *ch;
+	int hash;
+	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS);
+
+	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_export, h);
+	else
+		return NULL;
+}
+
+struct svc_export *
+svc_export_update(struct svc_export *new, struct svc_export *old)
+{
+	struct cache_head *ch;
+	int hash;
+	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS);
+	hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS);
+
+	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+				 &old->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_export, h);
+	else
+		return NULL;
+}
 
 
 struct svc_expkey *
@@ -568,7 +619,7 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
 	key.ex_mnt = mnt;
 	key.ex_dentry = dentry;
 
-	exp = svc_export_lookup(&key, 0);
+	exp = svc_export_lookup(&key);
 	if (exp != NULL) 
 		switch (cache_check(&svc_export_cache, &exp->h, reqp)) {
 		case 0: break;
@@ -770,13 +821,13 @@ exp_export(struct nfsctl_export *nxp)
 	new.ex_anon_gid = nxp->ex_anon_gid;
 	new.ex_fsid = nxp->ex_dev;
 
-	exp = svc_export_lookup(&new, 1);
+	exp = svc_export_lookup(&new);
+	if (exp)
+		exp = svc_export_update(&new, exp);
 
-	if (exp == NULL)
+	if (!exp)
 		goto finish;
 
-	err = 0;
-
 	if (exp_hash(clp, exp) ||
 	    exp_fsid_hash(clp, exp)) {
 		/* failed to create at least one index */
-- 
cgit v0.10.2


From 8d270f7f4cdab792d7263926ac59f747258735ee Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:04 -0800
Subject: [PATCH] knfsd: Use new cache_lookup for svc_expkey cache

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 2ebd77d..abd6896 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,17 +57,6 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-static inline int svc_expkey_hash(struct svc_expkey *item)
-{
-	int hash = item->ek_fsidtype;
-	char * cp = (char*)item->ek_fsid;
-	int len = key_len(item->ek_fsidtype);
-
-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
-	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
-	return hash & EXPKEY_HASHMASK;
-}
-
 void expkey_put(struct cache_head *item, struct cache_detail *cd)
 {
 	if (cache_put(item, cd)) {
@@ -97,7 +86,8 @@ static void expkey_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int);
+static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
+static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client fsidtype fsid [path] */
@@ -108,6 +98,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	int fsidtype;
 	char *ep;
 	struct svc_expkey key;
+	struct svc_expkey *ek;
 
 	if (mesg[mlen-1] != '\n')
 		return -EINVAL;
@@ -152,20 +143,25 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	key.ek_fsidtype = fsidtype;
 	memcpy(key.ek_fsid, buf, len);
 
+	ek = svc_expkey_lookup(&key);
+	err = -ENOMEM;
+	if (!ek)
+		goto out;
+
 	/* now we want a pathname, or empty meaning NEGATIVE  */
+	err = -EINVAL;
 	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0)
 		goto out;
 	dprintk("Path seems to be <%s>\n", buf);
 	err = 0;
 	if (len == 0) {
-		struct svc_expkey *ek;
 		set_bit(CACHE_NEGATIVE, &key.h.flags);
-		ek = svc_expkey_lookup(&key, 1);
+		ek = svc_expkey_update(&key, ek);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
+		else err = -ENOMEM;
 	} else {
 		struct nameidata nd;
-		struct svc_expkey *ek;
 		err = path_lookup(buf, 0, &nd);
 		if (err)
 			goto out;
@@ -174,10 +170,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		key.ek_mnt = nd.mnt;
 		key.ek_dentry = nd.dentry;
 		
-		ek = svc_expkey_lookup(&key, 1);
+		ek = svc_expkey_update(&key, ek);
 		if (ek)
 			expkey_put(&ek->h, &svc_expkey_cache);
-		err = 0;
+		else
+			err = -ENOMEM;
 		path_release(&nd);
 	}
 	cache_flush();
@@ -213,29 +210,25 @@ static int expkey_show(struct seq_file *m,
 	seq_printf(m, "\n");
 	return 0;
 }
-	
-struct cache_detail svc_expkey_cache = {
-	.owner		= THIS_MODULE,
-	.hash_size	= EXPKEY_HASHMAX,
-	.hash_table	= expkey_table,
-	.name		= "nfsd.fh",
-	.cache_put	= expkey_put,
-	.cache_request	= expkey_request,
-	.cache_parse	= expkey_parse,
-	.cache_show	= expkey_show,
-};
 
-static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
+static inline int expkey_match (struct cache_head *a, struct cache_head *b)
 {
-	if (a->ek_fsidtype != b->ek_fsidtype ||
-	    a->ek_client != b->ek_client ||
-	    memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0)
+	struct svc_expkey *orig = container_of(a, struct svc_expkey, h);
+	struct svc_expkey *new = container_of(b, struct svc_expkey, h);
+
+	if (orig->ek_fsidtype != new->ek_fsidtype ||
+	    orig->ek_client != new->ek_client ||
+	    memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0)
 		return 0;
 	return 1;
 }
 
-static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
+static inline void expkey_init(struct cache_head *cnew,
+				   struct cache_head *citem)
 {
+	struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
 	kref_get(&item->ek_client->ref);
 	new->ek_client = item->ek_client;
 	new->ek_fsidtype = item->ek_fsidtype;
@@ -244,13 +237,80 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it
 	new->ek_fsid[2] = item->ek_fsid[2];
 }
 
-static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
+static inline void expkey_update(struct cache_head *cnew,
+				   struct cache_head *citem)
 {
+	struct svc_expkey *new = container_of(cnew, struct svc_expkey, h);
+	struct svc_expkey *item = container_of(citem, struct svc_expkey, h);
+
 	new->ek_mnt = mntget(item->ek_mnt);
 	new->ek_dentry = dget(item->ek_dentry);
 }
 
-static DefineSimpleCacheLookup(svc_expkey, svc_expkey)
+static struct cache_head *expkey_alloc(void)
+{
+	struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i)
+		return &i->h;
+	else
+		return NULL;
+}
+
+struct cache_detail svc_expkey_cache = {
+	.owner		= THIS_MODULE,
+	.hash_size	= EXPKEY_HASHMAX,
+	.hash_table	= expkey_table,
+	.name		= "nfsd.fh",
+	.cache_put	= expkey_put,
+	.cache_request	= expkey_request,
+	.cache_parse	= expkey_parse,
+	.cache_show	= expkey_show,
+	.match		= expkey_match,
+	.init		= expkey_init,
+	.update       	= expkey_update,
+	.alloc		= expkey_alloc,
+};
+
+static struct svc_expkey *
+svc_expkey_lookup(struct svc_expkey *item)
+{
+	struct cache_head *ch;
+	int hash = item->ek_fsidtype;
+	char * cp = (char*)item->ek_fsid;
+	int len = key_len(item->ek_fsidtype);
+
+	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+	hash &= EXPKEY_HASHMASK;
+
+	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+				 hash);
+	if (ch)
+		return container_of(ch, struct svc_expkey, h);
+	else
+		return NULL;
+}
+
+static struct svc_expkey *
+svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+{
+	struct cache_head *ch;
+	int hash = new->ek_fsidtype;
+	char * cp = (char*)new->ek_fsid;
+	int len = key_len(new->ek_fsidtype);
+
+	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
+	hash &= EXPKEY_HASHMASK;
+
+	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+				 &old->h, hash);
+	if (ch)
+		return container_of(ch, struct svc_expkey, h);
+	else
+		return NULL;
+}
+
 
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
@@ -549,7 +609,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
 	key.ek_fsidtype = fsid_type;
 	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
 
-	ek = svc_expkey_lookup(&key, 0);
+	ek = svc_expkey_lookup(&key);
 	if (ek != NULL)
 		if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp)))
 			ek = ERR_PTR(err);
@@ -569,7 +629,9 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	key.h.expiry_time = NEVER;
 	key.h.flags = 0;
 
-	ek = svc_expkey_lookup(&key, 1);
+	ek = svc_expkey_lookup(&key);
+	if (ek)
+		ek = svc_expkey_update(&key,ek);
 	if (ek) {
 		expkey_put(&ek->h, &svc_expkey_cache);
 		return 0;
-- 
cgit v0.10.2


From d4d11ea9d6d6fcfaa66fbd10fd4a8d43aa575597 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:04 -0800
Subject: [PATCH] knfsd: Use new sunrpc cache for rsi cache

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index aadb4e8..237d935 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -78,7 +78,8 @@ struct rsi {
 
 static struct cache_head *rsi_table[RSI_HASHMAX];
 static struct cache_detail rsi_cache;
-static struct rsi *rsi_lookup(struct rsi *item, int set);
+static struct rsi *rsi_update(struct rsi *new, struct rsi *old);
+static struct rsi *rsi_lookup(struct rsi *item);
 
 static void rsi_free(struct rsi *rsii)
 {
@@ -103,8 +104,10 @@ static inline int rsi_hash(struct rsi *item)
 	     ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS);
 }
 
-static inline int rsi_match(struct rsi *item, struct rsi *tmp)
+static int rsi_match(struct cache_head *a, struct cache_head *b)
 {
+	struct rsi *item = container_of(a, struct rsi, h);
+	struct rsi *tmp = container_of(b, struct rsi, h);
 	return netobj_equal(&item->in_handle, &tmp->in_handle)
 		&& netobj_equal(&item->in_token, &tmp->in_token);
 }
@@ -125,8 +128,11 @@ static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src)
 	return dup_to_netobj(dst, src->data, src->len);
 }
 
-static inline void rsi_init(struct rsi *new, struct rsi *item)
+static void rsi_init(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct rsi *new = container_of(cnew, struct rsi, h);
+	struct rsi *item = container_of(citem, struct rsi, h);
+
 	new->out_handle.data = NULL;
 	new->out_handle.len = 0;
 	new->out_token.data = NULL;
@@ -141,8 +147,11 @@ static inline void rsi_init(struct rsi *new, struct rsi *item)
 	item->in_token.data = NULL;
 }
 
-static inline void rsi_update(struct rsi *new, struct rsi *item)
+static void update_rsi(struct cache_head *cnew, struct cache_head *citem)
 {
+	struct rsi *new = container_of(cnew, struct rsi, h);
+	struct rsi *item = container_of(citem, struct rsi, h);
+
 	BUG_ON(new->out_handle.data || new->out_token.data);
 	new->out_handle.len = item->out_handle.len;
 	item->out_handle.len = 0;
@@ -157,6 +166,15 @@ static inline void rsi_update(struct rsi *new, struct rsi *item)
 	new->minor_status = item->minor_status;
 }
 
+static struct cache_head *rsi_alloc(void)
+{
+	struct rsi *rsii = kmalloc(sizeof(*rsii), GFP_KERNEL);
+	if (rsii)
+		return &rsii->h;
+	else
+		return NULL;
+}
+
 static void rsi_request(struct cache_detail *cd,
                        struct cache_head *h,
                        char **bpp, int *blen)
@@ -198,6 +216,10 @@ static int rsi_parse(struct cache_detail *cd,
 	if (dup_to_netobj(&rsii.in_token, buf, len))
 		goto out;
 
+	rsip = rsi_lookup(&rsii);
+	if (!rsip)
+		goto out;
+
 	rsii.h.flags = 0;
 	/* expiry */
 	expiry = get_expiry(&mesg);
@@ -240,12 +262,14 @@ static int rsi_parse(struct cache_detail *cd,
 			goto out;
 	}
 	rsii.h.expiry_time = expiry;
-	rsip = rsi_lookup(&rsii, 1);
+	rsip = rsi_update(&rsii, rsip);
 	status = 0;
 out:
 	rsi_free(&rsii);
 	if (rsip)
 		rsi_put(&rsip->h, &rsi_cache);
+	else
+		status = -ENOMEM;
 	return status;
 }
 
@@ -257,9 +281,37 @@ static struct cache_detail rsi_cache = {
 	.cache_put      = rsi_put,
 	.cache_request  = rsi_request,
 	.cache_parse    = rsi_parse,
+	.match		= rsi_match,
+	.init		= rsi_init,
+	.update		= update_rsi,
+	.alloc		= rsi_alloc,
 };
 
-static DefineSimpleCacheLookup(rsi, rsi)
+static struct rsi *rsi_lookup(struct rsi *item)
+{
+	struct cache_head *ch;
+	int hash = rsi_hash(item);
+
+	ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash);
+	if (ch)
+		return container_of(ch, struct rsi, h);
+	else
+		return NULL;
+}
+
+static struct rsi *rsi_update(struct rsi *new, struct rsi *old)
+{
+	struct cache_head *ch;
+	int hash = rsi_hash(new);
+
+	ch = sunrpc_cache_update(&rsi_cache, &new->h,
+				 &old->h, hash);
+	if (ch)
+		return container_of(ch, struct rsi, h);
+	else
+		return NULL;
+}
+
 
 /*
  * The rpcsec_context cache is used to store a context that is
@@ -895,7 +947,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 			goto drop;
 		}
 
-		rsip = rsi_lookup(&rsikey, 0);
+		rsip = rsi_lookup(&rsikey);
 		rsi_free(&rsikey);
 		if (!rsip) {
 			goto drop;
-- 
cgit v0.10.2


From 17f834b6d2e0b102ab53d73ba44d5dbb34c38f90 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:05 -0800
Subject: [PATCH] knfsd: Use new cache code for rsc cache

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 237d935..3801526 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -345,7 +345,8 @@ struct rsc {
 
 static struct cache_head *rsc_table[RSC_HASHMAX];
 static struct cache_detail rsc_cache;
-static struct rsc *rsc_lookup(struct rsc *item, int set);
+static struct rsc *rsc_update(struct rsc *new, struct rsc *old);
+static struct rsc *rsc_lookup(struct rsc *item);
 
 static void rsc_free(struct rsc *rsci)
 {
@@ -372,15 +373,21 @@ rsc_hash(struct rsc *rsci)
 	return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS);
 }
 
-static inline int
-rsc_match(struct rsc *new, struct rsc *tmp)
+static int
+rsc_match(struct cache_head *a, struct cache_head *b)
 {
+	struct rsc *new = container_of(a, struct rsc, h);
+	struct rsc *tmp = container_of(b, struct rsc, h);
+
 	return netobj_equal(&new->handle, &tmp->handle);
 }
 
-static inline void
-rsc_init(struct rsc *new, struct rsc *tmp)
+static void
+rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 {
+	struct rsc *new = container_of(cnew, struct rsc, h);
+	struct rsc *tmp = container_of(ctmp, struct rsc, h);
+
 	new->handle.len = tmp->handle.len;
 	tmp->handle.len = 0;
 	new->handle.data = tmp->handle.data;
@@ -389,9 +396,12 @@ rsc_init(struct rsc *new, struct rsc *tmp)
 	new->cred.cr_group_info = NULL;
 }
 
-static inline void
-rsc_update(struct rsc *new, struct rsc *tmp)
+static void
+update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 {
+	struct rsc *new = container_of(cnew, struct rsc, h);
+	struct rsc *tmp = container_of(ctmp, struct rsc, h);
+
 	new->mechctx = tmp->mechctx;
 	tmp->mechctx = NULL;
 	memset(&new->seqdata, 0, sizeof(new->seqdata));
@@ -400,6 +410,16 @@ rsc_update(struct rsc *new, struct rsc *tmp)
 	tmp->cred.cr_group_info = NULL;
 }
 
+static struct cache_head *
+rsc_alloc(void)
+{
+	struct rsc *rsci = kmalloc(sizeof(*rsci), GFP_KERNEL);
+	if (rsci)
+		return &rsci->h;
+	else
+		return NULL;
+}
+
 static int rsc_parse(struct cache_detail *cd,
 		     char *mesg, int mlen)
 {
@@ -425,6 +445,10 @@ static int rsc_parse(struct cache_detail *cd,
 	if (expiry == 0)
 		goto out;
 
+	rscp = rsc_lookup(&rsci);
+	if (!rscp)
+		goto out;
+
 	/* uid, or NEGATIVE */
 	rv = get_int(&mesg, &rsci.cred.cr_uid);
 	if (rv == -EINVAL)
@@ -480,12 +504,14 @@ static int rsc_parse(struct cache_detail *cd,
 		gss_mech_put(gm);
 	}
 	rsci.h.expiry_time = expiry;
-	rscp = rsc_lookup(&rsci, 1);
+	rscp = rsc_update(&rsci, rscp);
 	status = 0;
 out:
 	rsc_free(&rsci);
 	if (rscp)
 		rsc_put(&rscp->h, &rsc_cache);
+	else
+		status = -ENOMEM;
 	return status;
 }
 
@@ -496,9 +522,37 @@ static struct cache_detail rsc_cache = {
 	.name		= "auth.rpcsec.context",
 	.cache_put	= rsc_put,
 	.cache_parse	= rsc_parse,
+	.match		= rsc_match,
+	.init		= rsc_init,
+	.update		= update_rsc,
+	.alloc		= rsc_alloc,
 };
 
-static DefineSimpleCacheLookup(rsc, rsc);
+static struct rsc *rsc_lookup(struct rsc *item)
+{
+	struct cache_head *ch;
+	int hash = rsc_hash(item);
+
+	ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash);
+	if (ch)
+		return container_of(ch, struct rsc, h);
+	else
+		return NULL;
+}
+
+static struct rsc *rsc_update(struct rsc *new, struct rsc *old)
+{
+	struct cache_head *ch;
+	int hash = rsc_hash(new);
+
+	ch = sunrpc_cache_update(&rsc_cache, &new->h,
+				 &old->h, hash);
+	if (ch)
+		return container_of(ch, struct rsc, h);
+	else
+		return NULL;
+}
+
 
 static struct rsc *
 gss_svc_searchbyctx(struct xdr_netobj *handle)
@@ -509,7 +563,7 @@ gss_svc_searchbyctx(struct xdr_netobj *handle)
 	memset(&rsci, 0, sizeof(rsci));
 	if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
 		return NULL;
-	found = rsc_lookup(&rsci, 0);
+	found = rsc_lookup(&rsci);
 	rsc_free(&rsci);
 	if (!found)
 		return NULL;
-- 
cgit v0.10.2


From f9ecc921b5b5e135050e7f22fef873c249aee3fc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:06 -0800
Subject: [PATCH] knfsd: Use new cache code for name/id lookup caches

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index dea690a..75cfbb6 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -82,9 +82,12 @@ struct ent {
 #define ENT_HASHMAX           (1 << ENT_HASHBITS)
 #define ENT_HASHMASK          (ENT_HASHMAX - 1)
 
-static inline void
-ent_init(struct ent *new, struct ent *itm)
+static void
+ent_init(struct cache_head *cnew, struct cache_head *citm)
 {
+	struct ent *new = container_of(cnew, struct ent, h);
+	struct ent *itm = container_of(citm, struct ent, h);
+
 	new->id = itm->id;
 	new->type = itm->type;
 
@@ -92,12 +95,6 @@ ent_init(struct ent *new, struct ent *itm)
 	strlcpy(new->authname, itm->authname, sizeof(new->name));
 }
 
-static inline void
-ent_update(struct ent *new, struct ent *itm)
-{
-	ent_init(new, itm);
-}
-
 static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
@@ -107,6 +104,16 @@ ent_put(struct cache_head *ch, struct cache_detail *cd)
 	}
 }
 
+static struct cache_head *
+ent_alloc(void)
+{
+	struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL);
+	if (e)
+		return &e->h;
+	else
+		return NULL;
+}
+
 /*
  * ID -> Name cache
  */
@@ -143,9 +150,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 	(*bpp)[-1] = '\n';
 }
 
-static inline int
-idtoname_match(struct ent *a, struct ent *b)
+static int
+idtoname_match(struct cache_head *ca, struct cache_head *cb)
 {
+	struct ent *a = container_of(ca, struct ent, h);
+	struct ent *b = container_of(cb, struct ent, h);
+
 	return (a->id == b->id && a->type == b->type &&
 	    strcmp(a->authname, b->authname) == 0);
 }
@@ -178,7 +188,8 @@ warn_no_idmapd(struct cache_detail *detail)
 
 
 static int         idtoname_parse(struct cache_detail *, char *, int);
-static struct ent *idtoname_lookup(struct ent *, int);
+static struct ent *idtoname_lookup(struct ent *);
+static struct ent *idtoname_update(struct ent *, struct ent *);
 
 static struct cache_detail idtoname_cache = {
 	.owner		= THIS_MODULE,
@@ -190,6 +201,10 @@ static struct cache_detail idtoname_cache = {
 	.cache_parse	= idtoname_parse,
 	.cache_show	= idtoname_show,
 	.warn_no_listener = warn_no_idmapd,
+	.match		= idtoname_match,
+	.init		= ent_init,
+	.update		= ent_init,
+	.alloc		= ent_alloc,
 };
 
 int
@@ -232,6 +247,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (ent.h.expiry_time == 0)
 		goto out;
 
+	error = -ENOMEM;
+	res = idtoname_lookup(&ent);
+	if (!res)
+		goto out;
+
 	/* Name */
 	error = qword_get(&buf, buf1, PAGE_SIZE);
 	if (error == -EINVAL)
@@ -246,7 +266,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 		memcpy(ent.name, buf1, sizeof(ent.name));
 	}
 	error = -ENOMEM;
-	if ((res = idtoname_lookup(&ent, 1)) == NULL)
+	res = idtoname_update(&ent, res);
+	if (res == NULL)
 		goto out;
 
 	ent_put(&res->h, &idtoname_cache);
@@ -258,7 +279,31 @@ out:
 	return error;
 }
 
-static DefineSimpleCacheLookup(ent, idtoname);
+
+static struct ent *
+idtoname_lookup(struct ent *item)
+{
+	struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache,
+						    &item->h,
+						    idtoname_hash(item));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
+static struct ent *
+idtoname_update(struct ent *new, struct ent *old)
+{
+	struct cache_head *ch = sunrpc_cache_update(&idtoname_cache,
+						    &new->h, &old->h,
+						    idtoname_hash(new));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
 
 /*
  * Name -> ID cache
@@ -285,9 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 	(*bpp)[-1] = '\n';
 }
 
-static inline int
-nametoid_match(struct ent *a, struct ent *b)
+static int
+nametoid_match(struct cache_head *ca, struct cache_head *cb)
 {
+	struct ent *a = container_of(ca, struct ent, h);
+	struct ent *b = container_of(cb, struct ent, h);
+
 	return (a->type == b->type && strcmp(a->name, b->name) == 0 &&
 	    strcmp(a->authname, b->authname) == 0);
 }
@@ -311,7 +359,8 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 	return 0;
 }
 
-static struct ent *nametoid_lookup(struct ent *, int);
+static struct ent *nametoid_lookup(struct ent *);
+static struct ent *nametoid_update(struct ent *, struct ent *);
 static int         nametoid_parse(struct cache_detail *, char *, int);
 
 static struct cache_detail nametoid_cache = {
@@ -324,6 +373,10 @@ static struct cache_detail nametoid_cache = {
 	.cache_parse	= nametoid_parse,
 	.cache_show	= nametoid_show,
 	.warn_no_listener = warn_no_idmapd,
+	.match		= nametoid_match,
+	.init		= ent_init,
+	.update		= ent_init,
+	.alloc		= ent_alloc,
 };
 
 static int
@@ -373,7 +426,11 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 		set_bit(CACHE_NEGATIVE, &ent.h.flags);
 
 	error = -ENOMEM;
-	if ((res = nametoid_lookup(&ent, 1)) == NULL)
+	res = nametoid_lookup(&ent);
+	if (res == NULL)
+		goto out;
+	res = nametoid_update(&ent, res);
+	if (res == NULL)
 		goto out;
 
 	ent_put(&res->h, &nametoid_cache);
@@ -384,7 +441,30 @@ out:
 	return (error);
 }
 
-static DefineSimpleCacheLookup(ent, nametoid);
+
+static struct ent *
+nametoid_lookup(struct ent *item)
+{
+	struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache,
+						    &item->h,
+						    nametoid_hash(item));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
+
+static struct ent *
+nametoid_update(struct ent *new, struct ent *old)
+{
+	struct cache_head *ch = sunrpc_cache_update(&nametoid_cache,
+						    &new->h, &old->h,
+						    nametoid_hash(new));
+	if (ch)
+		return container_of(ch, struct ent, h);
+	else
+		return NULL;
+}
 
 /*
  * Exported API
@@ -452,24 +532,24 @@ idmap_defer(struct cache_req *req)
 }
 
 static inline int
-do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
 		struct cache_detail *detail, struct ent **item,
 		struct idmap_defer_req *mdr)
 {
-	*item = lookup_fn(key, 0);
+	*item = lookup_fn(key);
 	if (!*item)
 		return -ENOMEM;
 	return cache_check(detail, &(*item)->h, &mdr->req);
 }
 
 static inline int
-do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int),
+do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
 			struct ent *key, struct cache_detail *detail,
 			struct ent **item)
 {
 	int ret = -ENOMEM;
 
-	*item = lookup_fn(key, 0);
+	*item = lookup_fn(key);
 	if (!*item)
 		goto out_err;
 	ret = -ETIMEDOUT;
@@ -490,7 +570,7 @@ out_err:
 
 static int
 idmap_lookup(struct svc_rqst *rqstp,
-		struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+		struct ent *(*lookup_fn)(struct ent *), struct ent *key,
 		struct cache_detail *detail, struct ent **item)
 {
 	struct idmap_defer_req *mdr;
-- 
cgit v0.10.2


From 4013edea9a0b6cdcb1fdf5d4011e47e068fd6efb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:07 -0800
Subject: [PATCH] knfsd: An assortment of little fixes to the sunrpc cache code

- in cache_check, h must be non-NULL as it has been de-referenced,
  so don't bother checking for NULL.

- When a cache-item is updated, we need to call cache_revisit_request to see
  if there is a pending request waiting for that item.  We were using
  a transition to CACHE_VALID to see if that was needed, however that is
  wrong as an expired entry will still be marked 'valid' (as the data is valid
  and will need to be released).  So instead use an off transition for
  CACHE_PENDING which is exactly the right thing to test.

- Add a little bit more debugging info.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4449dc5..b242f49 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -208,7 +208,7 @@ int cache_check(struct cache_detail *detail,
 	if (rv == -EAGAIN)
 		cache_defer_req(rqstp, h);
 
-	if (rv && h)
+	if (rv)
 		detail->cache_put(h, detail);
 	return rv;
 }
@@ -223,8 +223,10 @@ void cache_fresh(struct cache_detail *detail,
 	head->last_refresh = get_seconds();
 	if (!test_and_set_bit(CACHE_VALID, &head->flags))
 		cache_revisit_request(head);
-	if (test_and_clear_bit(CACHE_PENDING, &head->flags))
+	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
+		cache_revisit_request(head);
 		queue_loose(detail, head);
+	}
 }
 
 /*
@@ -551,7 +553,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item)
 		/* there was one too many */
 		dreq->revisit(dreq, 1);
 	}
-	if (test_bit(CACHE_VALID, &item->flags)) {
+	if (!test_bit(CACHE_PENDING, &item->flags)) {
 		/* must have just been validated... */
 		cache_revisit_request(item);
 	}
@@ -892,7 +894,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
 			if (cr->item != ch)
 				continue;
 			if (cr->readers != 0)
-				break;
+				continue;
 			list_del(&cr->q.list);
 			spin_unlock(&queue_lock);
 			detail->cache_put(cr->item, detail);
@@ -1180,8 +1182,8 @@ static int c_show(struct seq_file *m, void *p)
 		return cd->cache_show(m, cd, NULL);
 
 	ifdebug(CACHE)
-		seq_printf(m, "# expiry=%ld refcnt=%d\n",
-			   cp->expiry_time, atomic_read(&cp->refcnt));
+		seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
+			   cp->expiry_time, atomic_read(&cp->refcnt), cp->flags);
 	cache_get(cp);
 	if (cache_check(cd, cp, NULL))
 		/* cache_check does a cache_put on failure */
-- 
cgit v0.10.2


From 4d90452cb23b08a9a9dd001010f0ee6b1ee83a45 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:07 -0800
Subject: [PATCH] knfsd: Remove DefineCacheLookup

This has been replaced by more traditional code.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 3e17a5f..afc481d 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -128,119 +128,6 @@ struct cache_deferred_req {
 					   int too_many);
 };
 
-/*
- * just like a template in C++, this macro does cache lookup
- * for us.
- * The function is passed some sort of HANDLE from which a cache_detail
- * structure can be determined (via SETUP, DETAIL), a template
- * cache entry (type RTN*), and a "set" flag.  Using the HASHFN and the 
- * TEST, the function will try to find a matching cache entry in the cache.
- * If "set" == 0 :
- *    If an entry is found, it is returned
- *    If no entry is found, a new non-VALID entry is created.
- * If "set" == 1 :
- *    If no entry is found a new one is inserted with data from "template"
- *    If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
- *    If a CACHE_VALID entry is found, a new entry is swapped in with data
- *       from "template"
- *
- * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
- * run but insteead CACHE_NEGATIVE is set in any new item.
-
- *  In any case, the new entry is returned with a reference count.
- *
- *    
- * RTN is a struct type for a cache entry
- * MEMBER is the member of the cache which is cache_head, which must be first
- * FNAME is the name for the function	
- * ARGS are arguments to function and must contain RTN *item, int set.  May
- *   also contain something to be usedby SETUP or DETAIL to find cache_detail.
- * SETUP  locates the cache detail and makes it available as...
- * DETAIL identifies the cache detail, possibly set up by SETUP
- * HASHFN returns a hash value of the cache entry "item"
- * TEST  tests if "tmp" matches "item"
- * INIT copies key information from "item" to "new"
- * UPDATE copies content information from "item" to "tmp"
- */
-#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE)	\
-RTN *FNAME ARGS										\
-{											\
-	RTN *tmp, *new=NULL;								\
-	struct cache_head **hp, **head;							\
-	SETUP;										\
-	head = &(DETAIL)->hash_table[HASHFN];						\
- retry:											\
-	if (set||new) write_lock(&(DETAIL)->hash_lock);					\
-	else read_lock(&(DETAIL)->hash_lock);						\
-	for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) {				\
-		tmp = container_of(*hp, RTN, MEMBER);					\
-		if (TEST) { /* found a match */						\
-											\
-			if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new)	\
-				break;							\
-											\
-			if (new)							\
-				{INIT;}							\
-			if (set) {							\
-				if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
-				{ /* need to swap in new */				\
-					RTN *t2;					\
-											\
-					new->MEMBER.next = tmp->MEMBER.next;		\
-					*hp = &new->MEMBER;				\
-					tmp->MEMBER.next = NULL;			\
-					t2 = tmp; tmp = new; new = t2;			\
-				}							\
-				if (test_bit(CACHE_NEGATIVE,  &item->MEMBER.flags))	\
-					set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags);	\
-				else {							\
-					UPDATE;						\
-					clear_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags);	\
-				}							\
-			}								\
-			cache_get(&tmp->MEMBER);					\
-			if (set||new) write_unlock(&(DETAIL)->hash_lock);		\
-			else read_unlock(&(DETAIL)->hash_lock);				\
-			if (set)							\
-				cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \
-			if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0);	\
-			if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL);		\
-			return tmp;							\
-		}									\
-	}										\
-	/* Didn't find anything */							\
-	if (new) {									\
-		INIT;									\
-		new->MEMBER.next = *head;						\
-		*head = &new->MEMBER;							\
-		(DETAIL)->entries ++;							\
-		cache_get(&new->MEMBER);						\
-		if (set) {								\
-			tmp = new;							\
-			if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags))		\
-				set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags);		\
-			else {UPDATE;}							\
-		}									\
-	}										\
-	if (set||new) write_unlock(&(DETAIL)->hash_lock);				\
-	else read_unlock(&(DETAIL)->hash_lock);						\
-	if (new && set)									\
-		cache_fresh(DETAIL, &new->MEMBER, item->MEMBER.expiry_time);		\
-	if (new)				       					\
-		return new;								\
-	new = kmalloc(sizeof(*new), GFP_KERNEL);					\
-	if (new) {									\
-		cache_init(&new->MEMBER);						\
-		goto retry;								\
-	}										\
-	return NULL;									\
-}
-
-#define DefineSimpleCacheLookup(STRUCT, FUNC)				\
-        DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
-        (struct STRUCT *item, int set), /*no setup */,			\
-	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
-	STRUCT##_init(new, item), STRUCT##_update(tmp, item))
 
 extern struct cache_head *
 sunrpc_cache_lookup(struct cache_detail *detail,
-- 
cgit v0.10.2


From ebd0cb1af3be2729cc1f574681dfba01fcf458d9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:08 -0800
Subject: [PATCH] knfsd: Unexport cache_fresh and fix a small race

Cache_fresh is now only used in cache.c, so unexport it.

Part of cache_fresh (setting CACHE_VALID) should really be done under the
lock, while part (calling cache_revisit_request etc) must be done outside the
lock.  So we split it up appropriately.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index afc481d..a37fead 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -165,8 +165,6 @@ static inline int cache_put(struct cache_head *h, struct cache_detail *cd)
 }
 
 extern void cache_init(struct cache_head *h);
-extern void cache_fresh(struct cache_detail *detail,
-			struct cache_head *head, time_t expiry);
 extern int cache_check(struct cache_detail *detail,
 		       struct cache_head *h, struct cache_req *rqstp);
 extern void cache_flush(void);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index b242f49..edcda4f 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -96,6 +96,27 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 }
 EXPORT_SYMBOL(sunrpc_cache_lookup);
 
+
+static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
+
+static int cache_fresh_locked(struct cache_head *head, time_t expiry)
+{
+	head->expiry_time = expiry;
+	head->last_refresh = get_seconds();
+	return !test_and_set_bit(CACHE_VALID, &head->flags);
+}
+
+static void cache_fresh_unlocked(struct cache_head *head,
+			struct cache_detail *detail, int new)
+{
+	if (new)
+		cache_revisit_request(head);
+	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
+		cache_revisit_request(head);
+		queue_loose(detail, head);
+	}
+}
+
 struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 				       struct cache_head *new, struct cache_head *old, int hash)
 {
@@ -105,6 +126,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	 */
 	struct cache_head **head;
 	struct cache_head *tmp;
+	int is_new;
 
 	if (!test_bit(CACHE_VALID, &old->flags)) {
 		write_lock(&detail->hash_lock);
@@ -113,9 +135,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
 				detail->update(old, new);
-			/* FIXME cache_fresh should come first */
+			is_new = cache_fresh_locked(old, new->expiry_time);
 			write_unlock(&detail->hash_lock);
-			cache_fresh(detail, old, new->expiry_time);
+			cache_fresh_unlocked(old, detail, is_new);
 			return old;
 		}
 		write_unlock(&detail->hash_lock);
@@ -138,9 +160,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	tmp->next = *head;
 	*head = tmp;
 	cache_get(tmp);
+	is_new = cache_fresh_locked(tmp, new->expiry_time);
+	cache_fresh_locked(old, 0);
 	write_unlock(&detail->hash_lock);
-	cache_fresh(detail, tmp, new->expiry_time);
-	cache_fresh(detail, old, 0);
+	cache_fresh_unlocked(tmp, detail, is_new);
+	cache_fresh_unlocked(old, detail, 0);
 	detail->cache_put(old, detail);
 	return tmp;
 }
@@ -192,7 +216,8 @@ int cache_check(struct cache_detail *detail,
 				clear_bit(CACHE_PENDING, &h->flags);
 				if (rv == -EAGAIN) {
 					set_bit(CACHE_NEGATIVE, &h->flags);
-					cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY);
+					cache_fresh_unlocked(h, detail,
+					     cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
 					rv = -ENOENT;
 				}
 				break;
@@ -213,22 +238,6 @@ int cache_check(struct cache_detail *detail,
 	return rv;
 }
 
-static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
-
-void cache_fresh(struct cache_detail *detail,
-		 struct cache_head *head, time_t expiry)
-{
-
-	head->expiry_time = expiry;
-	head->last_refresh = get_seconds();
-	if (!test_and_set_bit(CACHE_VALID, &head->flags))
-		cache_revisit_request(head);
-	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
-		cache_revisit_request(head);
-		queue_loose(detail, head);
-	}
-}
-
 /*
  * caches need to be periodically cleaned.
  * For this we maintain a list of cache_detail and
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 4040119..69b8238 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup);
 EXPORT_SYMBOL(cache_check);
 EXPORT_SYMBOL(cache_flush);
 EXPORT_SYMBOL(cache_purge);
-EXPORT_SYMBOL(cache_fresh);
 EXPORT_SYMBOL(cache_init);
 EXPORT_SYMBOL(cache_register);
 EXPORT_SYMBOL(cache_unregister);
-- 
cgit v0.10.2


From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:09 -0800
Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs

.. it makes some of the code nicer.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index abd6896..cc811a1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,18 +57,17 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-void expkey_put(struct cache_head *item, struct cache_detail *cd)
+void expkey_put(struct kref *ref)
 {
-	if (cache_put(item, cd)) {
-		struct svc_expkey *key = container_of(item, struct svc_expkey, h);
-		if (test_bit(CACHE_VALID, &item->flags) &&
-		    !test_bit(CACHE_NEGATIVE, &item->flags)) {
-			dput(key->ek_dentry);
-			mntput(key->ek_mnt);
-		}
-		auth_domain_put(key->ek_client);
-		kfree(key);
+	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+
+	if (test_bit(CACHE_VALID, &key->h.flags) &&
+	    !test_bit(CACHE_NEGATIVE, &key->h.flags)) {
+		dput(key->ek_dentry);
+		mntput(key->ek_mnt);
 	}
+	auth_domain_put(key->ek_client);
+	kfree(key);
 }
 
 static void expkey_request(struct cache_detail *cd,
@@ -158,7 +157,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		set_bit(CACHE_NEGATIVE, &key.h.flags);
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
-			expkey_put(&ek->h, &svc_expkey_cache);
+			cache_put(&ek->h, &svc_expkey_cache);
 		else err = -ENOMEM;
 	} else {
 		struct nameidata nd;
@@ -172,7 +171,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		
 		ek = svc_expkey_update(&key, ek);
 		if (ek)
-			expkey_put(&ek->h, &svc_expkey_cache);
+			cache_put(&ek->h, &svc_expkey_cache);
 		else
 			err = -ENOMEM;
 		path_release(&nd);
@@ -318,15 +317,13 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
 
 static struct cache_head *export_table[EXPORT_HASHMAX];
 
-void svc_export_put(struct cache_head *item, struct cache_detail *cd)
+static void svc_export_put(struct kref *ref)
 {
-	if (cache_put(item, cd)) {
-		struct svc_export *exp = container_of(item, struct svc_export, h);
-		dput(exp->ex_dentry);
-		mntput(exp->ex_mnt);
-		auth_domain_put(exp->ex_client);
-		kfree(exp);
-	}
+	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+	dput(exp->ex_dentry);
+	mntput(exp->ex_mnt);
+	auth_domain_put(exp->ex_client);
+	kfree(exp);
 }
 
 static void svc_export_request(struct cache_detail *cd,
@@ -633,7 +630,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
 	if (ek)
 		ek = svc_expkey_update(&key,ek);
 	if (ek) {
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 		return 0;
 	}
 	return -ENOMEM;
@@ -762,7 +759,7 @@ static void exp_fsid_unhash(struct svc_export *exp)
 	ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
 	if (ek && !IS_ERR(ek)) {
 		ek->h.expiry_time = get_seconds()-1;
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 	}
 	svc_expkey_cache.nextcheck = get_seconds();
 }
@@ -800,7 +797,7 @@ static void exp_unhash(struct svc_export *exp)
 	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
 	if (ek && !IS_ERR(ek)) {
 		ek->h.expiry_time = get_seconds()-1;
-		expkey_put(&ek->h, &svc_expkey_cache);
+		cache_put(&ek->h, &svc_expkey_cache);
 	}
 	svc_expkey_cache.nextcheck = get_seconds();
 }
@@ -902,7 +899,7 @@ finish:
 	if (exp)
 		exp_put(exp);
 	if (fsid_key && !IS_ERR(fsid_key))
-		expkey_put(&fsid_key->h, &svc_expkey_cache);
+		cache_put(&fsid_key->h, &svc_expkey_cache);
 	if (clp)
 		auth_domain_put(clp);
 	path_release(&nd);
@@ -1030,7 +1027,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
 		return ERR_PTR(PTR_ERR(ek));
 
 	exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
-	expkey_put(&ek->h, &svc_expkey_cache);
+	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (!exp || IS_ERR(exp))
 		return ERR_PTR(PTR_ERR(exp));
@@ -1068,7 +1065,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	else
 		rv = fh_compose(fhp, exp,
 				fsid_key->ek_dentry, NULL);
-	expkey_put(&fsid_key->h, &svc_expkey_cache);
+	cache_put(&fsid_key->h, &svc_expkey_cache);
 	return rv;
 }
 
@@ -1187,7 +1184,7 @@ static int e_show(struct seq_file *m, void *p)
 	cache_get(&exp->h);
 	if (cache_check(&svc_export_cache, &exp->h, NULL))
 		return 0;
-	if (cache_put(&exp->h, &svc_export_cache)) BUG();
+	cache_put(&exp->h, &svc_export_cache);
 	return svc_export_show(m, &svc_export_cache, cp);
 }
 
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 75cfbb6..4b6aa60 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -96,12 +96,10 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
 }
 
 static void
-ent_put(struct cache_head *ch, struct cache_detail *cd)
+ent_put(struct kref *ref)
 {
-	if (cache_put(ch, cd)) {
-		struct ent *map = container_of(ch, struct ent, h);
-		kfree(map);
-	}
+	struct ent *map = container_of(ref, struct ent, h.ref);
+	kfree(map);
 }
 
 static struct cache_head *
@@ -270,7 +268,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (res == NULL)
 		goto out;
 
-	ent_put(&res->h, &idtoname_cache);
+	cache_put(&res->h, &idtoname_cache);
 
 	error = 0;
 out:
@@ -433,7 +431,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 	if (res == NULL)
 		goto out;
 
-	ent_put(&res->h, &nametoid_cache);
+	cache_put(&res->h, &nametoid_cache);
 	error = 0;
 out:
 	kfree(buf1);
@@ -562,7 +560,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
 		goto out_put;
 	return 0;
 out_put:
-	ent_put(&(*item)->h, detail);
+	cache_put(&(*item)->h, detail);
 out_err:
 	*item = NULL;
 	return ret;
@@ -613,7 +611,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
 	if (ret)
 		return ret;
 	*id = item->id;
-	ent_put(&item->h, &nametoid_cache);
+	cache_put(&item->h, &nametoid_cache);
 	return 0;
 }
 
@@ -635,7 +633,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
 	ret = strlen(item->name);
 	BUG_ON(ret > IDMAP_NAMESZ);
 	memcpy(name, item->name, ret);
-	ent_put(&item->h, &idtoname_cache);
+	cache_put(&item->h, &idtoname_cache);
 	return ret;
 }
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 7a3e397..3f2ec2e 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -506,7 +506,7 @@ fh_put(struct svc_fh *fhp)
 		nfsd_nr_put++;
 	}
 	if (exp) {
-		svc_export_put(&exp->h, &svc_export_cache);
+		cache_put(&exp->h, &svc_export_cache);
 		fhp->fh_export = NULL;
 	}
 	return;
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index d52e0b7..a6c08a4 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -102,13 +102,11 @@ int			exp_rootfh(struct auth_domain *,
 int			exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq);
 int			nfserrno(int errno);
 
-extern void expkey_put(struct cache_head *item, struct cache_detail *cd);
-extern void svc_export_put(struct cache_head *item, struct cache_detail *cd);
 extern struct cache_detail svc_export_cache, svc_expkey_cache;
 
 static inline void exp_put(struct svc_export *exp)
 {
-	svc_export_put(&exp->h, &svc_export_cache);
+	cache_put(&exp->h, &svc_export_cache);
 }
 
 static inline void exp_get(struct svc_export *exp)
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index a37fead..ad3f5cb 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -50,7 +50,7 @@ struct cache_head {
 	time_t		last_refresh;   /* If CACHE_PENDING, this is when upcall 
 					 * was sent, else this is when update was received
 					 */
-	atomic_t 	refcnt;
+	struct kref	ref;
 	unsigned long	flags;
 };
 #define	CACHE_VALID	0	/* Entry contains valid data */
@@ -68,8 +68,7 @@ struct cache_detail {
 	atomic_t		inuse; /* active user-space update or lookup */
 
 	char			*name;
-	void			(*cache_put)(struct cache_head *,
-					     struct cache_detail*);
+	void			(*cache_put)(struct kref *);
 
 	void			(*cache_request)(struct cache_detail *cd,
 						 struct cache_head *h,
@@ -151,17 +150,17 @@ extern void cache_clean_deferred(void *owner);
 
 static inline struct cache_head  *cache_get(struct cache_head *h)
 {
-	atomic_inc(&h->refcnt);
+	kref_get(&h->ref);
 	return h;
 }
 
 
-static inline int cache_put(struct cache_head *h, struct cache_detail *cd)
+static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 {
-	if (atomic_read(&h->refcnt) <= 2 &&
+	if (atomic_read(&h->ref.refcount) <= 2 &&
 	    h->expiry_time < cd->nextcheck)
 		cd->nextcheck = h->expiry_time;
-	return atomic_dec_and_test(&h->refcnt);
+	kref_put(&h->ref, cd->cache_put);
 }
 
 extern void cache_init(struct cache_head *h);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3801526..4d7eb9e 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -89,13 +89,11 @@ static void rsi_free(struct rsi *rsii)
 	kfree(rsii->out_token.data);
 }
 
-static void rsi_put(struct cache_head *item, struct cache_detail *cd)
+static void rsi_put(struct kref *ref)
 {
-	struct rsi *rsii = container_of(item, struct rsi, h);
-	if (cache_put(item, cd)) {
-		rsi_free(rsii);
-		kfree(rsii);
-	}
+	struct rsi *rsii = container_of(ref, struct rsi, h.ref);
+	rsi_free(rsii);
+	kfree(rsii);
 }
 
 static inline int rsi_hash(struct rsi *item)
@@ -267,7 +265,7 @@ static int rsi_parse(struct cache_detail *cd,
 out:
 	rsi_free(&rsii);
 	if (rsip)
-		rsi_put(&rsip->h, &rsi_cache);
+		cache_put(&rsip->h, &rsi_cache);
 	else
 		status = -ENOMEM;
 	return status;
@@ -357,14 +355,12 @@ static void rsc_free(struct rsc *rsci)
 		put_group_info(rsci->cred.cr_group_info);
 }
 
-static void rsc_put(struct cache_head *item, struct cache_detail *cd)
+static void rsc_put(struct kref *ref)
 {
-	struct rsc *rsci = container_of(item, struct rsc, h);
+	struct rsc *rsci = container_of(ref, struct rsc, h.ref);
 
-	if (cache_put(item, cd)) {
-		rsc_free(rsci);
-		kfree(rsci);
-	}
+	rsc_free(rsci);
+	kfree(rsci);
 }
 
 static inline int
@@ -509,7 +505,7 @@ static int rsc_parse(struct cache_detail *cd,
 out:
 	rsc_free(&rsci);
 	if (rscp)
-		rsc_put(&rscp->h, &rsc_cache);
+		cache_put(&rscp->h, &rsc_cache);
 	else
 		status = -ENOMEM;
 	return status;
@@ -1076,7 +1072,7 @@ drop:
 	ret = SVC_DROP;
 out:
 	if (rsci)
-		rsc_put(&rsci->h, &rsc_cache);
+		cache_put(&rsci->h, &rsc_cache);
 	return ret;
 }
 
@@ -1168,7 +1164,7 @@ out_err:
 		put_group_info(rqstp->rq_cred.cr_group_info);
 	rqstp->rq_cred.cr_group_info = NULL;
 	if (gsd->rsci)
-		rsc_put(&gsd->rsci->h, &rsc_cache);
+		cache_put(&gsd->rsci->h, &rsc_cache);
 	gsd->rsci = NULL;
 
 	return stat;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index edcda4f..dd81e59 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -42,7 +42,7 @@ void cache_init(struct cache_head *h)
 	time_t now = get_seconds();
 	h->next = NULL;
 	h->flags = 0;
-	atomic_set(&h->refcnt, 1);
+	kref_init(&h->ref);
 	h->expiry_time = now + CACHE_NEW_EXPIRY;
 	h->last_refresh = now;
 }
@@ -81,7 +81,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 		if (detail->match(tmp, key)) {
 			cache_get(tmp);
 			write_unlock(&detail->hash_lock);
-			detail->cache_put(new, detail);
+			cache_put(new, detail);
 			return tmp;
 		}
 	}
@@ -145,7 +145,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	/* We need to insert a new entry */
 	tmp = detail->alloc();
 	if (!tmp) {
-		detail->cache_put(old, detail);
+		cache_put(old, detail);
 		return NULL;
 	}
 	cache_init(tmp);
@@ -165,7 +165,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	write_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(tmp, detail, is_new);
 	cache_fresh_unlocked(old, detail, 0);
-	detail->cache_put(old, detail);
+	cache_put(old, detail);
 	return tmp;
 }
 EXPORT_SYMBOL(sunrpc_cache_update);
@@ -234,7 +234,7 @@ int cache_check(struct cache_detail *detail,
 		cache_defer_req(rqstp, h);
 
 	if (rv)
-		detail->cache_put(h, detail);
+		cache_put(h, detail);
 	return rv;
 }
 
@@ -431,7 +431,7 @@ static int cache_clean(void)
 			if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
 				queue_loose(current_detail, ch);
 
-			if (atomic_read(&ch->refcnt) == 1)
+			if (atomic_read(&ch->ref.refcount) == 1)
 				break;
 		}
 		if (ch) {
@@ -446,7 +446,7 @@ static int cache_clean(void)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
 		if (ch)
-			d->cache_put(ch, d);
+			cache_put(ch, d);
 	} else
 		spin_unlock(&cache_list_lock);
 
@@ -723,7 +723,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
 			list_del(&rq->q.list);
 			spin_unlock(&queue_lock);
-			cd->cache_put(rq->item, cd);
+			cache_put(rq->item, cd);
 			kfree(rq->buf);
 			kfree(rq);
 		} else
@@ -906,7 +906,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
 				continue;
 			list_del(&cr->q.list);
 			spin_unlock(&queue_lock);
-			detail->cache_put(cr->item, detail);
+			cache_put(cr->item, detail);
 			kfree(cr->buf);
 			kfree(cr);
 			return;
@@ -1192,7 +1192,7 @@ static int c_show(struct seq_file *m, void *p)
 
 	ifdebug(CACHE)
 		seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
-			   cp->expiry_time, atomic_read(&cp->refcnt), cp->flags);
+			   cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags);
 	cache_get(cp);
 	if (cache_check(cd, cp, NULL))
 		/* cache_check does a cache_put on failure */
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 7e38621..11020c0 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -84,15 +84,15 @@ struct ip_map {
 };
 static struct cache_head	*ip_table[IP_HASHMAX];
 
-static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
+static void ip_map_put(struct kref *kref)
 {
+	struct cache_head *item = container_of(kref, struct cache_head, ref);
 	struct ip_map *im = container_of(item, struct ip_map,h);
-	if (cache_put(item, cd)) {
-		if (test_bit(CACHE_VALID, &item->flags) &&
-		    !test_bit(CACHE_NEGATIVE, &item->flags))
-			auth_domain_put(&im->m_client->h);
-		kfree(im);
-	}
+
+	if (test_bit(CACHE_VALID, &item->flags) &&
+	    !test_bit(CACHE_NEGATIVE, &item->flags))
+		auth_domain_put(&im->m_client->h);
+	kfree(im);
 }
 
 #if IP_HASHBITS == 8
@@ -315,7 +315,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
 				 hash_ip((unsigned long)ipm->m_addr.s_addr));
 	if (!ch)
 		return -ENOMEM;
-	ip_map_put(ch, &ip_map_cache);
+	cache_put(ch, &ip_map_cache);
 	return 0;
 }
 
@@ -369,7 +369,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
 		rv = &ipm->m_client->h;
 		kref_get(&rv->ref);
 	}
-	ip_map_put(&ipm->h, &ip_map_cache);
+	cache_put(&ipm->h, &ip_map_cache);
 	return rv;
 }
 
@@ -403,7 +403,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 		case 0:
 			rqstp->rq_client = &ipm->m_client->h;
 			kref_get(&rqstp->rq_client->ref);
-			ip_map_put(&ipm->h, &ip_map_cache);
+			cache_put(&ipm->h, &ip_map_cache);
 			break;
 	}
 	return SVC_OK;
-- 
cgit v0.10.2


From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 27 Mar 2006 01:15:10 -0800
Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global
 code static

We can now make some code static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cc811a1..c340be0 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -57,7 +57,7 @@ static int		exp_verify_string(char *cp, int max);
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
 static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
-void expkey_put(struct kref *ref)
+static void expkey_put(struct kref *ref)
 {
 	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
 
@@ -87,6 +87,8 @@ static void expkey_request(struct cache_detail *cd,
 
 static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
 static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
+static struct cache_detail svc_expkey_cache;
+
 static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client fsidtype fsid [path] */
@@ -255,7 +257,7 @@ static struct cache_head *expkey_alloc(void)
 		return NULL;
 }
 
-struct cache_detail svc_expkey_cache = {
+static struct cache_detail svc_expkey_cache = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPKEY_HASHMAX,
 	.hash_table	= expkey_table,
@@ -345,7 +347,8 @@ static void svc_export_request(struct cache_detail *cd,
 	(*bpp)[-1] = '\n';
 }
 
-struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old);
+static struct svc_export *svc_export_update(struct svc_export *new,
+					    struct svc_export *old);
 static struct svc_export *svc_export_lookup(struct svc_export *);
 
 static int check_export(struct inode *inode, int flags)
@@ -574,7 +577,7 @@ svc_export_lookup(struct svc_export *exp)
 		return NULL;
 }
 
-struct svc_export *
+static struct svc_export *
 svc_export_update(struct svc_export *new, struct svc_export *old)
 {
 	struct cache_head *ch;
@@ -593,7 +596,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
 }
 
 
-struct svc_expkey *
+static struct svc_expkey *
 exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
 {
 	struct svc_expkey key, *ek;
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index a6c08a4..d2a8abb 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -86,9 +86,6 @@ void			nfsd_export_shutdown(void);
 void			nfsd_export_flush(void);
 void			exp_readlock(void);
 void			exp_readunlock(void);
-struct svc_expkey *	exp_find_key(struct auth_domain *clp, 
-				     int fsid_type, u32 *fsidv,
-				     struct cache_req *reqp);
 struct svc_export *	exp_get_by_name(struct auth_domain *clp,
 					struct vfsmount *mnt,
 					struct dentry *dentry,
@@ -102,7 +99,7 @@ int			exp_rootfh(struct auth_domain *,
 int			exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq);
 int			nfserrno(int errno);
 
-extern struct cache_detail svc_export_cache, svc_expkey_cache;
+extern struct cache_detail svc_export_cache;
 
 static inline void exp_put(struct svc_export *exp)
 {
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index ad3f5cb..b5612c9 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -163,7 +163,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 	kref_put(&h->ref, cd->cache_put);
 }
 
-extern void cache_init(struct cache_head *h);
 extern int cache_check(struct cache_detail *detail,
 		       struct cache_head *h, struct cache_req *rqstp);
 extern void cache_flush(void);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index dd81e59..3ac4193 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -37,7 +37,7 @@
 static void cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
 
-void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h)
 {
 	time_t now = get_seconds();
 	h->next = NULL;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 69b8238..769114f 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup);
 EXPORT_SYMBOL(cache_check);
 EXPORT_SYMBOL(cache_flush);
 EXPORT_SYMBOL(cache_purge);
-EXPORT_SYMBOL(cache_init);
 EXPORT_SYMBOL(cache_register);
 EXPORT_SYMBOL(cache_unregister);
 EXPORT_SYMBOL(qword_add);
-- 
cgit v0.10.2


From ad1b5229def92b71631a927895b034ceec06c991 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:11 -0800
Subject: [PATCH] knfsd: Tidy up unix_domain_find

We shouldn't really compare &new->h with anything when new ==NULL, and gather
three different if statements that all start

  if (rv ...

into one large if.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 11020c0..7e5707e 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -36,16 +36,16 @@ struct auth_domain *unix_domain_find(char *name)
 
 	rv = auth_domain_lookup(name, NULL);
 	while(1) {
-		if (rv != &new->h) {
-			if (new) auth_domain_put(&new->h);
+		if (rv) {
+			if (new && rv != &new->h)
+				auth_domain_put(&new->h);
+
+			if (rv->flavour != &svcauth_unix) {
+				auth_domain_put(rv);
+				return NULL;
+			}
 			return rv;
 		}
-		if (rv && rv->flavour != &svcauth_unix) {
-			auth_domain_put(rv);
-			return NULL;
-		}
-		if (rv)
-			return rv;
 
 		new = kmalloc(sizeof(*new), GFP_KERNEL);
 		if (new == NULL)
-- 
cgit v0.10.2


From 491214716755894986b99e0cf0a08b830d244577 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:15:12 -0800
Subject: [PATCH] knfsd: Update rpc-cache.txt to match recent changes

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt
index 2b5d443..5f757c8 100644
--- a/Documentation/rpc-cache.txt
+++ b/Documentation/rpc-cache.txt
@@ -1,4 +1,4 @@
-This document gives a brief introduction to the caching
+	This document gives a brief introduction to the caching
 mechanisms in the sunrpc layer that is used, in particular,
 for NFS authentication.
 
@@ -25,25 +25,17 @@ The common code handles such things as:
    - supporting 'NEGATIVE' as well as positive entries
    - allowing an EXPIRED time on cache items, and removing
      items after they expire, and are no longe in-use.
-
-   Future code extensions are expect to handle
    - making requests to user-space to fill in cache entries
    - allowing user-space to directly set entries in the cache
    - delaying RPC requests that depend on as-yet incomplete
      cache entries, and replaying those requests when the cache entry
      is complete.
-   - maintaining last-access times on cache entries
-   - clean out old entries when the caches become full
-
-The code for performing a cache lookup is also common, but in the form
-of a template.  i.e. a #define.
-Each cache defines a lookup function by using the DefineCacheLookup
-macro, or the simpler DefineSimpleCacheLookup macro
+   - clean out old entries as they expire.
 
 Creating a Cache
 ----------------
 
-1/ A cache needs a datum to cache.  This is in the form of a
+1/ A cache needs a datum to store.  This is in the form of a
    structure definition that must contain a
      struct cache_head
    as an element, usually the first.
@@ -51,35 +43,69 @@ Creating a Cache
    Each cache element is reference counted and contains
    expiry and update times for use in cache management.
 2/ A cache needs a "cache_detail" structure that
-   describes the cache.  This stores the hash table, and some
-   parameters for cache management.
-3/ A cache needs a lookup function.  This is created using
-   the DefineCacheLookup macro.  This lookup function is used both
-   to find entries and to update entries.  The normal mode for
-   updating an entry is to replace the old entry with a new
-   entry.  However it is possible to allow update-in-place
-   for those caches where it makes sense (no atomicity issues
-   or indirect reference counting issue)
-4/ A cache needs to be registered using cache_register().  This
-   includes in on a list of caches that will be regularly
-   cleaned to discard old data.  For this to work, some
-   thread must periodically call cache_clean
-   
+   describes the cache.  This stores the hash table, some
+   parameters for cache management, and some operations detailing how
+   to work with particular cache items.
+   The operations requires are:
+   	struct cache_head *alloc(void)
+		This simply allocates appropriate memory and returns
+   		a pointer to the cache_detail embedded within the
+		structure
+	void cache_put(struct kref *)
+		This is called when the last reference to an item is
+		is dropped.  The pointer passed is to the 'ref' field
+		in the cache_head.  cache_put should release any
+		references create by 'cache_init' and, if CACHE_VALID
+		is set, any references created by cache_update.
+		It should then release the memory allocated by
+   		'alloc'.
+        int match(struct cache_head *orig, struct cache_head *new)
+		test if the keys in the two structures match.  Return
+		1 if they do, 0 if they don't.
+	void init(struct cache_head *orig, struct cache_head *new)
+		Set the 'key' fields in 'new' from 'orig'.  This may
+		include taking references to shared objects.
+	void update(struct cache_head *orig, struct cache_head *new)
+		Set the 'content' fileds in 'new' from 'orig'.
+	int cache_show(struct seq_file *m, struct cache_detail *cd,
+			struct cache_head *h)
+		Optional.  Used to provide a /proc file that lists the
+		contents of a cache.  This should show one item,
+   		usually on just one line.
+	int cache_request(struct cache_detail *cd, struct cache_head *h,
+   		char **bpp, int *blen)
+		Format a request to be send to user-space for an item
+   		to be instantiated.  *bpp is a buffer of size *blen.
+		bpp should be moved forward over the encoded message,
+		and  *blen should be reduced to show how much free
+		space remains.  Return 0 on success or <0 if not
+		enough room or other problem.
+	int cache_parse(struct cache_detail *cd, char *buf, int len)
+		A message from user space has arrived to fill out a
+		cache entry.  It is in 'buf' of length 'len'.
+		cache_parse should parse this, find the item in the
+		cache with sunrpc_cache_lookup, and update the item
+		with sunrpc_cache_update.
+
+
+3/ A cache needs to be registered using cache_register().  This
+   includes it on a list of caches that will be regularly
+   cleaned to discard old data.
+
 Using a cache
 -------------
 
-To find a value in a cache, call the lookup function passing it a the
-datum which contains key, and possibly content, and a flag saying
-whether to update the cache with new data from the datum.   Depending
-on how the cache lookup function was defined, it may take an extra
-argument to identify the particular cache in question.
+To find a value in a cache, call sunrpc_cache_lookup passing a pointer
+to the cache_head in a sample item with the 'key' fields filled in.
+This will be passed to ->match to identify the target entry.  If no
+entry is found, a new entry will be create, added to the cache, and
+marked as not containing valid data.
 
-Except in cases of kmalloc failure, the lookup function
-will return a new datum which will store the key and
-may contain valid content, or may not.
-This datum is typically passed to cache_check which determines the
-validity of the datum and may later initiate an upcall to fill
-in the data.
+The item returned is typically passed to cache_check which will check
+if the data is valid, and may initiate an up-call to get fresh data.
+cache_check will return -ENOENT in the entry is negative or if an up
+call is needed but not possible, -EAGAIN if an upcall is pending,
+or 0 if the data is valid;
 
 cache_check can be passed a "struct cache_req *".  This structure is
 typically embedded in the actual request and can be used to create a
@@ -90,6 +116,13 @@ item does become valid, the deferred copy of the request will be
 revisited (->revisit).  It is expected that this method will
 reschedule the request for processing.
 
+The value returned by sunrpc_cache_lookup can also be passed to
+sunrpc_cache_update to set the content for the item.  A second item is
+passed which should hold the content.  If the item found by _lookup
+has valid data, then it is discarded and a new item is created.  This
+saves any user of an item from worrying about content changing while
+it is being inspected.  If the item found by _lookup does not contain
+valid data, then the content is copied across and CACHE_VALID is set.
 
 Populating a cache
 ------------------
@@ -114,8 +147,8 @@ should be create or updated to have the given content, and the
 expiry time should be set on that item.
 
 Reading from a channel is a bit more interesting.  When a cache
-lookup fail, or when it suceeds but finds an entry that may soon
-expiry, a request is lodged for that cache item to be updated by
+lookup fails, or when it succeeds but finds an entry that may soon
+expire, a request is lodged for that cache item to be updated by
 user-space.  These requests appear in the channel file.
 
 Successive reads will return successive requests.
@@ -130,7 +163,7 @@ Thus a user-space helper is likely to:
     write a response
   loop.
 
-If it dies and needs to be restarted, any requests that have not be
+If it dies and needs to be restarted, any requests that have not been
 answered will still appear in the file and will be read by the new
 instance of the helper.
 
@@ -142,10 +175,9 @@ Each cache should also define a "cache_request" method which
 takes a cache item and encodes a request into the buffer
 provided.
 
-
 Note: If a cache has no active readers on the channel, and has had not
 active readers for more than 60 seconds, further requests will not be
-added to the channel but instead all looks that do not find a valid
+added to the channel but instead all lookups that do not find a valid
 entry will fail.  This is partly for backward compatibility: The
 previous nfs exports table was deemed to be authoritative and a
 failed lookup meant a definite 'no'.
@@ -154,18 +186,17 @@ request/response format
 -----------------------
 
 While each cache is free to use it's own format for requests
-and responses over channel, the following is recommended are
+and responses over channel, the following is recommended as
 appropriate and support routines are available to help:
 Each request or response record should be printable ASCII
 with precisely one newline character which should be at the end.
 Fields within the record should be separated by spaces, normally one.
 If spaces, newlines, or nul characters are needed in a field they
-much be quotes.  two mechanisms are available:
+much be quoted.  two mechanisms are available:
 1/ If a field begins '\x' then it must contain an even number of
    hex digits, and pairs of these digits provide the bytes in the
    field.
 2/ otherwise a \ in the field must be followed by 3 octal digits
    which give the code for a byte.  Other characters are treated
-   as them selves.  At the very least, space, newlines nul, and
+   as them selves.  At the very least, space, newline, nul, and
    '\' must be quoted in this way.
-   
-- 
cgit v0.10.2


From 013d3868143387be99bb0373d49452eaa3c55d41 Mon Sep 17 00:00:00 2001
From: Martin Andersson <martin.andersson@control.lth.se>
Date: Mon, 27 Mar 2006 01:15:18 -0800
Subject: [PATCH] sched: fix task interactivity calculation

Is a truncation error in kernel/sched.c triggered when the nice value is
negative.  The affected code is used in the TASK_INTERACTIVE macro.

The code is:
#define SCALE(v1,v1_max,v2_max) \
	(v1) * (v2_max) / (v1_max)

which is used in this way:
SCALE(TASK_NICE(p), 40, MAX_BONUS)

Comments in the code says:
  * This part scales the interactivity limit depending on niceness.
  *
  * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
  * Here are a few examples of different nice levels:
  *
  *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
  *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
  *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
  *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
  *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
  *
  * (the X axis represents the possible -5 ... 0 ... +5 dynamic
  *  priority range a task can explore, a value of '1' means the
  *  task is rated interactive.)

However, the current code does not scale it linearly and the result differs
from the given examples.  If the mathematical function "floor" is used when
the nice value is negative instead of the truncation one gets when using
integer division, the result conforms to the documentation.

Output of TASK_INTERACTIVE when using the kernel code:
nice    dynamic priorities
-20     1     1     1     1     1     1     1     1     1     0     0
-19     1     1     1     1     1     1     1     1     0     0     0
-18     1     1     1     1     1     1     1     1     0     0     0
-17     1     1     1     1     1     1     1     1     0     0     0
-16     1     1     1     1     1     1     1     1     0     0     0
-15     1     1     1     1     1     1     1     0     0     0     0
-14     1     1     1     1     1     1     1     0     0     0     0
-13     1     1     1     1     1     1     1     0     0     0     0
-12     1     1     1     1     1     1     1     0     0     0     0
-11     1     1     1     1     1     1     0     0     0     0     0
-10     1     1     1     1     1     1     0     0     0     0     0
  -9     1     1     1     1     1     1     0     0     0     0     0
  -8     1     1     1     1     1     1     0     0     0     0     0
  -7     1     1     1     1     1     0     0     0     0     0     0
  -6     1     1     1     1     1     0     0     0     0     0     0
  -5     1     1     1     1     1     0     0     0     0     0     0
  -4     1     1     1     1     1     0     0     0     0     0     0
  -3     1     1     1     1     0     0     0     0     0     0     0
  -2     1     1     1     1     0     0     0     0     0     0     0
  -1     1     1     1     1     0     0     0     0     0     0     0
  0      1     1     1     1     0     0     0     0     0     0     0
  1      1     1     1     1     0     0     0     0     0     0     0
  2      1     1     1     1     0     0     0     0     0     0     0
  3      1     1     1     1     0     0     0     0     0     0     0
  4      1     1     1     0     0     0     0     0     0     0     0
  5      1     1     1     0     0     0     0     0     0     0     0
  6      1     1     1     0     0     0     0     0     0     0     0
  7      1     1     1     0     0     0     0     0     0     0     0
  8      1     1     0     0     0     0     0     0     0     0     0
  9      1     1     0     0     0     0     0     0     0     0     0
10      1     1     0     0     0     0     0     0     0     0     0
11      1     1     0     0     0     0     0     0     0     0     0
12      1     0     0     0     0     0     0     0     0     0     0
13      1     0     0     0     0     0     0     0     0     0     0
14      1     0     0     0     0     0     0     0     0     0     0
15      1     0     0     0     0     0     0     0     0     0     0
16      0     0     0     0     0     0     0     0     0     0     0
17      0     0     0     0     0     0     0     0     0     0     0
18      0     0     0     0     0     0     0     0     0     0     0
19      0     0     0     0     0     0     0     0     0     0     0

Output of TASK_INTERACTIVE when using "floor"
nice    dynamic priorities
-20     1     1     1     1     1     1     1     1     1     0     0
-19     1     1     1     1     1     1     1     1     1     0     0
-18     1     1     1     1     1     1     1     1     1     0     0
-17     1     1     1     1     1     1     1     1     1     0     0
-16     1     1     1     1     1     1     1     1     0     0     0
-15     1     1     1     1     1     1     1     1     0     0     0
-14     1     1     1     1     1     1     1     1     0     0     0
-13     1     1     1     1     1     1     1     1     0     0     0
-12     1     1     1     1     1     1     1     0     0     0     0
-11     1     1     1     1     1     1     1     0     0     0     0
-10     1     1     1     1     1     1     1     0     0     0     0
  -9     1     1     1     1     1     1     1     0     0     0     0
  -8     1     1     1     1     1     1     0     0     0     0     0
  -7     1     1     1     1     1     1     0     0     0     0     0
  -6     1     1     1     1     1     1     0     0     0     0     0
  -5     1     1     1     1     1     1     0     0     0     0     0
  -4     1     1     1     1     1     0     0     0     0     0     0
  -3     1     1     1     1     1     0     0     0     0     0     0
  -2     1     1     1     1     1     0     0     0     0     0     0
  -1     1     1     1     1     1     0     0     0     0     0     0
   0     1     1     1     1     0     0     0     0     0     0     0
   1     1     1     1     1     0     0     0     0     0     0     0
   2     1     1     1     1     0     0     0     0     0     0     0
   3     1     1     1     1     0     0     0     0     0     0     0
   4     1     1     1     0     0     0     0     0     0     0     0
   5     1     1     1     0     0     0     0     0     0     0     0
   6     1     1     1     0     0     0     0     0     0     0     0
   7     1     1     1     0     0     0     0     0     0     0     0
   8     1     1     0     0     0     0     0     0     0     0     0
   9     1     1     0     0     0     0     0     0     0     0     0
  10     1     1     0     0     0     0     0     0     0     0     0
  11     1     1     0     0     0     0     0     0     0     0     0
  12     1     0     0     0     0     0     0     0     0     0     0
  13     1     0     0     0     0     0     0     0     0     0     0
  14     1     0     0     0     0     0     0     0     0     0     0
  15     1     0     0     0     0     0     0     0     0     0     0
  16     0     0     0     0     0     0     0     0     0     0     0
  17     0     0     0     0     0     0     0     0     0     0     0
  18     0     0     0     0     0     0     0     0     0     0     0
  19     0     0     0     0     0     0     0     0     0     0     0

Signed-off-by: Martin Andersson <martin.andersson@control.lth.se>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 78acdef..dc599c8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -145,7 +145,8 @@
 	(v1) * (v2_max) / (v1_max)
 
 #define DELTA(p) \
-	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+		INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
 	((p)->prio <= (p)->static_prio - DELTA(p))
-- 
cgit v0.10.2


From 77e4bfbcf071f795b54862455dce8902b3fc29c2 Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@lisas.de>
Date: Mon, 27 Mar 2006 01:15:20 -0800
Subject: [PATCH] Small schedule() optimization

small schedule() microoptimization.

Signed-off-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index dc599c8..a96a05d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2879,13 +2879,11 @@ asmlinkage void __sched schedule(void)
 	 * schedule() atomically, we ignore that path for now.
 	 * Otherwise, whine if we are scheduling when we should not be.
 	 */
-	if (likely(!current->exit_state)) {
-		if (unlikely(in_atomic())) {
-			printk(KERN_ERR "BUG: scheduling while atomic: "
-				"%s/0x%08x/%d\n",
-				current->comm, preempt_count(), current->pid);
-			dump_stack();
-		}
+	if (unlikely(in_atomic() && !current->exit_state)) {
+		printk(KERN_ERR "BUG: scheduling while atomic: "
+			"%s/0x%08x/%d\n",
+			current->comm, preempt_count(), current->pid);
+		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
-- 
cgit v0.10.2


From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Mon, 27 Mar 2006 01:15:22 -0800
Subject: [PATCH] sched: new sched domain for representing multi-core

Add a new sched domain for representing multi-core with shared caches
between cores.  Consider a dual package system, each package containing two
cores and with last level cache shared between cores with in a package.  If
there are two runnable processes, with this appended patch those two
processes will be scheduled on different packages.

On such systems, with this patch we have observed 8% perf improvement with
specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2
users).

This new domain will come into play only on multi-core systems with shared
caches.  On other systems, this sched domain will be removed by domain
degeneration code.  This new domain can be also used for implementing power
savings policy (see OLS 2005 CMP kernel scheduler paper for more details..
I will post another patch for power savings policy soon)

Most of the arch/* file changes are for cpu_coregroup_map() implementation.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f7db71d..f17bd1d 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -231,6 +231,15 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 7e3d6b6..a06a490 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -266,7 +266,7 @@ static void __init early_cpu_detect(void)
 void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
-	int junk;
+	int ebx;
 
 	if (have_cpuid_p()) {
 		/* Get vendor name */
@@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 		/* Intel-defined flags: level 0x00000001 */
 		if ( c->cpuid_level >= 0x00000001 ) {
 			u32 capability, excap;
-			cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
 			c->x86_capability[0] = capability;
 			c->x86_capability[4] = excap;
 			c->x86 = (tfms >> 8) & 15;
@@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 			if (c->x86 >= 0x6)
 				c->x86_model += ((tfms >> 16) & 0xF) << 4;
 			c->x86_mask = tfms & 15;
+#ifdef CONFIG_SMP
+			c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+			c->apicid = (ebx >> 24) & 0xFF;
+#endif
 		} else {
 			/* Have CPUID level 0 only - unheard of */
 			c->x86 = 4;
@@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
-	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
 
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index ce61921..7e7fd4e 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
 
 	if (c->cpuid_level > 3) {
 		static int is_initialized;
@@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 					break;
 				    case 2:
 					new_l2 = this_leaf.size/1024;
+					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+					index_msb = get_count_order(num_threads_sharing);
+					l2_id = c->apicid >> index_msb;
 					break;
 				    case 3:
 					new_l3 = this_leaf.size/1024;
+					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+					index_msb = get_count_order(num_threads_sharing);
+					l3_id = c->apicid >> index_msb;
 					break;
 				    default:
 					break;
@@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 		if (new_l1i)
 			l1i = new_l1i;
 
-		if (new_l2)
+		if (new_l2) {
 			l2 = new_l2;
+#ifdef CONFIG_SMP
+			cpu_llc_id[cpu] = l2_id;
+#endif
+		}
 
-		if (new_l3)
+		if (new_l3) {
 			l3 = new_l3;
+#ifdef CONFIG_SMP
+			cpu_llc_id[cpu] = l3_id;
+#endif
+		}
 
 		if ( trace )
 			printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 82371d8..a696990 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
 /* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
@@ -440,6 +443,18 @@ static void __devinit smp_callin(void)
 
 static int cpucount;
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	/*
+	 * For perf, we return last level cache shared map.
+	 * TBD: when power saving sched policy is added, we will return
+	 *      cpu_core_map when power saving policy is enabled
+	 */
+	return c->llc_shared_map;
+}
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
@@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu)
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, c[cpu].llc_shared_map);
+				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
+	cpu_set(cpu, c[cpu].llc_shared_map);
+
 	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
 		c[cpu].booted_cores = 1;
@@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu)
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (cpu_llc_id[cpu] != BAD_APICID &&
+		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
+			cpu_set(i, c[cpu].llc_shared_map);
+			cpu_set(cpu, c[i].llc_shared_map);
+		}
 		if (phys_proc_id[cpu] == phys_proc_id[i]) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 45efe0c..1cb4aa2 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -250,6 +250,15 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 source "kernel/Kconfig.preempt"
 
 config NUMA
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index a57eec8..d1f3e92 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
-	c->apicid = phys_pkg_id(0);
 
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
@@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
+	c->apicid = phys_pkg_id(0);
+
 	/*
 	 * Vendor-specific initialization.  In this section we
 	 * canonicalize the feature flags, meaning if there are
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 66e9865..ea48fa6 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 /* core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 
+/* Last level cache ID of each logical CPU */
+u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
+
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
 
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	/*
+	 * For perf, we return last level cache shared map.
+	 * TBD: when power saving sched policy is added, we will return
+	 *      cpu_core_map when power saving policy is enabled
+	 */
+	return c->llc_shared_map;
+}
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu)
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, c[cpu].llc_shared_map);
+				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
+	cpu_set(cpu, c[cpu].llc_shared_map);
+
 	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
 		c[cpu].booted_cores = 1;
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu)
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (cpu_llc_id[cpu] != BAD_APICID &&
+		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
+			cpu_set(i, c[cpu].llc_shared_map);
+			cpu_set(cpu, c[i].llc_shared_map);
+		}
 		if (phys_proc_id[cpu] == phys_proc_id[i]) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index feca5d9..af4bfd0 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -20,6 +20,7 @@
 #include <linux/config.h>
 #include <linux/threads.h>
 #include <asm/percpu.h>
+#include <linux/cpumask.h>
 
 /* flag for disabling the tsc */
 extern int tsc_disable;
@@ -67,6 +68,9 @@ struct cpuinfo_x86 {
 	char	pad0;
 	int	x86_power;
 	unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
+#endif
 	unsigned char x86_max_cores;	/* cpuid returned max cores value */
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	unsigned char apicid;
@@ -103,6 +107,7 @@ extern struct cpuinfo_x86 cpu_data[];
 
 extern	int phys_proc_id[NR_CPUS];
 extern	int cpu_core_id[NR_CPUS];
+extern	int cpu_llc_id[NR_CPUS];
 extern char ignore_fpu_irq;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index aa958c6..b94e5ee 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -112,4 +112,6 @@ extern unsigned long node_remap_size[];
 
 #endif /* CONFIG_NUMA */
 
+extern cpumask_t cpu_coregroup_map(int cpu);
+
 #endif /* _ASM_I386_TOPOLOGY_H */
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 8c8d88c..1aa2cee 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -20,6 +20,7 @@
 #include <asm/mmsegment.h>
 #include <asm/percpu.h>
 #include <linux/personality.h>
+#include <linux/cpumask.h>
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -65,6 +66,9 @@ struct cpuinfo_x86 {
         __u32   x86_power; 	
 	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
 	unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
+#endif
 	__u8	apicid;
 	__u8	booted_cores;	/* number of cores as seen by OS */
 } ____cacheline_aligned;
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 9ccbb2c..a4fdaeb 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -56,6 +56,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern u8 phys_proc_id[NR_CPUS];
 extern u8 cpu_core_id[NR_CPUS];
+extern u8 cpu_llc_id[NR_CPUS];
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index c642f5d..9db54e9 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -68,4 +68,6 @@ extern int __node_distance(int, int);
 
 #include <asm-generic/topology.h>
 
+extern cpumask_t cpu_coregroup_map(int cpu);
+
 #endif
diff --git a/include/linux/topology.h b/include/linux/topology.h
index e8eb004..a305ae2 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -164,6 +164,15 @@
 	.nr_balance_failed	= 0,			\
 }
 
+#ifdef CONFIG_SCHED_MC
+#ifndef SD_MC_INIT
+/* for now its same as SD_CPU_INIT.
+ * TBD: Tune Domain parameters!
+ */
+#define SD_MC_INIT   SD_CPU_INIT
+#endif
+#endif
+
 #ifdef CONFIG_NUMA
 #ifndef SD_NODE_INIT
 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff --git a/kernel/sched.c b/kernel/sched.c
index a96a05d..8a8b71b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+					&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
+
+		sd = &per_cpu(phys_domains, i);
 
+ 		/*
+ 		 * This has to be < 2 * SCHED_LOAD_SCALE
+ 		 * Lets keep it SCHED_LOAD_SCALE, so that
+ 		 * while calculating NUMA group's cpu_power
+ 		 * we can simply do
+ 		 *  numa_group->cpu_power += phys_group->cpu_power;
+ 		 *
+ 		 * See "only add power once for each physical pkg"
+ 		 * comment below
+ 		 */
+ 		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
 		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 				(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
+#endif
 
 #ifdef CONFIG_NUMA
 		sd = &per_cpu(allnodes_domains, i);
@@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map)
 next_sg:
 		for_each_cpu_mask(j, sg->cpumask) {
 			struct sched_domain *sd;
-			int power;
 
 			sd = &per_cpu(phys_domains, j);
 			if (j != first_cpu(sd->groups->cpumask)) {
@@ -5833,10 +5896,8 @@ next_sg:
 				 */
 				continue;
 			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
 
-			sg->cpu_power += power;
+			sg->cpu_power += sd->groups->cpu_power;
 		}
 		sg = sg->next;
 		if (sg != sched_group_nodes[i])
@@ -5849,6 +5910,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
-- 
cgit v0.10.2


From 0806903316d516a3b3851c51cea5c71724d9051d Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Mon, 27 Mar 2006 01:15:23 -0800
Subject: [PATCH] sched: fix group power for allnodes_domains

Current sched groups power calculation for allnodes_domains is wrong.  We
should really be using cumulative power of the physical packages in that
group (similar to the calculation in node_domains)

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 8a8b71b..7854ee5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5621,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+	struct sched_group *sg = group_head;
+	int j;
+
+	if (!sg)
+		return;
+next_sg:
+	for_each_cpu_mask(j, sg->cpumask) {
+		struct sched_domain *sd;
+
+		sd = &per_cpu(phys_domains, j);
+		if (j != first_cpu(sd->groups->cpumask)) {
+			/*
+			 * Only add "power" once for each
+			 * physical package.
+			 */
+			continue;
+		}
+
+		sg->cpu_power += sd->groups->cpu_power;
+	}
+	sg = sg->next;
+	if (sg != group_head)
+		goto next_sg;
+}
 #endif
 
 /*
@@ -5866,43 +5892,13 @@ void build_sched_domains(const cpumask_t *cpu_map)
 				(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
 #endif
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
-#endif
 	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
+	for (i = 0; i < MAX_NUMNODES; i++)
+		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-
-			sg->cpu_power += sd->groups->cpu_power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
+	init_numa_sched_groups_power(sched_group_allnodes);
 #endif
 
 	/* Attach the domains */
-- 
cgit v0.10.2


From b06be912a3ad68c69dba0ed6e92723140020e392 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 27 Mar 2006 01:15:24 -0800
Subject: [PATCH] x86: don't use cpuid.2 to determine cache info if cpuid.4 is
 supported

Don't use cpuid.2 to determine cache info if cpuid.4 is supported.  The
exception is P4 trace cache.  We always use cpuid.2 to get trace cache
under P4.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 7e7fd4e..9df87b0 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -225,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 			}
 		}
 	}
-	if (c->cpuid_level > 1) {
+	/*
+	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+	 * trace cache
+	 */
+	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 		/* supports eax=2  call */
 		int i, j, n;
 		int regs[4];
 		unsigned char *dp = (unsigned char *)regs;
+		int only_trace = 0;
+
+		if (num_cache_leaves != 0 && c->x86 == 15)
+			only_trace = 1;
 
 		/* Number of times to iterate */
 		n = cpuid_eax(2) & 0xFF;
@@ -251,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 				while (cache_table[k].descriptor != 0)
 				{
 					if (cache_table[k].descriptor == des) {
+						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+							break;
 						switch (cache_table[k].cache_type) {
 						case LVL_1_INST:
 							l1i += cache_table[k].size;
@@ -276,43 +286,46 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 				}
 			}
 		}
+	}
 
-		if (new_l1d)
-			l1d = new_l1d;
+	if (new_l1d)
+		l1d = new_l1d;
 
-		if (new_l1i)
-			l1i = new_l1i;
+	if (new_l1i)
+		l1i = new_l1i;
 
-		if (new_l2) {
-			l2 = new_l2;
+	if (new_l2) {
+		l2 = new_l2;
 #ifdef CONFIG_SMP
-			cpu_llc_id[cpu] = l2_id;
+		cpu_llc_id[cpu] = l2_id;
 #endif
-		}
+	}
 
-		if (new_l3) {
-			l3 = new_l3;
+	if (new_l3) {
+		l3 = new_l3;
 #ifdef CONFIG_SMP
-			cpu_llc_id[cpu] = l3_id;
+		cpu_llc_id[cpu] = l3_id;
 #endif
-		}
-
-		if ( trace )
-			printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
-		else if ( l1i )
-			printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
-		if ( l1d )
-			printk(", L1 D cache: %dK\n", l1d);
-		else
-			printk("\n");
-		if ( l2 )
-			printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-		if ( l3 )
-			printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
-		c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 	}
 
+	if (trace)
+		printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+	else if ( l1i )
+		printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+
+	if (l1d)
+		printk(", L1 D cache: %dK\n", l1d);
+	else
+		printk("\n");
+
+	if (l2)
+		printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+	if (l3)
+		printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+
 	return l2;
 }
 
-- 
cgit v0.10.2


From a117e66ed45ac0569c039ea60bd7a9a61e031858 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:25 -0800
Subject: [PATCH] unify pfn_to_page: generic functions

There are 3 memory models, FLATMEM, DISCONTIGMEM, SPARSEMEM.
Each arch has its own page_to_pfn(), pfn_to_page() for each models.
But most of them can use the same arithmetic.

This patch adds asm-generic/memory_model.h, which includes generic
page_to_pfn(), pfn_to_page() definitions for each memory model.

When CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y, out-of-line functions are
used instead of macro. This is enabled by some archs and  reduces
text size.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ian Molton <spyro@f2s.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Hirokazu Takata <takata.hirokazu@renesas.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Richard Curnow <rc@rc0.org.uk>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp>
Cc: Chris Zankel <chris@zankel.net>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
new file mode 100644
index 0000000..a7bb497
--- /dev/null
+++ b/include/asm-generic/memory_model.h
@@ -0,0 +1,77 @@
+#ifndef __ASM_MEMORY_MODEL_H
+#define __ASM_MEMORY_MODEL_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#if defined(CONFIG_FLATMEM)
+
+#ifndef ARCH_PFN_OFFSET
+#define ARCH_PFN_OFFSET		(0UL)
+#endif
+
+#elif defined(CONFIG_DISCONTIGMEM)
+
+#ifndef arch_pfn_to_nid
+#define arch_pfn_to_nid(pfn)	pfn_to_nid(pfn)
+#endif
+
+#ifndef arch_local_page_offset
+#define arch_local_page_offset(pfn, nid)	\
+	((pfn) - NODE_DATA(nid)->node_start_pfn)
+#endif
+
+#endif /* CONFIG_DISCONTIGMEM */
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+struct page;
+/* this is useful when inlined pfn_to_page is too big */
+extern struct page *pfn_to_page(unsigned long pfn);
+extern unsigned long page_to_pfn(struct page *page);
+#else
+/*
+ * supports 3 memory models.
+ */
+#if defined(CONFIG_FLATMEM)
+
+#define pfn_to_page(pfn)	(mem_map + ((pfn) - ARCH_PFN_OFFSET))
+#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + \
+				 ARCH_PFN_OFFSET)
+#elif defined(CONFIG_DISCONTIGMEM)
+
+#define pfn_to_page(pfn)			\
+({	unsigned long __pfn = (pfn);		\
+	unsigned long __nid = arch_pfn_to_nid(pfn);  \
+	NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
+})
+
+#define page_to_pfn(pg)			\
+({	struct page *__pg = (pg);		\
+	struct zone *__zone = page_zone(__pg);	\
+	(unsigned long)(__pg - __zone->zone_mem_map) +	\
+	 __zone->zone_start_pfn;			\
+})
+
+#elif defined(CONFIG_SPARSEMEM)
+/*
+ * Note: section's mem_map is encorded to reflect its start_pfn.
+ * section[i].section_mem_map == mem_map's address - start_pfn;
+ */
+#define page_to_pfn(pg)					\
+({	struct page *__pg = (pg);				\
+	int __sec = page_to_section(__pg);			\
+	__pg - __section_mem_map_addr(__nr_to_section(__sec));	\
+})
+
+#define pfn_to_page(pfn)				\
+({	unsigned long __pfn = (pfn);			\
+	struct mem_section *__sec = __pfn_to_section(__pfn);	\
+	__section_mem_map_addr(__sec) + __pfn;		\
+})
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index 66fe4ac..aabb219 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -111,6 +111,8 @@ typedef unsigned long pgprot_t;
 				 (_AC(0x0000000070000000,UL)) : \
 				 (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
 
+#include <asm-generic/memory_model.h>
+
 #endif /* !(__ASSEMBLY__) */
 
 /* to align the pointer to the (next) page boundary */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ebfc238..0c1c0c0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -602,17 +602,6 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
-#define pfn_to_page(pfn) 						\
-({ 									\
-	unsigned long __pfn = (pfn);					\
-	__section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn;	\
-})
-#define page_to_pfn(page)						\
-({									\
-	page - __section_mem_map_addr(__nr_to_section(			\
-		page_to_section(page)));				\
-})
-
 static inline int pfn_valid(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 338a02b..349b328 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2745,3 +2745,45 @@ void *__init alloc_large_system_hash(const char *tablename,
 
 	return table;
 }
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+	return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	int nid = arch_pfn_to_nid(pfn);
+	return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	return (page - zone->zone_mem_map) + zone->zone_start_pfn;
+
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+	long section_id = page_to_section(page);
+	return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
-- 
cgit v0.10.2


From ad658b385e6308066f9084a7ea01305b223cd3a0 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:33 -0800
Subject: [PATCH] unify pfn_to_page: i386 pfn_to_page

i386 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 74f595d..e33e9f9 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -70,8 +70,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 #endif
 }
 
-#define node_localnr(pfn, nid)		((pfn) - node_data[nid]->node_start_pfn)
-
 /*
  * Following are macros that each numa implmentation must define.
  */
@@ -86,21 +84,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 /* XXX: FIXME -- wli */
 #define kern_addr_valid(kaddr)	(0)
 
-#define pfn_to_page(pfn)						\
-({									\
-	unsigned long __pfn = pfn;					\
-	int __node  = pfn_to_nid(__pfn);				\
-	&NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)];	\
-})
-
-#define page_to_pfn(pg)							\
-({									\
-	struct page *__page = pg;					\
-	struct zone *__zone = page_zone(__page);			\
-	(unsigned long)(__page - __zone->zone_mem_map)			\
-		+ __zone->zone_start_pfn;				\
-})
-
 #ifdef CONFIG_X86_NUMAQ            /* we have contiguous memory on NUMA-Q */
 #define pfn_valid(pfn)          ((pfn) < num_physpages)
 #else
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index 997ca5d..30f52a2 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -126,8 +126,6 @@ extern int page_is_ram(unsigned long pagenr);
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 #ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
@@ -141,6 +139,7 @@ extern int page_is_ram(unsigned long pagenr);
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _I386_PAGE_H */
-- 
cgit v0.10.2


From dc8ecb43701a78bd3c38e7fed1d1c76840579450 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:34 -0800
Subject: [PATCH] unify pfn_to_page: x86_64 pfn_to_page

x86_64 can use generic funcs.
For DISCONTIGMEM, CONFIG_OUT_OF_LINE_PFN_TO_PAGE is selected.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 1cb4aa2..4310b4a 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -334,6 +334,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool y
 	depends on NUMA
 
+config OUT_OF_LINE_PFN_TO_PAGE
+	def_bool y
+	depends on DISCONTIGMEM
+
 config NR_CPUS
 	int "Maximum number of CPUs (2-256)"
 	range 2 255
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 63c7264..4be82d6 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -377,21 +377,6 @@ EXPORT_SYMBOL(node_data);
  * Should do that.
  */
 
-/* Requires pfn_valid(pfn) to be true */
-struct page *pfn_to_page(unsigned long pfn)
-{
-	int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT);
-	return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;
-}
-EXPORT_SYMBOL(pfn_to_page);
-
-unsigned long page_to_pfn(struct page *page)
-{
-	return (long)(((page) - page_zone(page)->zone_mem_map) +
-		      page_zone(page)->zone_start_pfn);
-}
-EXPORT_SYMBOL(page_to_pfn);
-
 int pfn_valid(unsigned long pfn)
 {
 	unsigned nid;
diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index 937f99b..6b18cd8 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -44,12 +44,8 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
 #define kvaddr_to_nid(kaddr)	phys_to_nid(__pa(kaddr))
 
-extern struct page *pfn_to_page(unsigned long pfn);
-extern unsigned long page_to_pfn(struct page *page);
 extern int pfn_valid(unsigned long pfn);
 #endif
 
-#define local_mapnr(kvaddr) \
-	( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) )
 #endif
 #endif
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 615e3e4..408185b 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -123,8 +123,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define __boot_va(x)		__va(x)
 #define __boot_pa(x)		__pa(x)
 #ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < end_pfn)
 #endif
 
@@ -140,6 +138,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _X86_64_PAGE_H */
-- 
cgit v0.10.2


From 659e35051b165fb40f1160190c2d23f3d5c319d5 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:35 -0800
Subject: [PATCH] unify pfn_to_page: powerpc pfn_to_page

PowerPC can use generic ones.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index 0b82df4..2fbeceb 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -69,8 +69,6 @@
 #endif
 
 #ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif
 
@@ -200,6 +198,7 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr,
 		struct page *p);
 extern int page_is_ram(unsigned long pfn);
 
+#include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
-- 
cgit v0.10.2


From 1c05dda2b6f025267ab79a267e0a84628a3760e1 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:36 -0800
Subject: [PATCH] unify pfn_to_page: alpha pfn_to_page

Alpha can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h
index a011ef4..c900439 100644
--- a/include/asm-alpha/mmzone.h
+++ b/include/asm-alpha/mmzone.h
@@ -59,9 +59,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 #define kvaddr_to_nid(kaddr)	pa_to_nid(__pa(kaddr))
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 
-#define local_mapnr(kvaddr) \
-      ((__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)))
-
 /*
  * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
  * and returns the kaddr corresponding to first physical page in the
@@ -104,19 +101,8 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 	__xx;                                                           \
 })
 
-#define pfn_to_page(pfn)						\
-({									\
- 	unsigned long kaddr = (unsigned long)__va((pfn) << PAGE_SHIFT);	\
-	(NODE_DATA(kvaddr_to_nid(kaddr))->node_mem_map + local_mapnr(kaddr));	\
-})
-
-#define page_to_pfn(page)						\
-	((page) - page_zone(page)->zone_mem_map +			\
-	 (page_zone(page)->zone_start_pfn))
-
 #define page_to_pa(page)						\
-	((( (page) - page_zone(page)->zone_mem_map )			\
-	+ page_zone(page)->zone_start_pfn) << PAGE_SHIFT)
+	(page_to_pfn(page) << PAGE_SHIFT)
 
 #define pfn_to_nid(pfn)		pa_to_nid(((u64)(pfn) << PAGE_SHIFT))
 #define pfn_valid(pfn)							\
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index fa0b41b..61bcf70 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -85,8 +85,6 @@ typedef unsigned long pgprot_t;
 #define __pa(x)			((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
 #ifndef CONFIG_DISCONTIGMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
@@ -95,9 +93,9 @@ typedef unsigned long pgprot_t;
 
 #define VM_DATA_DEFAULT_FLAGS		(VM_READ | VM_WRITE | VM_EXEC | \
 					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _ALPHA_PAGE_H */
-- 
cgit v0.10.2


From 7eb98a2f3b605d8a11434d855b58d828393ea533 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:37 -0800
Subject: [PATCH] unify pfn_to_page: arm pfn_to_page

ARM can use generic funcs.
PFN_TO_NID, LOCAL_MAP_NR are defined by sub-archs.

Signed-off-by: KAMEZAWA Hirotuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h
index b4e1146..afa5c3e 100644
--- a/include/asm-arm/memory.h
+++ b/include/asm-arm/memory.h
@@ -172,9 +172,7 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
  *  virt_addr_valid(k)	indicates whether a virtual address is valid
  */
 #ifndef CONFIG_DISCONTIGMEM
-
-#define page_to_pfn(page)	(((page) - mem_map) + PHYS_PFN_OFFSET)
-#define pfn_to_page(pfn)	((mem_map + (pfn)) - PHYS_PFN_OFFSET)
+#define ARCH_PFN_OFFSET		(PHYS_PFN_OFFSET)
 #define pfn_valid(pfn)		((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
 
 #define virt_to_page(kaddr)	(pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
@@ -189,13 +187,8 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
  * around in memory.
  */
 #include <linux/numa.h>
-
-#define page_to_pfn(page)					\
-	(( (page) - page_zone(page)->zone_mem_map)		\
-	  + page_zone(page)->zone_start_pfn)
-
-#define pfn_to_page(pfn)					\
-	(PFN_TO_MAPBASE(pfn) + LOCAL_MAP_NR((pfn) << PAGE_SHIFT))
+#define arch_pfn_to_nid(pfn)	(PFN_TO_NID(pfn))
+#define arch_local_page_offset(pfn, nid) (LOCAL_MAP_NR((pfn) << PAGE_OFFSET))
 
 #define pfn_valid(pfn)						\
 	({							\
@@ -243,4 +236,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
 
 #endif
 
+#include <asm-generic/memory_model.h>
+
 #endif
-- 
cgit v0.10.2


From 89fccaf2c4a4a8318153e3460a92284dadeb8f71 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:38 -0800
Subject: [PATCH] unify pfn_to_page: arm26 pfn_to_page

arm26 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ian Molton <spyro@f2s.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-arm26/memory.h b/include/asm-arm26/memory.h
index 20d7861..a65f10b 100644
--- a/include/asm-arm26/memory.h
+++ b/include/asm-arm26/memory.h
@@ -81,8 +81,7 @@ static inline void *phys_to_virt(unsigned long x)
  *  virt_to_page(k)	convert a _valid_ virtual address to struct page *
  *  virt_addr_valid(k)	indicates whether a virtual address is valid
  */
-#define page_to_pfn(page)	(((page) - mem_map) + PHYS_PFN_OFFSET)
-#define pfn_to_page(pfn)	((mem_map + (pfn)) - PHYS_PFN_OFFSET)
+#define ARCH_PFN_OFFSET		(PHYS_PFN_OFFSET)
 #define pfn_valid(pfn)		((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
 
 #define virt_to_page(kaddr)	(pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
@@ -98,4 +97,5 @@ static inline void *phys_to_virt(unsigned long x)
  */
 #define page_to_bus(page)	(page_address(page))
 
+#include <asm-generic/memory_model.h>
 #endif
-- 
cgit v0.10.2


From bb872f787a9b6a40a4aa206175e768137f595d9c Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:39 -0800
Subject: [PATCH] unify pfn_to_page: cris pfn_to_page

cris can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mikael Starvik <starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h
index c99c478..3787633 100644
--- a/include/asm-cris/page.h
+++ b/include/asm-cris/page.h
@@ -43,8 +43,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 /* On CRIS the PFN numbers doesn't start at 0 so we have to compensate */
 /* for that before indexing into the page table starting at mem_map    */
-#define pfn_to_page(pfn)	(mem_map + ((pfn) - (PAGE_OFFSET >> PAGE_SHIFT)))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + (PAGE_OFFSET >> PAGE_SHIFT))
+#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #define pfn_valid(pfn)		(((pfn) - (PAGE_OFFSET >> PAGE_SHIFT)) < max_mapnr)
 
 /* to index into the page map. our pages all start at physical addr PAGE_OFFSET so
@@ -77,6 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _CRIS_PAGE_H */
-- 
cgit v0.10.2


From 5cdac7ca1b727184b1af21effaffcb9e2cd535c2 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:40 -0800
Subject: [PATCH] unify pfn_to_page: FRV pfn_to_page

FRV can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h
index b8221b6..dc0f7e0 100644
--- a/include/asm-frv/page.h
+++ b/include/asm-frv/page.h
@@ -57,13 +57,9 @@ extern unsigned long min_low_pfn;
 extern unsigned long max_pfn;
 
 #ifdef CONFIG_MMU
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long) ((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
-
 #else
-#define pfn_to_page(pfn)	(&mem_map[(pfn) - (PAGE_OFFSET >> PAGE_SHIFT)])
-#define page_to_pfn(page)	((PAGE_OFFSET >> PAGE_SHIFT) + (unsigned long) ((page) - mem_map))
+#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #define pfn_valid(pfn)		((pfn) >= min_low_pfn && (pfn) < max_low_pfn)
 
 #endif
@@ -87,6 +83,7 @@ extern unsigned long max_pfn;
 #define WANT_PAGE_VIRTUAL	1
 #endif
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _ASM_PAGE_H */
-- 
cgit v0.10.2


From dd6cc7631cf2a63b3d6c32619054e017479e72ca Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:41 -0800
Subject: [PATCH] unify pfn_to_page: h8300 pfn_to_page

H8300 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-h8300/page.h b/include/asm-h8300/page.h
index cd35b1c..6472c9f 100644
--- a/include/asm-h8300/page.h
+++ b/include/asm-h8300/page.h
@@ -71,8 +71,7 @@ extern unsigned long memory_end;
 #define page_to_virt(page)	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 #define pfn_valid(page)	        (page < max_mapnr)
 
-#define pfn_to_page(pfn)	virt_to_page(pfn_to_virt(pfn))
-#define page_to_pfn(page)	virt_to_pfn(page_to_virt(page))
+#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 
 #define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
 				((void *)(kaddr) < (void *)memory_end))
@@ -81,6 +80,7 @@ extern unsigned long memory_end;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _H8300_PAGE_H */
-- 
cgit v0.10.2


From 7126cffe74f0dbcd015aaabab01069f422526535 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:42 -0800
Subject: [PATCH] unify pfn_to_page: m32r pfn_to_page

m32r can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takata <takata.hirokazu@renesas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h
index adc7970..9f3b5ac 100644
--- a/include/asm-m32r/mmzone.h
+++ b/include/asm-m32r/mmzone.h
@@ -21,20 +21,6 @@ extern struct pglist_data *node_data[];
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1;	\
 })
 
-#define pfn_to_page(pfn)						\
-({									\
-	unsigned long __pfn = pfn;					\
-	int __node  = pfn_to_nid(__pfn);				\
-	&NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)];	\
-})
-
-#define page_to_pfn(pg)							\
-({									\
-	struct page *__page = pg;					\
-	struct zone *__zone = page_zone(__page);			\
-	(unsigned long)(__page - __zone->zone_mem_map)			\
-		+ __zone->zone_start_pfn;				\
-})
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
 /*
  * pfn_valid should be made as fast as possible, and the current definition
diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h
index 4ab5788..9ddbc08 100644
--- a/include/asm-m32r/page.h
+++ b/include/asm-m32r/page.h
@@ -76,9 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #ifndef CONFIG_DISCONTIGMEM
 #define PFN_BASE		(CONFIG_MEMORY_START >> PAGE_SHIFT)
-#define pfn_to_page(pfn)	(mem_map + ((pfn) - PFN_BASE))
-#define page_to_pfn(page)	\
-	((unsigned long)((page) - mem_map) + PFN_BASE)
+#define ARCH_PFN_OFFSET		PFN_BASE
 #define pfn_valid(pfn)		(((pfn) - PFN_BASE) < max_mapnr)
 #endif  /* !CONFIG_DISCONTIGMEM */
 
@@ -92,6 +90,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _ASM_M32R_PAGE_H */
-- 
cgit v0.10.2


From a02036e796e5046fe0463b9a092e9b617c525866 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:43 -0800
Subject: [PATCH] unify pfn_to_page: mips pfn_to_page

MIPS can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-mips/mmzone.h b/include/asm-mips/mmzone.h
index 011caeb..7bde443 100644
--- a/include/asm-mips/mmzone.h
+++ b/include/asm-mips/mmzone.h
@@ -22,20 +22,6 @@
 		       NODE_DATA(__n)->node_spanned_pages) : 0);\
 })
 
-#define pfn_to_page(pfn)					\
-({								\
- 	unsigned long __pfn = (pfn);				\
-	pg_data_t *__pg = NODE_DATA(pfn_to_nid(__pfn));		\
-	__pg->node_mem_map + (__pfn - __pg->node_start_pfn);	\
-})
-
-#define page_to_pfn(p)						\
-({								\
-	struct page *__p = (p);					\
-	struct zone *__z = page_zone(__p);			\
-	((__p - __z->zone_mem_map) + __z->zone_start_pfn);	\
-})
-
 /* XXX: FIXME -- wli */
 #define kern_addr_valid(addr)	(0)
 
diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h
index ee25a77..a1eab13 100644
--- a/include/asm-mips/page.h
+++ b/include/asm-mips/page.h
@@ -140,8 +140,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif
 
@@ -160,6 +158,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define WANT_PAGE_VIRTUAL
 #endif
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _ASM_PAGE_H */
-- 
cgit v0.10.2


From 0d833b41092df2f4a65cbdb0a0a947c35cdd2f9d Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:43 -0800
Subject: [PATCH] unify pfn_to_page: parisc pfn_to_page

PARISC can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h
index ae039f4..ceb9b73 100644
--- a/include/asm-parisc/mmzone.h
+++ b/include/asm-parisc/mmzone.h
@@ -25,23 +25,6 @@ extern struct node_map_data node_data[];
 	pg_data_t *__pgdat = NODE_DATA(nid);				\
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
 })
-#define node_localnr(pfn, nid)		((pfn) - node_start_pfn(nid))
-
-#define pfn_to_page(pfn)						\
-({									\
-	unsigned long __pfn = (pfn);					\
-	int __node  = pfn_to_nid(__pfn);				\
-	&NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)];	\
-})
-
-#define page_to_pfn(pg)							\
-({									\
-	struct page *__page = pg;					\
-	struct zone *__zone = page_zone(__page);			\
-	BUG_ON(__zone == NULL);						\
-	(unsigned long)(__page - __zone->zone_mem_map)			\
-		+ __zone->zone_start_pfn;				\
-})
 
 /* We have these possible memory map layouts:
  * Astro: 0-3.75, 67.75-68, 4-64
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index 4a6752b..9f303c0 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -130,8 +130,6 @@ extern int npmem_ranges;
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 
 #ifndef CONFIG_DISCONTIGMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_DISCONTIGMEM */
 
@@ -152,6 +150,7 @@ extern int npmem_ranges;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _PARISC_PAGE_H */
-- 
cgit v0.10.2


From f68d4c996d3062c8fe64e3ed36d4c83d23288ad8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:44 -0800
Subject: [PATCH] unify pfn_to_page: ppc pfn_to_page

PPC can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h
index 538e0c8..a70ba2e 100644
--- a/include/asm-ppc/page.h
+++ b/include/asm-ppc/page.h
@@ -149,8 +149,7 @@ extern int page_is_ram(unsigned long pfn);
 #define __pa(x) ___pa((unsigned long)(x))
 #define __va(x) ((void *)(___va((unsigned long)(x))))
 
-#define pfn_to_page(pfn)	(mem_map + ((pfn) - PPC_PGSTART))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + PPC_PGSTART)
+#define ARCH_PFN_OFFSET		(PPC_PGSTART)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define page_to_virt(page)	__va(page_to_pfn(page) << PAGE_SHIFT)
 
@@ -175,5 +174,6 @@ extern __inline__ int get_order(unsigned long size)
 /* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */
 #define __HAVE_ARCH_GATE_AREA		1
 
+#include <asm-generic/memory_model.h>
 #endif /* __KERNEL__ */
 #endif /* _PPC_PAGE_H */
-- 
cgit v0.10.2


From aed630434c4fc0bca8ed62f6730d44ba00bdf595 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:45 -0800
Subject: [PATCH] unify pfn_to_page: s390 pfn_to_page

s390 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index 2430c56..3b1138a 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -181,8 +181,6 @@ page_get_storage_key(unsigned long addr)
 #define PAGE_OFFSET             0x0UL
 #define __pa(x)                 (unsigned long)(x)
 #define __va(x)                 (void *)(unsigned long)(x)
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
@@ -193,6 +191,7 @@ page_get_storage_key(unsigned long addr)
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _S390_PAGE_H */
-- 
cgit v0.10.2


From 104b8deaa5c0144cccfc7d914413ff80c7176af1 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:46 -0800
Subject: [PATCH] unify pfn_to_page: sh pfn_to_page

sh can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index 972c3f6..9c89287 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -105,9 +105,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 /* PFN start number, because of __MEMORY_START */
 #define PFN_START		(__MEMORY_START >> PAGE_SHIFT)
-
-#define pfn_to_page(pfn)	(mem_map + (pfn) - PFN_START)
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + PFN_START)
+#define ARCH_PFN_OFFSET		(FPN_START)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_valid(pfn)		(((pfn) - PFN_START) < max_mapnr)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
@@ -117,6 +115,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* __ASM_SH_PAGE_H */
-- 
cgit v0.10.2


From 309d34bbe521940de9098d306a58a1797a42d990 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:47 -0800
Subject: [PATCH] unify pfn_to_page: sh64 pfn_to_page

sh64 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Richard Curnow <rc@rc0.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-sh64/page.h b/include/asm-sh64/page.h
index c86df90..e4937cd 100644
--- a/include/asm-sh64/page.h
+++ b/include/asm-sh64/page.h
@@ -105,9 +105,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 /* PFN start number, because of __MEMORY_START */
 #define PFN_START		(__MEMORY_START >> PAGE_SHIFT)
-
-#define pfn_to_page(pfn)	(mem_map + (pfn) - PFN_START)
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + PFN_START)
+#define ARCH_PFN_OFFSET		(PFN_START)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_valid(pfn)		(((pfn) - PFN_START) < max_mapnr)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
@@ -117,6 +115,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* __ASM_SH64_PAGE_H */
-- 
cgit v0.10.2


From 064b2a0762ef7dca5f7068a90b260a309ffe2002 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:48 -0800
Subject: [PATCH] unify pfn_to_page: sparc pfn_to_page

sparc can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h
index 9122684..ec3274b 100644
--- a/include/asm-sparc/page.h
+++ b/include/asm-sparc/page.h
@@ -152,8 +152,7 @@ extern unsigned long pfn_base;
 #define virt_to_phys		__pa
 #define phys_to_virt		__va
 
-#define pfn_to_page(pfn)	(mem_map + ((pfn)-(pfn_base)))
-#define page_to_pfn(page)	((unsigned long)(((page) - mem_map) + pfn_base))
+#define ARCH_PFN_OFFSET		(pfn_base)
 #define virt_to_page(kaddr)	(mem_map + ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT)))
 
 #define pfn_valid(pfn)		(((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr))
@@ -164,6 +163,7 @@ extern unsigned long pfn_base;
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* _SPARC_PAGE_H */
-- 
cgit v0.10.2


From 9828c185db7ab41426c03c5539b1759ccf3c28ed Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:50 -0800
Subject: [PATCH] unify pfn_to_page: uml pfn_to_page

UML can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 0229814..4136433 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -106,9 +106,6 @@ extern unsigned long uml_physmem;
 #define __pa(virt) to_phys((void *) (unsigned long) (virt))
 #define __va(phys) to_virt((unsigned long) (phys))
 
-#define page_to_pfn(page) ((page) - mem_map)
-#define pfn_to_page(pfn) (mem_map + (pfn))
-
 #define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
 #define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
 
@@ -121,6 +118,7 @@ extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
 extern void arch_free_page(struct page *page, int order);
 #define HAVE_ARCH_FREE_PAGE
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif
-- 
cgit v0.10.2


From e6009f1ba5f6ff0fc8723254ce89ef9c979cc370 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:52 -0800
Subject: [PATCH] unify pfn_to_page: v850 pfn_to_page

v850 can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h
index b4bc85e..ad03c46 100644
--- a/include/asm-v850/page.h
+++ b/include/asm-v850/page.h
@@ -111,8 +111,7 @@ typedef unsigned long pgprot_t;
 #define page_to_virt(page) \
   ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 
-#define pfn_to_page(pfn)	virt_to_page (pfn_to_virt (pfn))
-#define page_to_pfn(page)	virt_to_pfn (page_to_virt (page))
+#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #define pfn_valid(pfn)	        ((pfn) < max_mapnr)
 
 #define	virt_addr_valid(kaddr)						\
@@ -125,6 +124,7 @@ typedef unsigned long pgprot_t;
 
 #endif /* KERNEL */
 
+#include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
 #endif /* __V850_PAGE_H__ */
-- 
cgit v0.10.2


From 655a0443470a73d5dc36e974a241e8db59bb1ccb Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:52 -0800
Subject: [PATCH] unify pfn_to_page: xtensa pfn_to_page

xtensa can use generic funcs.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h
index 8ded36f..992bac5 100644
--- a/include/asm-xtensa/page.h
+++ b/include/asm-xtensa/page.h
@@ -109,10 +109,7 @@ void copy_user_page(void *to,void* from,unsigned long vaddr,struct page* page);
 #define __pa(x)			((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
 #define pfn_valid(pfn)		((unsigned long)pfn < max_mapnr)
-#ifndef CONFIG_DISCONTIGMEM
-# define pfn_to_page(pfn)	(mem_map + (pfn))
-# define page_to_pfn(page)	((unsigned long)((page) - mem_map))
-#else
+#ifdef CONFIG_DISCONTIGMEM
 # error CONFIG_DISCONTIGMEM not supported
 #endif
 
@@ -130,4 +127,5 @@ void copy_user_page(void *to,void* from,unsigned long vaddr,struct page* page);
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
 #endif /* _XTENSA_PAGE_H */
-- 
cgit v0.10.2


From 0ecd702bcb924d5fb7f687e09986f688336ac896 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:53 -0800
Subject: [PATCH] unify pfn_to_page: ia64 pfn_to_page

ia64 has special config CONFIG_VIRTUAL_MEM_MAP.
CONFIG_DISCONTIGMEM=y && CONFIG_VIRTUAL_MEM_MAP!=y is bug ?

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 6e9aa23..2087825 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -106,17 +106,25 @@ extern int ia64_pfn_valid (unsigned long pfn);
 # define ia64_pfn_valid(pfn) 1
 #endif
 
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern struct page *vmem_map;
+#ifdef CONFIG_DISCONTIGMEM
+# define page_to_pfn(page)	((unsigned long) (page - vmem_map))
+# define pfn_to_page(pfn)	(vmem_map + (pfn))
+#endif
+#endif
+
+#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM)
+/* FLATMEM always configures mem_map (mem_map = vmem_map if necessary) */
+#include <asm-generic/memory_model.h>
+#endif
+
 #ifdef CONFIG_FLATMEM
 # define pfn_valid(pfn)		(((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page)	((unsigned long) (page - mem_map))
-# define pfn_to_page(pfn)	(mem_map + (pfn))
 #elif defined(CONFIG_DISCONTIGMEM)
-extern struct page *vmem_map;
 extern unsigned long min_low_pfn;
 extern unsigned long max_low_pfn;
 # define pfn_valid(pfn)		(((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page)	((unsigned long) (page - vmem_map))
-# define pfn_to_page(pfn)	(vmem_map + (pfn))
 #endif
 
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-- 
cgit v0.10.2


From a0140c1d85637ee5f4ea7c78f066e3611a6a79dc Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:55 -0800
Subject: [PATCH] remove zone_mem_map

This patch removes zone_mem_map.

pfn_to_page uses pgdat, page_to_pfn uses zone.  page_to_pfn can use pgdat
instead of zone, which is only one user of zone_mem_map.  By modifing it,
we can remove zone_mem_map.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h
index c900439..192d80c 100644
--- a/include/asm-alpha/mmzone.h
+++ b/include/asm-alpha/mmzone.h
@@ -83,8 +83,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 	pte_t pte;                                                           \
 	unsigned long pfn;                                                   \
 									     \
-	pfn = ((unsigned long)((page)-page_zone(page)->zone_mem_map)) << 32; \
-	pfn += page_zone(page)->zone_start_pfn << 32;			     \
+	pfn = page_to_pfn(page) << 32; \
 	pte_val(pte) = pfn | pgprot_val(pgprot);			     \
 									     \
 	pte;								     \
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index a7bb497..0cfb086 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -45,11 +45,11 @@ extern unsigned long page_to_pfn(struct page *page);
 	NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
 })
 
-#define page_to_pfn(pg)			\
-({	struct page *__pg = (pg);		\
-	struct zone *__zone = page_zone(__pg);	\
-	(unsigned long)(__pg - __zone->zone_mem_map) +	\
-	 __zone->zone_start_pfn;			\
+#define page_to_pfn(pg)							\
+({	struct page *__pg = (pg);					\
+	struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg));	\
+	(unsigned long)(__pg - __pgdat->node_mem_map) +			\
+	 __pgdat->node_start_pfn;					\
 })
 
 #elif defined(CONFIG_SPARSEMEM)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0c1c0c0..ace31c5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -225,7 +225,6 @@ struct zone {
 	 * Discontig memory support fields.
 	 */
 	struct pglist_data	*zone_pgdat;
-	struct page		*zone_mem_map;
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 349b328..8dc8f27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2042,7 +2042,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
 	zone_wait_table_init(zone, size);
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
-	zone->zone_mem_map = pfn_to_page(zone_start_pfn);
 	zone->zone_start_pfn = zone_start_pfn;
 
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2768,9 +2767,8 @@ struct page *pfn_to_page(unsigned long pfn)
 }
 unsigned long page_to_pfn(struct page *page)
 {
-	struct zone *zone = page_zone(page);
-	return (page - zone->zone_mem_map) + zone->zone_start_pfn;
-
+	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
 }
 #elif defined(CONFIG_SPARSEMEM)
 struct page *pfn_to_page(unsigned long pfn)
-- 
cgit v0.10.2


From 8357f8695d58b50fbf2bd507b4b0fc2cd1e43bd6 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:57 -0800
Subject: [PATCH] define for_each_online_pgdat

This patch defines for_each_online_pgdat() as a replacement of
for_each_pgdat()

Now, online nodes are managed by node_online_map.  But for_each_pgdat()
uses pgdat_link to iterate over all nodes(pgdat).  This means management
structure for online pgdat is duplicated.

I think using node_online_map for for_each_pgdat() is simple and sane
rather ather than pgdat_link.  New macro is named as
for_each_online_pgdat().  Following patch will fix callers of
for_each_pgdat().

The bootmem allocater uses for_each_pgdat() before pgdat initialization.  I
don't think it's sane.  Following patch will fix it.

Signed-off-by: Yasunori Goto     <y-goto@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ace31c5..96eb080 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/init.h>
 #include <linux/seqlock.h>
+#include <linux/nodemask.h>
 #include <asm/atomic.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -349,57 +350,6 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
-/**
- * for_each_pgdat - helper macro to iterate over all nodes
- * @pgdat - pointer to a pg_data_t variable
- *
- * Meant to help with common loops of the form
- * pgdat = pgdat_list;
- * while(pgdat) {
- * 	...
- * 	pgdat = pgdat->pgdat_next;
- * }
- */
-#define for_each_pgdat(pgdat) \
-	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
-
-/*
- * next_zone - helper magic for for_each_zone()
- * Thanks to William Lee Irwin III for this piece of ingenuity.
- */
-static inline struct zone *next_zone(struct zone *zone)
-{
-	pg_data_t *pgdat = zone->zone_pgdat;
-
-	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
-		zone++;
-	else if (pgdat->pgdat_next) {
-		pgdat = pgdat->pgdat_next;
-		zone = pgdat->node_zones;
-	} else
-		zone = NULL;
-
-	return zone;
-}
-
-/**
- * for_each_zone - helper macro to iterate over all memory zones
- * @zone - pointer to struct zone variable
- *
- * The user only needs to declare the zone variable, for_each_zone
- * fills it in. This basically means for_each_zone() is an
- * easier to read version of this piece of code:
- *
- * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
- * 	for (i = 0; i < MAX_NR_ZONES; ++i) {
- * 		struct zone * z = pgdat->node_zones + i;
- * 		...
- * 	}
- * }
- */
-#define for_each_zone(zone) \
-	for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
-
 static inline int populated_zone(struct zone *zone)
 {
 	return (!!zone->present_pages);
@@ -471,6 +421,62 @@ extern struct pglist_data contig_page_data;
 
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+static inline struct pglist_data *first_online_pgdat(void)
+{
+	return NODE_DATA(first_online_node);
+}
+
+static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+{
+	int nid = next_online_node(pgdat->node_id);
+
+	if (nid == MAX_NUMNODES)
+		return NULL;
+	return NODE_DATA(nid);
+}
+
+
+/**
+ * for_each_pgdat - helper macro to iterate over all nodes
+ * @pgdat - pointer to a pg_data_t variable
+ */
+#define for_each_online_pgdat(pgdat)			\
+	for (pgdat = first_online_pgdat();		\
+	     pgdat;					\
+	     pgdat = next_online_pgdat(pgdat))
+
+/*
+ * next_zone - helper magic for for_each_zone()
+ * Thanks to William Lee Irwin III for this piece of ingenuity.
+ */
+static inline struct zone *next_zone(struct zone *zone)
+{
+	pg_data_t *pgdat = zone->zone_pgdat;
+
+	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
+		zone++;
+	else {
+		pgdat = next_online_pgdat(pgdat);
+		if (pgdat)
+			zone = pgdat->node_zones;
+		else
+			zone = NULL;
+	}
+	return zone;
+}
+
+/**
+ * for_each_zone - helper macro to iterate over all memory zones
+ * @zone - pointer to struct zone variable
+ *
+ * The user only needs to declare the zone variable, for_each_zone
+ * fills it in.
+ */
+#define for_each_zone(zone)			        \
+	for (zone = (first_online_pgdat())->node_zones; \
+	     zone;					\
+	     zone = next_zone(zone))
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #endif
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index b959a45..1a9ef3e 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -350,11 +350,15 @@ extern nodemask_t node_possible_map;
 #define num_possible_nodes()	nodes_weight(node_possible_map)
 #define node_online(node)	node_isset((node), node_online_map)
 #define node_possible(node)	node_isset((node), node_possible_map)
+#define first_online_node	first_node(node_online_map)
+#define next_online_node(nid)	next_node((nid), node_online_map)
 #else
 #define num_online_nodes()	1
 #define num_possible_nodes()	1
 #define node_online(node)	((node) == 0)
 #define node_possible(node)	((node) == 0)
+#define first_online_node	0
+#define next_online_node(nid)	(MAX_NUMNODES)
 #endif
 
 #define any_online_node(mask)			\
-- 
cgit v0.10.2


From 679bc9fbb508a0aac9539b2de747eb5849feb428 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:58 -0800
Subject: [PATCH] for_each_online_pgdat: for_each_bootmem

Add a list_head to bootmem_data_t and make bootmems use it.  bootmem list is
sorted by node_boot_start.

Only nodes against which init_bootmem() is called are linked to the list.
(i386 allocates bootmem only from one node(0) not from all online nodes.)

A summary:
 1. for_each_online_pgdat() traverses all *online* nodes.
 2. alloc_bootmem() allocates memory only from initialized-for-bootmem nodes.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7155452..de3eb8d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,6 +38,7 @@ typedef struct bootmem_data {
 	unsigned long last_pos;
 	unsigned long last_success;	/* Previous allocation point.  To speed
 					 * up searching */
+	struct list_head list;
 } bootmem_data_t;
 
 extern unsigned long __init bootmem_bootmap_pages (unsigned long);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index b55bd39..d3e3bd2 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -33,6 +33,7 @@ EXPORT_SYMBOL(max_pfn);		/* This is exported so
 				 * dma_get_required_mask(), which uses
 				 * it, can be an inline function */
 
+static LIST_HEAD(bdata_list);
 #ifdef CONFIG_CRASH_DUMP
 /*
  * If we have booted due to a crash, max_pfn will be a very low value. We need
@@ -52,6 +53,27 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
 
 	return mapsize;
 }
+/*
+ * link bdata in order
+ */
+static void link_bootmem(bootmem_data_t *bdata)
+{
+	bootmem_data_t *ent;
+	if (list_empty(&bdata_list)) {
+		list_add(&bdata->list, &bdata_list);
+		return;
+	}
+	/* insert in order */
+	list_for_each_entry(ent, &bdata_list, list) {
+		if (bdata->node_boot_start < ent->node_boot_start) {
+			list_add_tail(&bdata->list, &ent->list);
+			return;
+		}
+	}
+	list_add_tail(&bdata->list, &bdata_list);
+	return;
+}
+
 
 /*
  * Called once to set up the allocator itself.
@@ -62,13 +84,11 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize = ((end - start)+7)/8;
 
-	pgdat->pgdat_next = pgdat_list;
-	pgdat_list = pgdat;
-
 	mapsize = ALIGN(mapsize, sizeof(long));
 	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
 	bdata->node_boot_start = (start << PAGE_SHIFT);
 	bdata->node_low_pfn = end;
+	link_bootmem(bdata);
 
 	/*
 	 * Initially all pages are reserved - setup_arch() has to
@@ -383,12 +403,11 @@ unsigned long __init free_all_bootmem (void)
 
 void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
 {
-	pg_data_t *pgdat = pgdat_list;
+	bootmem_data_t *bdata;
 	void *ptr;
 
-	for_each_pgdat(pgdat)
-		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-						 align, goal, 0)))
+	list_for_each_entry(bdata, &bdata_list, list)
+		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
 			return(ptr);
 
 	/*
@@ -416,11 +435,11 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne
 
 void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
 {
-	pg_data_t *pgdat = pgdat_list;
+	bootmem_data_t *bdata;
 	void *ptr;
 
-	for_each_pgdat(pgdat)
-		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
+	list_for_each_entry(bdata, &bdata_list, list)
+		if ((ptr = __alloc_bootmem_core(bdata, size,
 						 align, goal, LOW32LIMIT)))
 			return(ptr);
 
-- 
cgit v0.10.2


From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:15:59 -0800
Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat

Replace for_each_pgdat() with for_each_online_pgdat().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 9db3242..2889567 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -36,7 +36,7 @@ void show_mem(void)
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 2f5e448..384f1d7 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -386,7 +386,7 @@ static void __init pgdat_insert(pg_data_t *pgdat)
 {
 	pg_data_t *prev = NULL, *next;
 
-	for_each_pgdat(next)
+	for_each_online_pgdat(next)
 		if (pgdat->node_id < next->node_id)
 			break;
 		else
@@ -560,7 +560,7 @@ void show_mem(void)
 	printk("Mem-info:\n");
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ff4f31f..2ef1151 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -600,7 +600,7 @@ mem_init (void)
 	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
 	kclist_add(&kcore_kernel, _stext, _end - _stext);
 
-	for_each_pgdat(pgdat)
+	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
 			totalram_pages += free_all_bootmem_node(pgdat);
 
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index c9e7dad..2e0fe19 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -47,7 +47,7 @@ void show_mem(void)
 	printk("Mem-info:\n");
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long flags;
 		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index badac10..5e435a9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -195,7 +195,7 @@ void show_mem(void)
 	printk("Mem-info:\n");
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long flags;
 		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
@@ -351,7 +351,7 @@ void __init mem_init(void)
 	max_mapnr = max_pfn;
 	totalram_pages += free_all_bootmem();
 #endif
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			if (!pfn_valid(pgdat->node_start_pfn + i))
 				continue;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index b044156..e5f7f1c 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -72,7 +72,7 @@ void show_mem(void)
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pfn_to_page(pgdat->node_start_pfn + i);
 			total++;
diff --git a/fs/buffer.c b/fs/buffer.c
index d597758..23f1f3a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -493,7 +493,7 @@ static void free_more_memory(void)
 	wakeup_pdflush(1024);
 	yield();
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8dc8f27..ccc3713 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1201,7 +1201,7 @@ unsigned int nr_free_highpages (void)
 	pg_data_t *pgdat;
 	unsigned int pages = 0;
 
-	for_each_pgdat(pgdat)
+	for_each_online_pgdat(pgdat)
 		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
 
 	return pages;
@@ -1343,7 +1343,7 @@ void get_zone_counts(unsigned long *active,
 	*active = 0;
 	*inactive = 0;
 	*free = 0;
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long l, m, n;
 		__get_zone_counts(&l, &m, &n, pgdat);
 		*active += l;
@@ -2482,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void)
 	struct pglist_data *pgdat;
 	int j, idx;
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
 			struct zone *zone = pgdat->node_zones + j;
 			unsigned long present_pages = zone->present_pages;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 78865c8..acdf001 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1305,7 +1305,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 repeat:
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long freed;
 
 		freed = balance_pgdat(pgdat, nr_to_free, 0);
@@ -1335,7 +1335,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 	cpumask_t mask;
 
 	if (action == CPU_ONLINE) {
-		for_each_pgdat(pgdat) {
+		for_each_online_pgdat(pgdat) {
 			mask = node_to_cpumask(pgdat->node_id);
 			if (any_online_cpu(mask) != NR_CPUS)
 				/* One of our CPUs online: restore mask */
@@ -1351,7 +1351,7 @@ static int __init kswapd_init(void)
 	pg_data_t *pgdat;
 
 	swap_setup();
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		pid_t pid;
 
 		pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
-- 
cgit v0.10.2


From 3571761fe49d960bb720c2308ffb9401f0a5e161 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:16:00 -0800
Subject: [PATCH] for_each_online_pgdat: remove sorting pgdat

Because pgdat_list was linked to pgdat_list in *reverse* order, (By default)
some of arch has to sort it by themselves.

for_each_pgdat has gone..for_each_online_pgdat() uses node_online_map, which
doesn't need to be sorted.

This patch removes codes for sorting pgdat.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index c4af963..c3f3ae9 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -352,17 +352,6 @@ void __init zone_sizes_init(void)
 {
 	int nid;
 
-	/*
-	 * Insert nodes into pgdat_list backward so they appear in order.
-	 * Clobber node 0's links and NULL out pgdat_list before starting.
-	 */
-	pgdat_list = NULL;
-	for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) {
-		if (!node_online(nid))
-			continue;
-		NODE_DATA(nid)->pgdat_next = pgdat_list;
-		pgdat_list = NODE_DATA(nid);
-	}
 
 	for_each_online_node(nid) {
 		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 384f1d7..ec9eeb8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -379,31 +379,6 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 }
 
 /**
- * pgdat_insert - insert the pgdat into global pgdat_list
- * @pgdat: the pgdat for a node.
- */
-static void __init pgdat_insert(pg_data_t *pgdat)
-{
-	pg_data_t *prev = NULL, *next;
-
-	for_each_online_pgdat(next)
-		if (pgdat->node_id < next->node_id)
-			break;
-		else
-			prev = next;
-
-	if (prev) {
-		prev->pgdat_next = pgdat;
-		pgdat->pgdat_next = next;
-	} else {
-		pgdat->pgdat_next = pgdat_list;
-		pgdat_list = pgdat;
-	}
-
-	return;
-}
-
-/**
  * memory_less_nodes - allocate and initialize CPU only nodes pernode
  *	information.
  */
@@ -745,11 +720,5 @@ void __init paging_init(void)
 				    pfn_offset, zholes_size);
 	}
 
-	/*
-	 * Make memory less nodes become a member of the known nodes.
-	 */
-	for_each_node_mask(node, memory_less_mask)
-		pgdat_insert(mem_data[node].pgdat);
-
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 08e7279..70c8528 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -137,12 +137,6 @@ unsigned long __init zone_sizes_init(void)
 	int nid, i;
 	mem_prof_t *mp;
 
-	pgdat_list = NULL;
-	for (nid = num_online_nodes() - 1 ; nid >= 0 ; nid--) {
-		NODE_DATA(nid)->pgdat_next = pgdat_list;
-		pgdat_list = NODE_DATA(nid);
-	}
-
 	for_each_online_node(nid) {
 		mp = &mem_prof[nid];
 		for (i = 0 ; i < MAX_NR_ZONES ; i++) {
-- 
cgit v0.10.2


From ae0f15fb91274e67d78836d38c99ec363df33073 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:16:01 -0800
Subject: [PATCH] for_each_online_pgdat: remove pgdat_list

By using for_each_online_pgdat(), pgdat_list is not necessary now.  This patch
removes it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 96eb080..0d12c3c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -307,7 +307,6 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
-	struct pglist_data *pgdat_next;
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
@@ -324,8 +323,6 @@ typedef struct pglist_data {
 
 #include <linux/memory_hotplug.h>
 
-extern struct pglist_data *pgdat_list;
-
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat);
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ccc3713..dc523a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,7 +49,6 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
 EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
@@ -2169,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
 {
 	pg_data_t *pgdat;
 	loff_t node = *pos;
-
-	for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+	for (pgdat = first_online_pgdat();
+	     pgdat && node;
+	     pgdat = next_online_pgdat(pgdat))
 		--node;
 
 	return pgdat;
@@ -2181,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
 	(*pos)++;
-	return pgdat->pgdat_next;
+	return next_online_pgdat(pgdat);
 }
 
 static void frag_stop(struct seq_file *m, void *arg)
-- 
cgit v0.10.2


From 95144c788dc01b6a0ff2c9c2222e37ffdab358b8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 27 Mar 2006 01:16:02 -0800
Subject: [PATCH] uninline zone helpers

Helper functions for for_each_online_pgdat/for_each_zone look too big to be
inlined.  Speed of these helper macro itself is not very important.  (inner
loops are tend to do more work than this)

This patch make helper function to be out-of-lined.

	inline		out-of-line
.text   005c0680        005bf6a0

005c0680 - 005bf6a0 = FE0 = 4Kbytes.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d12c3c..b5c2112 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -418,20 +418,9 @@ extern struct pglist_data contig_page_data;
 
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-static inline struct pglist_data *first_online_pgdat(void)
-{
-	return NODE_DATA(first_online_node);
-}
-
-static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
-{
-	int nid = next_online_node(pgdat->node_id);
-
-	if (nid == MAX_NUMNODES)
-		return NULL;
-	return NODE_DATA(nid);
-}
-
+extern struct pglist_data *first_online_pgdat(void);
+extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
+extern struct zone *next_zone(struct zone *zone);
 
 /**
  * for_each_pgdat - helper macro to iterate over all nodes
@@ -441,27 +430,6 @@ static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 	for (pgdat = first_online_pgdat();		\
 	     pgdat;					\
 	     pgdat = next_online_pgdat(pgdat))
-
-/*
- * next_zone - helper magic for for_each_zone()
- * Thanks to William Lee Irwin III for this piece of ingenuity.
- */
-static inline struct zone *next_zone(struct zone *zone)
-{
-	pg_data_t *pgdat = zone->zone_pgdat;
-
-	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
-		zone++;
-	else {
-		pgdat = next_online_pgdat(pgdat);
-		if (pgdat)
-			zone = pgdat->node_zones;
-		else
-			zone = NULL;
-	}
-	return zone;
-}
-
 /**
  * for_each_zone - helper macro to iterate over all memory zones
  * @zone - pointer to struct zone variable
diff --git a/mm/Makefile b/mm/Makefile
index f10c753..0b8f73f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU)	:= fremap.o highmem.o madvise.o memory.o mincore.o \
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
-			   prio_tree.o util.o $(mmu-y)
+			   prio_tree.o util.o mmzone.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
diff --git a/mm/mmzone.c b/mm/mmzone.c
new file mode 100644
index 0000000..b022370
--- /dev/null
+++ b/mm/mmzone.c
@@ -0,0 +1,50 @@
+/*
+ * linux/mm/mmzone.c
+ *
+ * management codes for pgdats and zones.
+ */
+
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+
+struct pglist_data *first_online_pgdat(void)
+{
+	return NODE_DATA(first_online_node);
+}
+
+EXPORT_SYMBOL(first_online_pgdat);
+
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+{
+	int nid = next_online_node(pgdat->node_id);
+
+	if (nid == MAX_NUMNODES)
+		return NULL;
+	return NODE_DATA(nid);
+}
+EXPORT_SYMBOL(next_online_pgdat);
+
+
+/*
+ * next_zone - helper magic for for_each_zone()
+ */
+struct zone *next_zone(struct zone *zone)
+{
+	pg_data_t *pgdat = zone->zone_pgdat;
+
+	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
+		zone++;
+	else {
+		pgdat = next_online_pgdat(pgdat);
+		if (pgdat)
+			zone = pgdat->node_zones;
+		else
+			zone = NULL;
+	}
+	return zone;
+}
+EXPORT_SYMBOL(next_zone);
+
-- 
cgit v0.10.2


From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Mon, 27 Mar 2006 01:16:04 -0800
Subject: [PATCH] unify PFN_* macros

Just about every architecture defines some macros to do operations on pfns.
 They're all virtually identical.  This patch consolidates all of them.

One minor glitch is that at least i386 uses them in a very skeletal header
file.  To keep away from #include dependency hell, I stuck the new
definitions in a new, isolated header.

Of all of the implementations, sh64 is the only one that varied by a bit.
It used some masks to ensure that any sign-extension got ripped away before
the arithmetic is done.  This has been posted to that sh64 maintainers and
the development list.

Compiles on x86, x86_64, ia64 and ppc64.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index b4e5f8f..9402624 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -34,6 +34,7 @@
 #include <linux/root_dev.h>
 #include <linux/initrd.h>
 #include <linux/eisa.h>
+#include <linux/pfn.h>
 #ifdef CONFIG_MAGIC_SYSRQ
 #include <linux/sysrq.h>
 #include <linux/reboot.h>
@@ -241,9 +242,6 @@ reserve_std_resources(void)
 		request_resource(io, standard_io_resources+i);
 }
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
 #define PFN_MAX		PFN_DOWN(0x80000000)
 #define for_each_mem_cluster(memdesc, cluster, i)		\
 	for ((cluster) = (memdesc)->cluster, (i) = 0;		\
@@ -472,11 +470,6 @@ page_is_ram(unsigned long pfn)
 	return 0;
 }
 
-#undef PFN_UP
-#undef PFN_DOWN
-#undef PFN_PHYS
-#undef PFN_MAX
-
 void __init
 setup_arch(char **cmdline_p)
 {
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 6d52512..bf6b65c 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -13,6 +13,7 @@
 #include <linux/bootmem.h>
 #include <linux/swap.h>
 #include <linux/initrd.h>
+#include <linux/pfn.h>
 
 #include <asm/hwrpb.h>
 #include <asm/pgalloc.h>
@@ -27,9 +28,6 @@ bootmem_data_t node_bdata[MAX_NUMNODES];
 #define DBGDCONT(args...)
 #endif
 
-#define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
 #define for_each_mem_cluster(memdesc, cluster, i)		\
 	for ((cluster) = (memdesc)->cluster, (i) = 0;		\
 	     (i) < (memdesc)->numclusters; (i)++, (cluster)++)
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
index e3ecaa4..7da8a52 100644
--- a/arch/arm26/mm/init.c
+++ b/arch/arm26/mm/init.c
@@ -23,6 +23,7 @@
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
 #include <linux/blkdev.h>
+#include <linux/pfn.h>
 
 #include <asm/segment.h>
 #include <asm/mach-types.h>
@@ -101,12 +102,6 @@ struct node_info {
 	int bootmap_pages;
 };
 
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_UP(x)	(PAGE_ALIGN(x) >> PAGE_SHIFT)
-#define PFN_SIZE(x)	((x) >> PAGE_SHIFT)
-#define PFN_RANGE(s,e)	PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
-				(((unsigned long)(s)) & PAGE_MASK))
-
 /*
  * FIXME: We really want to avoid allocating the bootmap bitmap
  * over the top of the initrd.  Hopefully, this is located towards
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 1ba57ef..619a6ee 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 #include <linux/tty.h>
 #include <linux/utsname.h>
+#include <linux/pfn.h>
 
 #include <asm/setup.h>
 
@@ -88,10 +89,6 @@ setup_arch(char **cmdline_p)
 	init_mm.end_data =   (unsigned long) &_edata;
 	init_mm.brk =        (unsigned long) &_end;
 
-#define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
-
 	/* min_low_pfn points to the start of DRAM, start_pfn points
 	 * to the first DRAM pages after the kernel, and max_low_pfn
 	 * to the end of DRAM.
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 6917daa..8c08660 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -46,6 +46,7 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/dmi.h>
+#include <linux/pfn.h>
 
 #include <video/edid.h>
 
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index c3f3ae9..fe6eb90 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -31,6 +31,7 @@
 #include <linux/nodemask.h>
 #include <linux/module.h>
 #include <linux/kexec.h>
+#include <linux/pfn.h>
 
 #include <asm/e820.h>
 #include <asm/setup.h>
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index d742037..0d78942 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -24,6 +24,7 @@
 #include <linux/tty.h>
 #include <linux/cpu.h>
 #include <linux/nodemask.h>
+#include <linux/pfn.h>
 
 #include <asm/processor.h>
 #include <asm/pgtable.h>
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 70c8528..cf610a7 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -13,6 +13,7 @@
 #include <linux/initrd.h>
 #include <linux/nodemask.h>
 #include <linux/module.h>
+#include <linux/pfn.h>
 
 #include <asm/setup.h>
 
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 2e0fe19..b71348f 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -18,6 +18,7 @@
 #include <linux/highmem.h>
 #include <linux/bitops.h>
 #include <linux/nodemask.h>
+#include <linux/pfn.h>
 #include <asm/types.h>
 #include <asm/processor.h>
 #include <asm/page.h>
diff --git a/arch/mips/ite-boards/ivr/init.c b/arch/mips/ite-boards/ivr/init.c
index ea4e193..b774db0 100644
--- a/arch/mips/ite-boards/ivr/init.c
+++ b/arch/mips/ite-boards/ivr/init.c
@@ -45,9 +45,6 @@ extern void  __init prom_init_cmdline(void);
 extern unsigned long __init prom_get_memsize(void);
 extern void __init it8172_init_ram_resource(unsigned long memsize);
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
-
 const char *get_system_type(void)
 {
 	return "Globespan IVR";
diff --git a/arch/mips/ite-boards/qed-4n-s01b/init.c b/arch/mips/ite-boards/qed-4n-s01b/init.c
index 56dca7e..e8ec8be 100644
--- a/arch/mips/ite-boards/qed-4n-s01b/init.c
+++ b/arch/mips/ite-boards/qed-4n-s01b/init.c
@@ -45,9 +45,6 @@ extern void  __init prom_init_cmdline(void);
 extern unsigned long __init prom_get_memsize(void);
 extern void __init it8172_init_ram_resource(unsigned long memsize);
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
-
 const char *get_system_type(void)
 {
 	return "ITE QED-4N-S01B";
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 0cb3b60..dcbfd27 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -34,6 +34,7 @@
 #include <linux/highmem.h>
 #include <linux/console.h>
 #include <linux/mmzone.h>
+#include <linux/pfn.h>
 
 #include <asm/addrspace.h>
 #include <asm/bootinfo.h>
@@ -257,10 +258,6 @@ static inline int parse_rd_cmdline(unsigned long* rd_start, unsigned long* rd_en
 	return 0;
 }
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
-
 #define MAXMEM		HIGHMEM_START
 #define MAXMEM_PFN	PFN_DOWN(MAXMEM)
 
@@ -493,10 +490,6 @@ static inline void resource_init(void)
 	}
 }
 
-#undef PFN_UP
-#undef PFN_DOWN
-#undef PFN_PHYS
-
 #undef MAXMEM
 #undef MAXMEM_PFN
 
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c
index ee5e70c..32c9210 100644
--- a/arch/mips/mips-boards/generic/memory.c
+++ b/arch/mips/mips-boards/generic/memory.c
@@ -49,9 +49,6 @@ static char *mtypes[3] = {
 /* References to section boundaries */
 extern char _end;
 
-#define PFN_ALIGN(x)    (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
-
-
 struct prom_pmemblock * __init prom_getmdesc(void)
 {
 	char *memsize_str;
@@ -109,10 +106,10 @@ struct prom_pmemblock * __init prom_getmdesc(void)
 
 	mdesc[3].type = yamon_dontuse;
 	mdesc[3].base = 0x00100000;
-	mdesc[3].size = CPHYSADDR(PFN_ALIGN(&_end)) - mdesc[3].base;
+	mdesc[3].size = CPHYSADDR(PAGE_ALIGN(&_end)) - mdesc[3].base;
 
 	mdesc[4].type = yamon_free;
-	mdesc[4].base = CPHYSADDR(PFN_ALIGN(&_end));
+	mdesc[4].base = CPHYSADDR(PAGE_ALIGN(&_end));
 	mdesc[4].size = memsize - mdesc[4].base;
 
 	return &mdesc[0];
diff --git a/arch/mips/mips-boards/sim/sim_mem.c b/arch/mips/mips-boards/sim/sim_mem.c
index 1ec4e75..e57f737 100644
--- a/arch/mips/mips-boards/sim/sim_mem.c
+++ b/arch/mips/mips-boards/sim/sim_mem.c
@@ -42,9 +42,6 @@ static char *mtypes[3] = {
 /* References to section boundaries */
 extern char _end;
 
-#define PFN_ALIGN(x)    (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
-
-
 struct prom_pmemblock * __init prom_getmdesc(void)
 {
 	unsigned int memsize;
@@ -64,10 +61,10 @@ struct prom_pmemblock * __init prom_getmdesc(void)
 
 	mdesc[2].type = simmem_reserved;
 	mdesc[2].base = 0x00100000;
-	mdesc[2].size = CPHYSADDR(PFN_ALIGN(&_end)) - mdesc[2].base;
+	mdesc[2].size = CPHYSADDR(PAGE_ALIGN(&_end)) - mdesc[2].base;
 
 	mdesc[3].type = simmem_free;
-	mdesc[3].base = CPHYSADDR(PFN_ALIGN(&_end));
+	mdesc[3].base = CPHYSADDR(PAGE_ALIGN(&_end));
 	mdesc[3].size = memsize - mdesc[3].base;
 
 	return &mdesc[0];
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 52f7d59..ad89c44 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -25,6 +25,7 @@
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/proc_fs.h>
+#include <linux/pfn.h>
 
 #include <asm/bootinfo.h>
 #include <asm/cachectl.h>
@@ -177,9 +178,6 @@ void __init paging_init(void)
 	free_area_init(zones_size);
 }
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-
 static inline int page_is_ram(unsigned long pagenr)
 {
 	int i;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index e0d095d..6c00dce 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -19,6 +19,7 @@
 #include <linux/nodemask.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include <linux/pfn.h>
 #include <asm/page.h>
 #include <asm/sections.h>
 
@@ -28,8 +29,6 @@
 #include <asm/sn/sn_private.h>
 
 
-#define PFN_UP(x)		(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
 #define SLOT_PFNSHIFT           (SLOT_SHIFT - PAGE_SHIFT)
 #define PFN_NASIDSHFT           (NASID_SHFT - PAGE_SHIFT)
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index c0e79843..7ee4ca2 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -20,6 +20,7 @@
 #include <linux/root_dev.h>
 #include <linux/utsname.h>
 #include <linux/cpu.h>
+#include <linux/pfn.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/sections.h>
@@ -275,10 +276,6 @@ void __init setup_arch(char **cmdline_p)
 
 	sh_mv_setup(cmdline_p);
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
-
 	/*
 	 * Find the highest page frame number we have available
 	 */
diff --git a/arch/sh64/kernel/setup.c b/arch/sh64/kernel/setup.c
index c7a7b816..d2711c9 100644
--- a/arch/sh64/kernel/setup.c
+++ b/arch/sh64/kernel/setup.c
@@ -48,6 +48,7 @@
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/initrd.h>
+#include <linux/pfn.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 0e65340..0500800 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -9,6 +9,7 @@
 #include "linux/vmalloc.h"
 #include "linux/bootmem.h"
 #include "linux/module.h"
+#include "linux/pfn.h"
 #include "asm/types.h"
 #include "asm/pgtable.h"
 #include "kern_util.h"
@@ -316,8 +317,6 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 	}
 }
 
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
 extern int __syscall_stub_start, __binary_start;
 
 void setup_physmem(unsigned long start, unsigned long reserve_end,
diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h
index 826a8ca..ee94145 100644
--- a/include/asm-i386/setup.h
+++ b/include/asm-i386/setup.h
@@ -6,9 +6,7 @@
 #ifndef _i386_SETUP_H
 #define _i386_SETUP_H
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+#include <linux/pfn.h>
 
 /*
  * Reserved space for vmalloc and iomap - defined in asm/page.h
diff --git a/include/asm-m32r/setup.h b/include/asm-m32r/setup.h
index 5f028dc..52f4fa2 100644
--- a/include/asm-m32r/setup.h
+++ b/include/asm-m32r/setup.h
@@ -24,10 +24,6 @@
 #define RAMDISK_PROMPT_FLAG		(0x8000)
 #define RAMDISK_LOAD_FLAG		(0x4000)
 
-#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-sh64/platform.h b/include/asm-sh64/platform.h
index 7046a90..bd0d9c4 100644
--- a/include/asm-sh64/platform.h
+++ b/include/asm-sh64/platform.h
@@ -61,9 +61,4 @@ extern int platform_int_priority[NR_INTC_IRQS];
 #define code_resource (platform_parms.kram_res_p[STANDARD_KRAM_RESOURCES - 2])
 #define data_resource (platform_parms.kram_res_p[STANDARD_KRAM_RESOURCES - 1])
 
-/* Be prepared to 64-bit sign extensions */
-#define PFN_UP(x)       ((((x) + PAGE_SIZE-1) >> PAGE_SHIFT) & 0x000fffff)
-#define PFN_DOWN(x)     (((x) >> PAGE_SHIFT) & 0x000fffff)
-#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
-
 #endif	/* __ASM_SH64_PLATFORM_H */
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
new file mode 100644
index 0000000..bb01f8b
--- /dev/null
+++ b/include/linux/pfn.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_PFN_H_
+#define _LINUX_PFN_H_
+
+#define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
+#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+
+#endif
-- 
cgit v0.10.2


From 66e863acd7f2fb02fef709d5f0ab069445d4a58c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:06 -0800
Subject: [PATCH] ia64: add ptr_to_compat()

Add ptr_to_compat() to ia64 - needed by the robust-futex code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h
index c0b1910..40d01d8 100644
--- a/include/asm-ia64/compat.h
+++ b/include/asm-ia64/compat.h
@@ -189,6 +189,12 @@ compat_ptr (compat_uptr_t uptr)
 	return (void __user *) (unsigned long) uptr;
 }
 
+static inline compat_uptr_t
+ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
 static __inline__ void __user *
 compat_alloc_user_space (long len)
 {
-- 
cgit v0.10.2


From f267fa9f5b377b5cecdb2baf332fec08bb71246d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:09 -0800
Subject: [PATCH] s390: add ptr_to_compat()

Add ptr_to_compat() to s390 - needed by the new robust-futex code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>

untested. CHECKME: am i right about the 0x7fffffffUL masking?

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-s390/compat.h b/include/asm-s390/compat.h
index a007715..356a0b1 100644
--- a/include/asm-s390/compat.h
+++ b/include/asm-s390/compat.h
@@ -128,6 +128,11 @@ static inline void __user *compat_ptr(compat_uptr_t uptr)
 	return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
 }
 
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
 static inline void __user *compat_alloc_user_space(long len)
 {
 	unsigned long stack;
-- 
cgit v0.10.2


From 213b63b76a823e622d87df623aaec1119acaeaa0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:12 -0800
Subject: [PATCH] parisc: add ptr_to_compat()

Add ptr_to_compat() to parisc - needed by the new robust futex code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Grant Grundler <iod00d@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h
index 38b918f..289624d 100644
--- a/include/asm-parisc/compat.h
+++ b/include/asm-parisc/compat.h
@@ -138,6 +138,11 @@ static inline void __user *compat_ptr(compat_uptr_t uptr)
 	return (void __user *)(unsigned long)uptr;
 }
 
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
 static __inline__ void __user *compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = &current->thread.regs;
-- 
cgit v0.10.2


From 62ac285f3c701f0457a15fe01baa64a965c4f5f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:13 -0800
Subject: [PATCH] mips: add ptr_to_compat()

Add ptr_to_compat() - needed by the new robust futex code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-mips/compat.h b/include/asm-mips/compat.h
index 0012bd8..986511d 100644
--- a/include/asm-mips/compat.h
+++ b/include/asm-mips/compat.h
@@ -133,6 +133,11 @@ static inline void __user *compat_ptr(compat_uptr_t uptr)
 	return (void __user *)(long)uptr;
 }
 
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
 static inline void __user *compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = (struct pt_regs *)
-- 
cgit v0.10.2


From e9056f13bfcdd054a0c3d730e4e096748d8a363a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:21 -0800
Subject: [PATCH] lightweight robust futexes: arch defaults

This patchset provides a new (written from scratch) implementation of robust
futexes, called "lightweight robust futexes".  We believe this new
implementation is faster and simpler than the vma-based robust futex solutions
presented before, and we'd like this patchset to be adopted in the upstream
kernel.  This is version 1 of the patchset.

  Background
  ----------

What are robust futexes?  To answer that, we first need to understand what
futexes are: normal futexes are special types of locks that in the
noncontended case can be acquired/released from userspace without having to
enter the kernel.

A futex is in essence a user-space address, e.g.  a 32-bit lock variable
field.  If userspace notices contention (the lock is already owned and someone
else wants to grab it too) then the lock is marked with a value that says
"there's a waiter pending", and the sys_futex(FUTEX_WAIT) syscall is used to
wait for the other guy to release it.  The kernel creates a 'futex queue'
internally, so that it can later on match up the waiter with the waker -
without them having to know about each other.  When the owner thread releases
the futex, it notices (via the variable value) that there were waiter(s)
pending, and does the sys_futex(FUTEX_WAKE) syscall to wake them up.  Once all
waiters have taken and released the lock, the futex is again back to
'uncontended' state, and there's no in-kernel state associated with it.  The
kernel completely forgets that there ever was a futex at that address.  This
method makes futexes very lightweight and scalable.

"Robustness" is about dealing with crashes while holding a lock: if a process
exits prematurely while holding a pthread_mutex_t lock that is also shared
with some other process (e.g.  yum segfaults while holding a pthread_mutex_t,
or yum is kill -9-ed), then waiters for that lock need to be notified that the
last owner of the lock exited in some irregular way.

To solve such types of problems, "robust mutex" userspace APIs were created:
pthread_mutex_lock() returns an error value if the owner exits prematurely -
and the new owner can decide whether the data protected by the lock can be
recovered safely.

There is a big conceptual problem with futex based mutexes though: it is the
kernel that destroys the owner task (e.g.  due to a SEGFAULT), but the kernel
cannot help with the cleanup: if there is no 'futex queue' (and in most cases
there is none, futexes being fast lightweight locks) then the kernel has no
information to clean up after the held lock!  Userspace has no chance to clean
up after the lock either - userspace is the one that crashes, so it has no
opportunity to clean up.  Catch-22.

In practice, when e.g.  yum is kill -9-ed (or segfaults), a system reboot is
needed to release that futex based lock.  This is one of the leading
bugreports against yum.

To solve this problem, 'Robust Futex' patches were created and presented on
lkml: the one written by Todd Kneisel and David Singleton is the most advanced
at the moment.  These patches all tried to extend the futex abstraction by
registering futex-based locks in the kernel - and thus give the kernel a
chance to clean up.

E.g.  in David Singleton's robust-futex-6.patch, there are 3 new syscall
variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and FUTEX_RECOVER.
The kernel attaches such robust futexes to vmas (via
vma->vm_file->f_mapping->robust_head), and at do_exit() time, all vmas are
searched to see whether they have a robust_head set.

Lots of work went into the vma-based robust-futex patch, and recently it has
improved significantly, but unfortunately it still has two fundamental
problems left:

 - they have quite complex locking and race scenarios.  The vma-based
   patches had been pending for years, but they are still not completely
   reliable.

 - they have to scan _every_ vma at sys_exit() time, per thread!

The second disadvantage is a real killer: pthread_exit() takes around 1
microsecond on Linux, but with thousands (or tens of thousands) of vmas every
pthread_exit() takes a millisecond or more, also totally destroying the CPU's
L1 and L2 caches!

This is very much noticeable even for normal process sys_exit_group() calls:
the kernel has to do the vma scanning unconditionally!  (this is because the
kernel has no knowledge about how many robust futexes there are to be cleaned
up, because a robust futex might have been registered in another task, and the
futex variable might have been simply mmap()-ed into this process's address
space).

This huge overhead forced the creation of CONFIG_FUTEX_ROBUST, but worse than
that: the overhead makes robust futexes impractical for any type of generic
Linux distribution.

So it became clear to us, something had to be done.  Last week, when Thomas
Gleixner tried to fix up the vma-based robust futex patch in the -rt tree, he
found a handful of new races and we were talking about it and were analyzing
the situation.  At that point a fundamentally different solution occured to
me.  This patchset (written in the past couple of days) implements that new
solution.  Be warned though - the patchset does things we normally dont do in
Linux, so some might find the approach disturbing.  Parental advice
recommended ;-)

  New approach to robust futexes
  ------------------------------

At the heart of this new approach there is a per-thread private list of robust
locks that userspace is holding (maintained by glibc) - which userspace list
is registered with the kernel via a new syscall [this registration happens at
most once per thread lifetime].  At do_exit() time, the kernel checks this
user-space list: are there any robust futex locks to be cleaned up?

In the common case, at do_exit() time, there is no list registered, so the
cost of robust futexes is just a simple current->robust_list != NULL
comparison.  If the thread has registered a list, then normally the list is
empty.  If the thread/process crashed or terminated in some incorrect way then
the list might be non-empty: in this case the kernel carefully walks the list
[not trusting it], and marks all locks that are owned by this thread with the
FUTEX_OWNER_DEAD bit, and wakes up one waiter (if any).

The list is guaranteed to be private and per-thread, so it's lockless.  There
is one race possible though: since adding to and removing from the list is
done after the futex is acquired by glibc, there is a few instructions window
for the thread (or process) to die there, leaving the futex hung.  To protect
against this possibility, userspace (glibc) also maintains a simple per-thread
'list_op_pending' field, to allow the kernel to clean up if the thread dies
after acquiring the lock, but just before it could have added itself to the
list.  Glibc sets this list_op_pending field before it tries to acquire the
futex, and clears it after the list-add (or list-remove) has finished.

That's all that is needed - all the rest of robust-futex cleanup is done in
userspace [just like with the previous patches].

Ulrich Drepper has implemented the necessary glibc support for this new
mechanism, which fully enables robust mutexes.  (Ulrich plans to commit these
changes to glibc-HEAD later today.)

Key differences of this userspace-list based approach, compared to the vma
based method:

 - it's much, much faster: at thread exit time, there's no need to loop
   over every vma (!), which the VM-based method has to do.  Only a very
   simple 'is the list empty' op is done.

 - no VM changes are needed - 'struct address_space' is left alone.

 - no registration of individual locks is needed: robust mutexes dont need
   any extra per-lock syscalls.  Robust mutexes thus become a very lightweight
   primitive - so they dont force the application designer to do a hard choice
   between performance and robustness - robust mutexes are just as fast.

 - no per-lock kernel allocation happens.

 - no resource limits are needed.

 - no kernel-space recovery call (FUTEX_RECOVER) is needed.

 - the implementation and the locking is "obvious", and there are no
   interactions with the VM.

  Performance
  -----------

I have benchmarked the time needed for the kernel to process a list of 1
million (!) held locks, using the new method [on a 2GHz CPU]:

 - with FUTEX_WAIT set [contended mutex]: 130 msecs
 - without FUTEX_WAIT set [uncontended mutex]: 30 msecs

I have also measured an approach where glibc does the lock notification [which
it currently does for !pshared robust mutexes], and that took 256 msecs -
clearly slower, due to the 1 million FUTEX_WAKE syscalls userspace had to do.

(1 million held locks are unheard of - we expect at most a handful of locks to
be held at a time.  Nevertheless it's nice to know that this approach scales
nicely.)

  Implementation details
  ----------------------

The patch adds two new syscalls: one to register the userspace list, and one
to query the registered list pointer:

 asmlinkage long
 sys_set_robust_list(struct robust_list_head __user *head,
                     size_t len);

 asmlinkage long
 sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
                     size_t __user *len_ptr);

List registration is very fast: the pointer is simply stored in
current->robust_list.  [Note that in the future, if robust futexes become
widespread, we could extend sys_clone() to register a robust-list head for new
threads, without the need of another syscall.]

So there is virtually zero overhead for tasks not using robust futexes, and
even for robust futex users, there is only one extra syscall per thread
lifetime, and the cleanup operation, if it happens, is fast and
straightforward.  The kernel doesnt have any internal distinction between
robust and normal futexes.

If a futex is found to be held at exit time, the kernel sets the highest bit
of the futex word:

	#define FUTEX_OWNER_DIED        0x40000000

and wakes up the next futex waiter (if any). User-space does the rest of
the cleanup.

Otherwise, robust futexes are acquired by glibc by putting the TID into the
futex field atomically.  Waiters set the FUTEX_WAITERS bit:

	#define FUTEX_WAITERS           0x80000000

and the remaining bits are for the TID.

  Testing, architecture support
  -----------------------------

I've tested the new syscalls on x86 and x86_64, and have made sure the parsing
of the userspace list is robust [ ;-) ] even if the list is deliberately
corrupted.

i386 and x86_64 syscalls are wired up at the moment, and Ulrich has tested the
new glibc code (on x86_64 and i386), and it works for his robust-mutex
testcases.

All other architectures should build just fine too - but they wont have the
new syscalls yet.

Architectures need to implement the new futex_atomic_cmpxchg_inuser() inline
function before writing up the syscalls (that function returns -ENOSYS right
now).

This patch:

Add placeholder futex_atomic_cmpxchg_inuser() implementations to every
architecture that supports futexes.  It returns -ENOSYS.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h
index fca9d90..9a0e902 100644
--- a/include/asm-frv/futex.h
+++ b/include/asm-frv/futex.h
@@ -9,5 +9,11 @@
 
 extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr);
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif
 #endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3ae2c73..514bd40 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -49,5 +49,11 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif
 #endif
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 44b9db8..1f39ad9 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -104,5 +104,11 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif
 #endif
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index 2454c44..c5fb2d6 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -99,5 +99,11 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif
 #endif
diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h
index 39e85f3..80ed985 100644
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -81,5 +81,11 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_FUTEX_H */
diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index 34c4b43..cd340a2 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -83,4 +83,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif /* !(_SPARC64_FUTEX_H) */
diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h
index 8602c09..4f4cb34 100644
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -94,5 +94,11 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
+static inline int
+futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+{
+	return -ENOSYS;
+}
+
 #endif
 #endif
-- 
cgit v0.10.2


From 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:22 -0800
Subject: [PATCH] lightweight robust futexes: core

Add the core infrastructure for robust futexes: structure definitions, the new
syscalls and the do_exit() based cleanup mechanism.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 10f96c3..20face6 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
+#include <linux/sched.h>
+
 /* Second argument to futex syscall */
 
 
@@ -11,10 +13,103 @@
 #define FUTEX_CMP_REQUEUE	4
 #define FUTEX_WAKE_OP		5
 
+/*
+ * Support for robust futexes: the kernel cleans up held futexes at
+ * thread exit time.
+ */
+
+/*
+ * Per-lock list entry - embedded in user-space locks, somewhere close
+ * to the futex field. (Note: user-space uses a double-linked list to
+ * achieve O(1) list add and remove, but the kernel only needs to know
+ * about the forward link)
+ *
+ * NOTE: this structure is part of the syscall ABI, and must not be
+ * changed.
+ */
+struct robust_list {
+	struct robust_list __user *next;
+};
+
+/*
+ * Per-thread list head:
+ *
+ * NOTE: this structure is part of the syscall ABI, and must only be
+ * changed if the change is first communicated with the glibc folks.
+ * (When an incompatible change is done, we'll increase the structure
+ *  size, which glibc will detect)
+ */
+struct robust_list_head {
+	/*
+	 * The head of the list. Points back to itself if empty:
+	 */
+	struct robust_list list;
+
+	/*
+	 * This relative offset is set by user-space, it gives the kernel
+	 * the relative position of the futex field to examine. This way
+	 * we keep userspace flexible, to freely shape its data-structure,
+	 * without hardcoding any particular offset into the kernel:
+	 */
+	long futex_offset;
+
+	/*
+	 * The death of the thread may race with userspace setting
+	 * up a lock's links. So to handle this race, userspace first
+	 * sets this field to the address of the to-be-taken lock,
+	 * then does the lock acquire, and then adds itself to the
+	 * list, and then clears this field. Hence the kernel will
+	 * always have full knowledge of all locks that the thread
+	 * _might_ have taken. We check the owner TID in any case,
+	 * so only truly owned locks will be handled.
+	 */
+	struct robust_list __user *list_op_pending;
+};
+
+/*
+ * Are there any waiters for this robust futex:
+ */
+#define FUTEX_WAITERS		0x80000000
+
+/*
+ * The kernel signals via this bit that a thread holding a futex
+ * has exited without unlocking the futex. The kernel also does
+ * a FUTEX_WAKE on such futexes, after setting the bit, to wake
+ * up any possible waiters:
+ */
+#define FUTEX_OWNER_DIED	0x40000000
+
+/*
+ * Reserved bit:
+ */
+#define FUTEX_OWNER_PENDING	0x20000000
+
+/*
+ * The rest of the robust-futex field is for the TID:
+ */
+#define FUTEX_TID_MASK		0x1fffffff
+
+/*
+ * A limit of one million locks held per thread (!) ought to be enough
+ * for some time. This also protects against a deliberately circular
+ * list. Not worth introducing an rlimit for this:
+ */
+#define ROBUST_LIST_LIMIT	1048576
+
 long do_futex(unsigned long uaddr, int op, int val,
 		unsigned long timeout, unsigned long uaddr2, int val2,
 		int val3);
 
+extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr);
+
+#ifdef CONFIG_FUTEX
+extern void exit_robust_list(struct task_struct *curr);
+#else
+static inline void exit_robust_list(struct task_struct *curr)
+{
+}
+#endif
+
 #define FUTEX_OP_SET		0	/* *(int *)UADDR2 = OPARG; */
 #define FUTEX_OP_ADD		1	/* *(int *)UADDR2 += OPARG; */
 #define FUTEX_OP_OR		2	/* *(int *)UADDR2 |= OPARG; */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 036d14d2..fd4848f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -35,6 +35,7 @@
 #include <linux/topology.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/futex.h>
 
 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 
@@ -872,6 +873,8 @@ struct task_struct {
 	int cpuset_mems_generation;
 	int cpuset_mem_spread_rotor;
 #endif
+	struct robust_list_head __user *robust_list;
+
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
 };
diff --git a/include/linux/threads.h b/include/linux/threads.h
index b59738a..e646bcd 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -28,7 +28,8 @@
 #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
 
 /*
- * A maximum of 4 million PIDs should be enough for a while:
+ * A maximum of 4 million PIDs should be enough for a while.
+ * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
  */
 #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
 	(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
diff --git a/kernel/exit.c b/kernel/exit.c
index 8037405..aecb48c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,6 +31,7 @@
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
+#include <linux/futex.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -852,6 +853,8 @@ fastcall NORET_TYPE void do_exit(long code)
 		exit_itimers(tsk->signal);
 		acct_process(code);
 	}
+	if (unlikely(tsk->robust_list))
+		exit_robust_list(tsk);
 	exit_mm(tsk);
 
 	exit_sem(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5efa2f9..feb724b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -8,6 +8,10 @@
  *  Removed page pinning, fix privately mapped COW pages and other cleanups
  *  (C) Copyright 2003, 2004 Jamie Lokier
  *
+ *  Robust futex support started by Ingo Molnar
+ *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
+ *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -829,6 +833,174 @@ error:
 	goto out;
 }
 
+/*
+ * Support for robust futexes: the kernel cleans up held futexes at
+ * thread exit time.
+ *
+ * Implementation: user-space maintains a per-thread list of locks it
+ * is holding. Upon do_exit(), the kernel carefully walks this list,
+ * and marks all locks that are owned by this thread with the
+ * FUTEX_OWNER_DEAD bit, and wakes up a waiter (if any). The list is
+ * always manipulated with the lock held, so the list is private and
+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
+ * field, to allow the kernel to clean up if the thread dies after
+ * acquiring the lock, but just before it could have added itself to
+ * the list. There can only be one such pending lock.
+ */
+
+/**
+ * sys_set_robust_list - set the robust-futex list head of a task
+ * @head: pointer to the list-head
+ * @len: length of the list-head, as userspace expects
+ */
+asmlinkage long
+sys_set_robust_list(struct robust_list_head __user *head,
+		    size_t len)
+{
+	/*
+	 * The kernel knows only one size for now:
+	 */
+	if (unlikely(len != sizeof(*head)))
+		return -EINVAL;
+
+	current->robust_list = head;
+
+	return 0;
+}
+
+/**
+ * sys_get_robust_list - get the robust-futex list head of a task
+ * @pid: pid of the process [zero for current task]
+ * @head_ptr: pointer to a list-head pointer, the kernel fills it in
+ * @len_ptr: pointer to a length field, the kernel fills in the header size
+ */
+asmlinkage long
+sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+		    size_t __user *len_ptr)
+{
+	struct robust_list_head *head;
+	unsigned long ret;
+
+	if (!pid)
+		head = current->robust_list;
+	else {
+		struct task_struct *p;
+
+		ret = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(pid);
+		if (!p)
+			goto err_unlock;
+		ret = -EPERM;
+		if ((current->euid != p->euid) && (current->euid != p->uid) &&
+				!capable(CAP_SYS_PTRACE))
+			goto err_unlock;
+		head = p->robust_list;
+		read_unlock(&tasklist_lock);
+	}
+
+	if (put_user(sizeof(*head), len_ptr))
+		return -EFAULT;
+	return put_user(head, head_ptr);
+
+err_unlock:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
+
+/*
+ * Process a futex-list entry, check whether it's owned by the
+ * dying task, and do notification if so:
+ */
+int handle_futex_death(unsigned int *uaddr, struct task_struct *curr)
+{
+	unsigned int futex_val;
+
+repeat:
+	if (get_user(futex_val, uaddr))
+		return -1;
+
+	if ((futex_val & FUTEX_TID_MASK) == curr->pid) {
+		/*
+		 * Ok, this dying thread is truly holding a futex
+		 * of interest. Set the OWNER_DIED bit atomically
+		 * via cmpxchg, and if the value had FUTEX_WAITERS
+		 * set, wake up a waiter (if any). (We have to do a
+		 * futex_wake() even if OWNER_DIED is already set -
+		 * to handle the rare but possible case of recursive
+		 * thread-death.) The rest of the cleanup is done in
+		 * userspace.
+		 */
+		if (futex_atomic_cmpxchg_inuser(uaddr, futex_val,
+					 futex_val | FUTEX_OWNER_DIED) !=
+								   futex_val)
+			goto repeat;
+
+		if (futex_val & FUTEX_WAITERS)
+			futex_wake((unsigned long)uaddr, 1);
+	}
+	return 0;
+}
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+void exit_robust_list(struct task_struct *curr)
+{
+	struct robust_list_head __user *head = curr->robust_list;
+	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned long futex_offset;
+
+	/*
+	 * Fetch the list head (which was registered earlier, via
+	 * sys_set_robust_list()):
+	 */
+	if (get_user(entry, &head->list.next))
+		return;
+	/*
+	 * Fetch the relative futex offset:
+	 */
+	if (get_user(futex_offset, &head->futex_offset))
+		return;
+	/*
+	 * Fetch any possibly pending lock-add first, and handle it
+	 * if it exists:
+	 */
+	if (get_user(pending, &head->list_op_pending))
+		return;
+	if (pending)
+		handle_futex_death((void *)pending + futex_offset, curr);
+
+	while (entry != &head->list) {
+		/*
+		 * A pending lock might already be on the list, so
+		 * dont process it twice:
+		 */
+		if (entry != pending)
+			if (handle_futex_death((void *)entry + futex_offset,
+						curr))
+				return;
+
+		/*
+		 * Fetch the next entry in the list:
+		 */
+		if (get_user(entry, &entry->next))
+			return;
+		/*
+		 * Avoid excessively long or circular lists:
+		 */
+		if (!--limit)
+			break;
+
+		cond_resched();
+	}
+}
+
 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
 		unsigned long uaddr2, int val2, int val3)
 {
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1067090..d82864c 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -42,6 +42,10 @@ cond_syscall(sys_recvmsg);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
 cond_syscall(compat_sys_futex);
+cond_syscall(sys_set_robust_list);
+cond_syscall(compat_sys_set_robust_list);
+cond_syscall(sys_get_robust_list);
+cond_syscall(compat_sys_get_robust_list);
 cond_syscall(sys_epoll_create);
 cond_syscall(sys_epoll_ctl);
 cond_syscall(sys_epoll_wait);
-- 
cgit v0.10.2


From 2eec9ad91f71a3dbacece5c4fb5adc09fad53a96 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:23 -0800
Subject: [PATCH] lightweight robust futexes: docs

Add robust-futex documentation.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
new file mode 100644
index 0000000..def5d87
--- /dev/null
+++ b/Documentation/robust-futex-ABI.txt
@@ -0,0 +1,184 @@
+Started by Paul Jackson <pj@sgi.com>
+
+The robust futex ABI
+--------------------
+
+Robust_futexes provide a mechanism that is used in addition to normal
+futexes, for kernel assist of cleanup of held locks on task exit.
+
+The interesting data as to what futexes a thread is holding is kept on a
+linked list in user space, where it can be updated efficiently as locks
+are taken and dropped, without kernel intervention.  The only additional
+kernel intervention required for robust_futexes above and beyond what is
+required for futexes is:
+
+ 1) a one time call, per thread, to tell the kernel where its list of
+    held robust_futexes begins, and
+ 2) internal kernel code at exit, to handle any listed locks held
+    by the exiting thread.
+
+The existing normal futexes already provide a "Fast Userspace Locking"
+mechanism, which handles uncontested locking without needing a system
+call, and handles contested locking by maintaining a list of waiting
+threads in the kernel.  Options on the sys_futex(2) system call support
+waiting on a particular futex, and waking up the next waiter on a
+particular futex.
+
+For robust_futexes to work, the user code (typically in a library such
+as glibc linked with the application) has to manage and place the
+necessary list elements exactly as the kernel expects them.  If it fails
+to do so, then improperly listed locks will not be cleaned up on exit,
+probably causing deadlock or other such failure of the other threads
+waiting on the same locks.
+
+A thread that anticipates possibly using robust_futexes should first
+issue the system call:
+
+    asmlinkage long
+    sys_set_robust_list(struct robust_list_head __user *head, size_t len);
+
+The pointer 'head' points to a structure in the threads address space
+consisting of three words.  Each word is 32 bits on 32 bit arch's, or 64
+bits on 64 bit arch's, and local byte order.  Each thread should have
+its own thread private 'head'.
+
+If a thread is running in 32 bit compatibility mode on a 64 native arch
+kernel, then it can actually have two such structures - one using 32 bit
+words for 32 bit compatibility mode, and one using 64 bit words for 64
+bit native mode.  The kernel, if it is a 64 bit kernel supporting 32 bit
+compatibility mode, will attempt to process both lists on each task
+exit, if the corresponding sys_set_robust_list() call has been made to
+setup that list.
+
+  The first word in the memory structure at 'head' contains a
+  pointer to a single linked list of 'lock entries', one per lock,
+  as described below.  If the list is empty, the pointer will point
+  to itself, 'head'.  The last 'lock entry' points back to the 'head'.
+
+  The second word, called 'offset', specifies the offset from the
+  address of the associated 'lock entry', plus or minus, of what will
+  be called the 'lock word', from that 'lock entry'.  The 'lock word'
+  is always a 32 bit word, unlike the other words above.  The 'lock
+  word' holds 3 flag bits in the upper 3 bits, and the thread id (TID)
+  of the thread holding the lock in the bottom 29 bits.  See further
+  below for a description of the flag bits.
+
+  The third word, called 'list_op_pending', contains transient copy of
+  the address of the 'lock entry', during list insertion and removal,
+  and is needed to correctly resolve races should a thread exit while
+  in the middle of a locking or unlocking operation.
+
+Each 'lock entry' on the single linked list starting at 'head' consists
+of just a single word, pointing to the next 'lock entry', or back to
+'head' if there are no more entries.  In addition, nearby to each 'lock
+entry', at an offset from the 'lock entry' specified by the 'offset'
+word, is one 'lock word'.
+
+The 'lock word' is always 32 bits, and is intended to be the same 32 bit
+lock variable used by the futex mechanism, in conjunction with
+robust_futexes.  The kernel will only be able to wakeup the next thread
+waiting for a lock on a threads exit if that next thread used the futex
+mechanism to register the address of that 'lock word' with the kernel.
+
+For each futex lock currently held by a thread, if it wants this
+robust_futex support for exit cleanup of that lock, it should have one
+'lock entry' on this list, with its associated 'lock word' at the
+specified 'offset'.  Should a thread die while holding any such locks,
+the kernel will walk this list, mark any such locks with a bit
+indicating their holder died, and wakeup the next thread waiting for
+that lock using the futex mechanism.
+
+When a thread has invoked the above system call to indicate it
+anticipates using robust_futexes, the kernel stores the passed in 'head'
+pointer for that task.  The task may retrieve that value later on by
+using the system call:
+
+    asmlinkage long
+    sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+                        size_t __user *len_ptr);
+
+It is anticipated that threads will use robust_futexes embedded in
+larger, user level locking structures, one per lock.  The kernel
+robust_futex mechanism doesn't care what else is in that structure, so
+long as the 'offset' to the 'lock word' is the same for all
+robust_futexes used by that thread.  The thread should link those locks
+it currently holds using the 'lock entry' pointers.  It may also have
+other links between the locks, such as the reverse side of a double
+linked list, but that doesn't matter to the kernel.
+
+By keeping its locks linked this way, on a list starting with a 'head'
+pointer known to the kernel, the kernel can provide to a thread the
+essential service available for robust_futexes, which is to help clean
+up locks held at the time of (a perhaps unexpectedly) exit.
+
+Actual locking and unlocking, during normal operations, is handled
+entirely by user level code in the contending threads, and by the
+existing futex mechanism to wait for, and wakeup, locks.  The kernels
+only essential involvement in robust_futexes is to remember where the
+list 'head' is, and to walk the list on thread exit, handling locks
+still held by the departing thread, as described below.
+
+There may exist thousands of futex lock structures in a threads shared
+memory, on various data structures, at a given point in time. Only those
+lock structures for locks currently held by that thread should be on
+that thread's robust_futex linked lock list a given time.
+
+A given futex lock structure in a user shared memory region may be held
+at different times by any of the threads with access to that region. The
+thread currently holding such a lock, if any, is marked with the threads
+TID in the lower 29 bits of the 'lock word'.
+
+When adding or removing a lock from its list of held locks, in order for
+the kernel to correctly handle lock cleanup regardless of when the task
+exits (perhaps it gets an unexpected signal 9 in the middle of
+manipulating this list), the user code must observe the following
+protocol on 'lock entry' insertion and removal:
+
+On insertion:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+    to be inserted,
+ 2) acquire the futex lock,
+ 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
+    of the 'lock word', to the linked list starting at 'head', and
+ 4) clear the 'list_op_pending' word.
+
+	XXX I am particularly unsure of the following -pj XXX
+
+On removal:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+    to be removed,
+ 2) remove the lock entry for this lock from the 'head' list,
+ 2) release the futex lock, and
+ 2) clear the 'lock_op_pending' word.
+
+On exit, the kernel will consider the address stored in
+'list_op_pending' and the address of each 'lock word' found by walking
+the list starting at 'head'.  For each such address, if the bottom 29
+bits of the 'lock word' at offset 'offset' from that address equals the
+exiting threads TID, then the kernel will do two things:
+
+ 1) if bit 31 (0x80000000) is set in that word, then attempt a futex
+    wakeup on that address, which will waken the next thread that has
+    used to the futex mechanism to wait on that address, and
+ 2) atomically set  bit 30 (0x40000000) in the 'lock word'.
+
+In the above, bit 31 was set by futex waiters on that lock to indicate
+they were waiting, and bit 30 is set by the kernel to indicate that the
+lock owner died holding the lock.
+
+The kernel exit code will silently stop scanning the list further if at
+any point:
+
+ 1) the 'head' pointer or an subsequent linked list pointer
+    is not a valid address of a user space word
+ 2) the calculated location of the 'lock word' (address plus
+    'offset') is not the valud address of a 32 bit user space
+    word
+ 3) if the list contains more than 1 million (subject to
+    future kernel configuration changes) elements.
+
+When the kernel sees a list entry whose 'lock word' doesn't have the
+current threads TID in the lower 29 bits, it does nothing with that
+entry, and goes on to the next entry.
+
+Bit 29 (0x20000000) of the 'lock word' is reserved for future use.
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
new file mode 100644
index 0000000..7aecc67
--- /dev/null
+++ b/Documentation/robust-futexes.txt
@@ -0,0 +1,218 @@
+Started by: Ingo Molnar <mingo@redhat.com>
+
+Background
+----------
+
+what are robust futexes? To answer that, we first need to understand
+what futexes are: normal futexes are special types of locks that in the
+noncontended case can be acquired/released from userspace without having
+to enter the kernel.
+
+A futex is in essence a user-space address, e.g. a 32-bit lock variable
+field. If userspace notices contention (the lock is already owned and
+someone else wants to grab it too) then the lock is marked with a value
+that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT)
+syscall is used to wait for the other guy to release it. The kernel
+creates a 'futex queue' internally, so that it can later on match up the
+waiter with the waker - without them having to know about each other.
+When the owner thread releases the futex, it notices (via the variable
+value) that there were waiter(s) pending, and does the
+sys_futex(FUTEX_WAKE) syscall to wake them up.  Once all waiters have
+taken and released the lock, the futex is again back to 'uncontended'
+state, and there's no in-kernel state associated with it. The kernel
+completely forgets that there ever was a futex at that address. This
+method makes futexes very lightweight and scalable.
+
+"Robustness" is about dealing with crashes while holding a lock: if a
+process exits prematurely while holding a pthread_mutex_t lock that is
+also shared with some other process (e.g. yum segfaults while holding a
+pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need
+to be notified that the last owner of the lock exited in some irregular
+way.
+
+To solve such types of problems, "robust mutex" userspace APIs were
+created: pthread_mutex_lock() returns an error value if the owner exits
+prematurely - and the new owner can decide whether the data protected by
+the lock can be recovered safely.
+
+There is a big conceptual problem with futex based mutexes though: it is
+the kernel that destroys the owner task (e.g. due to a SEGFAULT), but
+the kernel cannot help with the cleanup: if there is no 'futex queue'
+(and in most cases there is none, futexes being fast lightweight locks)
+then the kernel has no information to clean up after the held lock!
+Userspace has no chance to clean up after the lock either - userspace is
+the one that crashes, so it has no opportunity to clean up. Catch-22.
+
+In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot
+is needed to release that futex based lock. This is one of the leading
+bugreports against yum.
+
+To solve this problem, the traditional approach was to extend the vma
+(virtual memory area descriptor) concept to have a notion of 'pending
+robust futexes attached to this area'. This approach requires 3 new
+syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and
+FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether
+they have a robust_head set. This approach has two fundamental problems
+left:
+
+ - it has quite complex locking and race scenarios. The vma-based
+   approach had been pending for years, but they are still not completely
+   reliable.
+
+ - they have to scan _every_ vma at sys_exit() time, per thread!
+
+The second disadvantage is a real killer: pthread_exit() takes around 1
+microsecond on Linux, but with thousands (or tens of thousands) of vmas
+every pthread_exit() takes a millisecond or more, also totally
+destroying the CPU's L1 and L2 caches!
+
+This is very much noticeable even for normal process sys_exit_group()
+calls: the kernel has to do the vma scanning unconditionally! (this is
+because the kernel has no knowledge about how many robust futexes there
+are to be cleaned up, because a robust futex might have been registered
+in another task, and the futex variable might have been simply mmap()-ed
+into this process's address space).
+
+This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that
+normal kernels can turn it off, but worse than that: the overhead makes
+robust futexes impractical for any type of generic Linux distribution.
+
+So something had to be done.
+
+New approach to robust futexes
+------------------------------
+
+At the heart of this new approach there is a per-thread private list of
+robust locks that userspace is holding (maintained by glibc) - which
+userspace list is registered with the kernel via a new syscall [this
+registration happens at most once per thread lifetime]. At do_exit()
+time, the kernel checks this user-space list: are there any robust futex
+locks to be cleaned up?
+
+In the common case, at do_exit() time, there is no list registered, so
+the cost of robust futexes is just a simple current->robust_list != NULL
+comparison. If the thread has registered a list, then normally the list
+is empty. If the thread/process crashed or terminated in some incorrect
+way then the list might be non-empty: in this case the kernel carefully
+walks the list [not trusting it], and marks all locks that are owned by
+this thread with the FUTEX_OWNER_DEAD bit, and wakes up one waiter (if
+any).
+
+The list is guaranteed to be private and per-thread at do_exit() time,
+so it can be accessed by the kernel in a lockless way.
+
+There is one race possible though: since adding to and removing from the
+list is done after the futex is acquired by glibc, there is a few
+instructions window for the thread (or process) to die there, leaving
+the futex hung. To protect against this possibility, userspace (glibc)
+also maintains a simple per-thread 'list_op_pending' field, to allow the
+kernel to clean up if the thread dies after acquiring the lock, but just
+before it could have added itself to the list. Glibc sets this
+list_op_pending field before it tries to acquire the futex, and clears
+it after the list-add (or list-remove) has finished.
+
+That's all that is needed - all the rest of robust-futex cleanup is done
+in userspace [just like with the previous patches].
+
+Ulrich Drepper has implemented the necessary glibc support for this new
+mechanism, which fully enables robust mutexes.
+
+Key differences of this userspace-list based approach, compared to the
+vma based method:
+
+ - it's much, much faster: at thread exit time, there's no need to loop
+   over every vma (!), which the VM-based method has to do. Only a very
+   simple 'is the list empty' op is done.
+
+ - no VM changes are needed - 'struct address_space' is left alone.
+
+ - no registration of individual locks is needed: robust mutexes dont
+   need any extra per-lock syscalls. Robust mutexes thus become a very
+   lightweight primitive - so they dont force the application designer
+   to do a hard choice between performance and robustness - robust
+   mutexes are just as fast.
+
+ - no per-lock kernel allocation happens.
+
+ - no resource limits are needed.
+
+ - no kernel-space recovery call (FUTEX_RECOVER) is needed.
+
+ - the implementation and the locking is "obvious", and there are no
+   interactions with the VM.
+
+Performance
+-----------
+
+I have benchmarked the time needed for the kernel to process a list of 1
+million (!) held locks, using the new method [on a 2GHz CPU]:
+
+ - with FUTEX_WAIT set [contended mutex]: 130 msecs
+ - without FUTEX_WAIT set [uncontended mutex]: 30 msecs
+
+I have also measured an approach where glibc does the lock notification
+[which it currently does for !pshared robust mutexes], and that took 256
+msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls
+userspace had to do.
+
+(1 million held locks are unheard of - we expect at most a handful of
+locks to be held at a time. Nevertheless it's nice to know that this
+approach scales nicely.)
+
+Implementation details
+----------------------
+
+The patch adds two new syscalls: one to register the userspace list, and
+one to query the registered list pointer:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head,
+                     size_t len);
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+                     size_t __user *len_ptr);
+
+List registration is very fast: the pointer is simply stored in
+current->robust_list. [Note that in the future, if robust futexes become
+widespread, we could extend sys_clone() to register a robust-list head
+for new threads, without the need of another syscall.]
+
+So there is virtually zero overhead for tasks not using robust futexes,
+and even for robust futex users, there is only one extra syscall per
+thread lifetime, and the cleanup operation, if it happens, is fast and
+straightforward. The kernel doesnt have any internal distinction between
+robust and normal futexes.
+
+If a futex is found to be held at exit time, the kernel sets the
+following bit of the futex word:
+
+	#define FUTEX_OWNER_DIED        0x40000000
+
+and wakes up the next futex waiter (if any). User-space does the rest of
+the cleanup.
+
+Otherwise, robust futexes are acquired by glibc by putting the TID into
+the futex field atomically. Waiters set the FUTEX_WAITERS bit:
+
+	#define FUTEX_WAITERS           0x80000000
+
+and the remaining bits are for the TID.
+
+Testing, architecture support
+-----------------------------
+
+i've tested the new syscalls on x86 and x86_64, and have made sure the
+parsing of the userspace list is robust [ ;-) ] even if the list is
+deliberately corrupted.
+
+i386 and x86_64 syscalls are wired up at the moment, and Ulrich has
+tested the new glibc code (on x86_64 and i386), and it works for his
+robust-mutex testcases.
+
+All other architectures should build just fine too - but they wont have
+the new syscalls yet.
+
+Architectures need to implement the new futex_atomic_cmpxchg_inuser()
+inline function before writing up the syscalls (that function returns
+-ENOSYS right now).
-- 
cgit v0.10.2


From 34f192c6527f20c47ccec239e7d51a27691b93fc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:24 -0800
Subject: [PATCH] lightweight robust futexes: compat

32-bit syscall compatibility support.  (This patch also moves all futex
related compat functionality into kernel/futex_compat.c.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 24d659c..6d3a654 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -147,6 +147,24 @@ typedef struct compat_sigevent {
 	} _sigev_un;
 } compat_sigevent_t;
 
+struct compat_robust_list {
+	compat_uptr_t			next;
+};
+
+struct compat_robust_list_head {
+	struct compat_robust_list	list;
+	compat_long_t			futex_offset;
+	compat_uptr_t			list_op_pending;
+};
+
+extern void compat_exit_robust_list(struct task_struct *curr);
+
+asmlinkage long
+compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
+			   compat_size_t len);
+asmlinkage long
+compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr,
+			   compat_size_t __user *len_ptr);
 
 long compat_sys_semctl(int first, int second, int third, void __user *uptr);
 long compat_sys_msgsnd(int first, int second, int third, void __user *uptr);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fd4848f..20b4f03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -874,6 +874,9 @@ struct task_struct {
 	int cpuset_mem_spread_rotor;
 #endif
 	struct robust_list_head __user *robust_list;
+#ifdef CONFIG_COMPAT
+	struct compat_robust_list_head __user *compat_robust_list;
+#endif
 
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
diff --git a/kernel/Makefile b/kernel/Makefile
index ff1c11d..58908f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,9 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
+ifeq ($(CONFIG_COMPAT),y)
+obj-$(CONFIG_FUTEX) += futex_compat.o
+endif
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
diff --git a/kernel/compat.c b/kernel/compat.c
index b9bdd12..c1601a8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -17,7 +17,6 @@
 #include <linux/time.h>
 #include <linux/signal.h>
 #include <linux/sched.h>	/* for MAX_SCHEDULE_TIMEOUT */
-#include <linux/futex.h>	/* for FUTEX_WAIT */
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/security.h>
@@ -239,28 +238,6 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
 	return ret;
 }
 
-#ifdef CONFIG_FUTEX
-asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
-		struct compat_timespec __user *utime, u32 __user *uaddr2,
-		int val3)
-{
-	struct timespec t;
-	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
-	int val2 = 0;
-
-	if ((op == FUTEX_WAIT) && utime) {
-		if (get_compat_timespec(&t, utime))
-			return -EFAULT;
-		timeout = timespec_to_jiffies(&t) + 1;
-	}
-	if (op >= FUTEX_REQUEUE)
-		val2 = (int) (unsigned long) utime;
-
-	return do_futex((unsigned long)uaddr, op, val, timeout,
-			(unsigned long)uaddr2, val2, val3);
-}
-#endif
-
 asmlinkage long compat_sys_setrlimit(unsigned int resource,
 		struct compat_rlimit __user *rlim)
 {
diff --git a/kernel/exit.c b/kernel/exit.c
index aecb48c..a8c7efc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -32,6 +32,7 @@
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -855,6 +856,10 @@ fastcall NORET_TYPE void do_exit(long code)
 	}
 	if (unlikely(tsk->robust_list))
 		exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+	if (unlikely(tsk->compat_robust_list))
+		compat_exit_robust_list(tsk);
+#endif
 	exit_mm(tsk);
 
 	exit_sem(tsk);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
new file mode 100644
index 0000000..c153559
--- /dev/null
+++ b/kernel/futex_compat.c
@@ -0,0 +1,142 @@
+/*
+ * linux/kernel/futex_compat.c
+ *
+ * Futex compatibililty routines.
+ *
+ * Copyright 2006, Red Hat, Inc., Ingo Molnar
+ */
+
+#include <linux/linkage.h>
+#include <linux/compat.h>
+#include <linux/futex.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+void compat_exit_robust_list(struct task_struct *curr)
+{
+	struct compat_robust_list_head __user *head = curr->compat_robust_list;
+	struct robust_list __user *entry, *pending;
+	compat_uptr_t uentry, upending;
+	unsigned int limit = ROBUST_LIST_LIMIT;
+	compat_long_t futex_offset;
+
+	/*
+	 * Fetch the list head (which was registered earlier, via
+	 * sys_set_robust_list()):
+	 */
+	if (get_user(uentry, &head->list.next))
+		return;
+	entry = compat_ptr(uentry);
+	/*
+	 * Fetch the relative futex offset:
+	 */
+	if (get_user(futex_offset, &head->futex_offset))
+		return;
+	/*
+	 * Fetch any possibly pending lock-add first, and handle it
+	 * if it exists:
+	 */
+	if (get_user(upending, &head->list_op_pending))
+		return;
+	pending = compat_ptr(upending);
+	if (upending)
+		handle_futex_death((void *)pending + futex_offset, curr);
+
+	while (compat_ptr(uentry) != &head->list) {
+		/*
+		 * A pending lock might already be on the list, so
+		 * dont process it twice:
+		 */
+		if (entry != pending)
+			if (handle_futex_death((void *)entry + futex_offset,
+						curr))
+				return;
+
+		/*
+		 * Fetch the next entry in the list:
+		 */
+		if (get_user(uentry, (compat_uptr_t *)&entry->next))
+			return;
+		entry = compat_ptr(uentry);
+		/*
+		 * Avoid excessively long or circular lists:
+		 */
+		if (!--limit)
+			break;
+
+		cond_resched();
+	}
+}
+
+asmlinkage long
+compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
+			   compat_size_t len)
+{
+	if (unlikely(len != sizeof(*head)))
+		return -EINVAL;
+
+	current->compat_robust_list = head;
+
+	return 0;
+}
+
+asmlinkage long
+compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr,
+			   compat_size_t __user *len_ptr)
+{
+	struct compat_robust_list_head *head;
+	unsigned long ret;
+
+	if (!pid)
+		head = current->compat_robust_list;
+	else {
+		struct task_struct *p;
+
+		ret = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(pid);
+		if (!p)
+			goto err_unlock;
+		ret = -EPERM;
+		if ((current->euid != p->euid) && (current->euid != p->uid) &&
+				!capable(CAP_SYS_PTRACE))
+			goto err_unlock;
+		head = p->compat_robust_list;
+		read_unlock(&tasklist_lock);
+	}
+
+	if (put_user(sizeof(*head), len_ptr))
+		return -EFAULT;
+	return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
+
+asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
+		struct compat_timespec __user *utime, u32 __user *uaddr2,
+		int val3)
+{
+	struct timespec t;
+	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+	int val2 = 0;
+
+	if ((op == FUTEX_WAIT) && utime) {
+		if (get_compat_timespec(&t, utime))
+			return -EFAULT;
+		timeout = timespec_to_jiffies(&t) + 1;
+	}
+	if (op >= FUTEX_REQUEUE)
+		val2 = (int) (unsigned long) utime;
+
+	return do_futex((unsigned long)uaddr, op, val, timeout,
+			(unsigned long)uaddr2, val2, val3);
+}
-- 
cgit v0.10.2


From dfd4e3ec246355274c9cf62c6b04a1ee6fa3caba Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:25 -0800
Subject: [PATCH] lightweight robust futexes: i386

i386: add the futex_atomic_cmpxchg_inuser() assembly implementation, and wire
up the new syscalls.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index ac687d0..326595f 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -310,3 +310,5 @@ ENTRY(sys_call_table)
 	.long sys_pselect6
 	.long sys_ppoll
 	.long sys_unshare		/* 310 */
+	.long sys_set_robust_list
+	.long sys_get_robust_list
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 1f39ad9..41184a3 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -107,7 +107,28 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 static inline int
 futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
 {
-	return -ENOSYS;
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	__asm__ __volatile__(
+		"1:	" LOCK_PREFIX "cmpxchgl %3, %1		\n"
+
+		"2:	.section .fixup, \"ax\"			\n"
+		"3:	mov     %2, %0				\n"
+		"	jmp     2b				\n"
+		"	.previous				\n"
+
+		"	.section __ex_table, \"a\"		\n"
+		"	.align  8				\n"
+		"	.long   1b,3b				\n"
+		"	.previous				\n"
+
+		: "=a" (oldval), "=m" (*uaddr)
+		: "i" (-EFAULT), "r" (newval), "0" (oldval)
+		: "memory"
+	);
+
+	return oldval;
 }
 
 #endif
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index d8afd0e..014e356 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -316,8 +316,10 @@
 #define __NR_pselect6		308
 #define __NR_ppoll		309
 #define __NR_unshare		310
+#define __NR_set_robust_list	311
+#define __NR_get_robust_list	312
 
-#define NR_syscalls 311
+#define NR_syscalls 313
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
-- 
cgit v0.10.2


From 8fdd6c6df7889dc89df3d9fe0f5bbe6733e39f48 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:26 -0800
Subject: [PATCH] lightweight robust futexes: x86_64

x86_64: add the futex_atomic_cmpxchg_inuser() assembly implementation, and
wire up the new syscalls.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 7549a43..35b2fac 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -688,6 +688,8 @@ ia32_sys_call_table:
 	.quad sys_ni_syscall		/* pselect6 for now */
 	.quad sys_ni_syscall		/* ppoll for now */
 	.quad sys_unshare		/* 310 */
+	.quad compat_sys_set_robust_list
+	.quad compat_sys_get_robust_list
 ia32_syscall_end:		
 	.rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
 		.quad ni_syscall
diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h
index 4f4cb34..7d9eb1a 100644
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -97,7 +97,28 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 static inline int
 futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
 {
-	return -ENOSYS;
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	__asm__ __volatile__(
+		"1:	" LOCK_PREFIX "cmpxchgl %3, %1		\n"
+
+		"2:	.section .fixup, \"ax\"			\n"
+		"3:	mov     %2, %0				\n"
+		"	jmp     2b				\n"
+		"	.previous				\n"
+
+		"	.section __ex_table, \"a\"		\n"
+		"	.align  8				\n"
+		"	.quad   1b,3b				\n"
+		"	.previous				\n"
+
+		: "=a" (oldval), "=m" (*uaddr)
+		: "i" (-EFAULT), "r" (newval), "0" (oldval)
+		: "memory"
+	);
+
+	return oldval;
 }
 
 #endif
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index da0341c..fcc5163 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -605,8 +605,12 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall)	/* for now */
 __SYSCALL(__NR_ppoll,	sys_ni_syscall)		/* for now */
 #define __NR_unshare		272
 __SYSCALL(__NR_unshare,	sys_unshare)
+#define __NR_set_robust_list	273
+__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
+#define __NR_get_robust_list	274
+__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
 
-#define __NR_syscall_max __NR_unshare
+#define __NR_syscall_max __NR_get_robust_list
 
 #ifndef __NO_STUBS
 
-- 
cgit v0.10.2


From 8f17d3a5049d32392b79925c73a0cf99ce6d5af0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:27 -0800
Subject: [PATCH] lightweight robust futexes updates

- fix: initialize the robust list(s) to NULL in copy_process.

- doc update

- cleanup: rename _inuser to _inatomic

- __user cleanups and other small cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
index def5d87..8529a17 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/robust-futex-ABI.txt
@@ -142,8 +142,6 @@ On insertion:
     of the 'lock word', to the linked list starting at 'head', and
  4) clear the 'list_op_pending' word.
 
-	XXX I am particularly unsure of the following -pj XXX
-
 On removal:
  1) set the 'list_op_pending' word to the address of the 'lock word'
     to be removed,
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index 7aecc67..df82d75 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -213,6 +213,6 @@ robust-mutex testcases.
 All other architectures should build just fine too - but they wont have
 the new syscalls yet.
 
-Architectures need to implement the new futex_atomic_cmpxchg_inuser()
+Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
 inline function before writing up the syscalls (that function returns
 -ENOSYS right now).
diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h
index 9a0e902..08b3d1d 100644
--- a/include/asm-frv/futex.h
+++ b/include/asm-frv/futex.h
@@ -10,7 +10,7 @@
 extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr);
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 514bd40..df893c1 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -50,7 +50,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 41184a3..7b8ceef 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -105,7 +105,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index c5fb2d6..a554089 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -100,7 +100,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h
index 80ed985..f1b3c00 100644
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -82,7 +82,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h
index 7d9eb1a..9804bf0 100644
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -95,7 +95,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 20face6..55fff96 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -100,7 +100,7 @@ long do_futex(unsigned long uaddr, int op, int val,
 		unsigned long timeout, unsigned long uaddr2, int val2,
 		int val3);
 
-extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr);
+extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
 
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
diff --git a/kernel/fork.c b/kernel/fork.c
index e0a2b44..c49bd19 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,7 +1061,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
-
+	p->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+	p->compat_robust_list = NULL;
+#endif
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
diff --git a/kernel/futex.c b/kernel/futex.c
index feb724b..9c9b2b6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -913,15 +913,15 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(unsigned int *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
 {
-	unsigned int futex_val;
+	u32 uval;
 
-repeat:
-	if (get_user(futex_val, uaddr))
+retry:
+	if (get_user(uval, uaddr))
 		return -1;
 
-	if ((futex_val & FUTEX_TID_MASK) == curr->pid) {
+	if ((uval & FUTEX_TID_MASK) == curr->pid) {
 		/*
 		 * Ok, this dying thread is truly holding a futex
 		 * of interest. Set the OWNER_DIED bit atomically
@@ -932,12 +932,11 @@ repeat:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		if (futex_atomic_cmpxchg_inuser(uaddr, futex_val,
-					 futex_val | FUTEX_OWNER_DIED) !=
-								   futex_val)
-			goto repeat;
+		if (futex_atomic_cmpxchg_inatomic(uaddr, uval,
+					 uval | FUTEX_OWNER_DIED) != uval)
+			goto retry;
 
-		if (futex_val & FUTEX_WAITERS)
+		if (uval & FUTEX_WAITERS)
 			futex_wake((unsigned long)uaddr, 1);
 	}
 	return 0;
@@ -985,7 +984,6 @@ void exit_robust_list(struct task_struct *curr)
 			if (handle_futex_death((void *)entry + futex_offset,
 						curr))
 				return;
-
 		/*
 		 * Fetch the next entry in the list:
 		 */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index c153559..9c077cf 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -121,9 +121,9 @@ err_unlock:
 	return ret;
 }
 
-asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
+asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 		struct compat_timespec __user *utime, u32 __user *uaddr2,
-		int val3)
+		u32 val3)
 {
 	struct timespec t;
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -137,6 +137,5 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
 	if (op >= FUTEX_REQUEUE)
 		val2 = (int) (unsigned long) utime;
 
-	return do_futex((unsigned long)uaddr, op, val, timeout,
-			(unsigned long)uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
 }
-- 
cgit v0.10.2


From 76b81e2b0e2241accebcc68e126bc5ab958661b9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:28 -0800
Subject: [PATCH] lightweight robust futexes updates 2

futex.h updates:

- get rid of FUTEX_OWNER_PENDING - it's not used
- reduce ROBUST_LIST_LIMIT to a saner value

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 55fff96..966a5b3 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -80,21 +80,15 @@ struct robust_list_head {
 #define FUTEX_OWNER_DIED	0x40000000
 
 /*
- * Reserved bit:
- */
-#define FUTEX_OWNER_PENDING	0x20000000
-
-/*
  * The rest of the robust-futex field is for the TID:
  */
-#define FUTEX_TID_MASK		0x1fffffff
+#define FUTEX_TID_MASK		0x3fffffff
 
 /*
- * A limit of one million locks held per thread (!) ought to be enough
- * for some time. This also protects against a deliberately circular
- * list. Not worth introducing an rlimit for this:
+ * This limit protects against a deliberately circular list.
+ * (Not worth introducing an rlimit for it)
  */
-#define ROBUST_LIST_LIMIT	1048576
+#define ROBUST_LIST_LIMIT	2048
 
 long do_futex(unsigned long uaddr, int op, int val,
 		unsigned long timeout, unsigned long uaddr2, int val2,
-- 
cgit v0.10.2


From e041c683412d5bf44dc2b109053e3b837b71742d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 27 Mar 2006 01:16:30 -0800
Subject: [PATCH] Notifier chain update: API changes

The kernel's implementation of notifier chains is unsafe.  There is no
protection against entries being added to or removed from a chain while the
chain is in use.  The issues were discussed in this thread:

    http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2

We noticed that notifier chains in the kernel fall into two basic usage
classes:

	"Blocking" chains are always called from a process context
	and the callout routines are allowed to sleep;

	"Atomic" chains can be called from an atomic context and
	the callout routines are not allowed to sleep.

We decided to codify this distinction and make it part of the API.  Therefore
this set of patches introduces three new, parallel APIs: one for blocking
notifiers, one for atomic notifiers, and one for "raw" notifiers (which is
really just the old API under a new name).  New kinds of data structures are
used for the heads of the chains, and new routines are defined for
registration, unregistration, and calling a chain.  The three APIs are
explained in include/linux/notifier.h and their implementation is in
kernel/sys.c.

With atomic and blocking chains, the implementation guarantees that the chain
links will not be corrupted and that chain callers will not get messed up by
entries being added or removed.  For raw chains the implementation provides no
guarantees at all; users of this API must provide their own protections.  (The
idea was that situations may come up where the assumptions of the atomic and
blocking APIs are not appropriate, so it should be possible for users to
handle these things in their own way.)

There are some limitations, which should not be too hard to live with.  For
atomic/blocking chains, registration and unregistration must always be done in
a process context since the chain is protected by a mutex/rwsem.  Also, a
callout routine for a non-raw chain must not try to register or unregister
entries on its own chain.  (This did happen in a couple of places and the code
had to be changed to avoid it.)

Since atomic chains may be called from within an NMI handler, they cannot use
spinlocks for synchronization.  Instead we use RCU.  The overhead falls almost
entirely in the unregister routine, which is okay since unregistration is much
less frequent that calling a chain.

Here is the list of chains that we adjusted and their classifications.  None
of them use the raw API, so for the moment it is only a placeholder.

  ATOMIC CHAINS
  -------------
arch/i386/kernel/traps.c:		i386die_chain
arch/ia64/kernel/traps.c:		ia64die_chain
arch/powerpc/kernel/traps.c:		powerpc_die_chain
arch/sparc64/kernel/traps.c:		sparc64die_chain
arch/x86_64/kernel/traps.c:		die_chain
drivers/char/ipmi/ipmi_si_intf.c:	xaction_notifier_list
kernel/panic.c:				panic_notifier_list
kernel/profile.c:			task_free_notifier
net/bluetooth/hci_core.c:		hci_notifier
net/ipv4/netfilter/ip_conntrack_core.c:	ip_conntrack_chain
net/ipv4/netfilter/ip_conntrack_core.c:	ip_conntrack_expect_chain
net/ipv6/addrconf.c:			inet6addr_chain
net/netfilter/nf_conntrack_core.c:	nf_conntrack_chain
net/netfilter/nf_conntrack_core.c:	nf_conntrack_expect_chain
net/netlink/af_netlink.c:		netlink_chain

  BLOCKING CHAINS
  ---------------
arch/powerpc/platforms/pseries/reconfig.c:	pSeries_reconfig_chain
arch/s390/kernel/process.c:		idle_chain
arch/x86_64/kernel/process.c		idle_notifier
drivers/base/memory.c:			memory_chain
drivers/cpufreq/cpufreq.c		cpufreq_policy_notifier_list
drivers/cpufreq/cpufreq.c		cpufreq_transition_notifier_list
drivers/macintosh/adb.c:		adb_client_list
drivers/macintosh/via-pmu.c		sleep_notifier_list
drivers/macintosh/via-pmu68k.c		sleep_notifier_list
drivers/macintosh/windfarm_core.c	wf_client_list
drivers/usb/core/notify.c		usb_notifier_list
drivers/video/fbmem.c			fb_notifier_list
kernel/cpu.c				cpu_chain
kernel/module.c				module_notify_list
kernel/profile.c			munmap_notifier
kernel/profile.c			task_exit_notifier
kernel/sys.c				reboot_notifier_list
net/core/dev.c				netdev_chain
net/decnet/dn_dev.c:			dnaddr_chain
net/ipv4/devinet.c:			inetaddr_chain

It's possible that some of these classifications are wrong.  If they are,
please let us know or submit a patch to fix them.  Note that any chain that
gets called very frequently should be atomic, because the rwsem read-locking
used for blocking chains is very likely to incur cache misses on SMP systems.
(However, if the chain's callout routines may sleep then the chain cannot be
atomic.)

The patch set was written by Alan Stern and Chandra Seetharaman, incorporating
material written by Keith Owens and suggestions from Paul McKenney and Andrew
Morton.

[jes@sgi.com: restructure the notifier chain initialization macros]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 9402624..dd87696 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -43,7 +43,7 @@
 #include <asm/setup.h>
 #include <asm/io.h>
 
-extern struct notifier_block *panic_notifier_list;
+extern struct atomic_notifier_head panic_notifier_list;
 static int alpha_panic_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block alpha_panic_block = {
 	alpha_panic_event,
@@ -500,7 +500,8 @@ setup_arch(char **cmdline_p)
 	}
 
 	/* Register a call for panic conditions. */
-	notifier_chain_register(&panic_notifier_list, &alpha_panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list,
+			&alpha_panic_block);
 
 #ifdef CONFIG_ALPHA_GENERIC
 	/* Assume that we've booted from SRM if we haven't booted from MILO.
diff --git a/arch/arm/mach-omap1/board-netstar.c b/arch/arm/mach-omap1/board-netstar.c
index 60d5f8a..7520e60 100644
--- a/arch/arm/mach-omap1/board-netstar.c
+++ b/arch/arm/mach-omap1/board-netstar.c
@@ -141,7 +141,7 @@ static int __init netstar_late_init(void)
 	/* TODO: Setup front panel switch here */
 
 	/* Setup panic notifier */
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c
index bfd5fdd..52e4a9d 100644
--- a/arch/arm/mach-omap1/board-voiceblue.c
+++ b/arch/arm/mach-omap1/board-voiceblue.c
@@ -235,7 +235,7 @@ static struct notifier_block panic_block = {
 static int __init voiceblue_setup(void)
 {
 	/* Setup panic notifier */
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
 	return 0;
 }
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 4624f8c..6b63a5a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -92,22 +92,21 @@ asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
 static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(i386die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
 {
-	int err = 0;
-	unsigned long flags;
-
 	vmalloc_sync_all();
-	spin_lock_irqsave(&die_notifier_lock, flags);
-	err = notifier_chain_register(&i386die_chain, nb);
-	spin_unlock_irqrestore(&die_notifier_lock, flags);
-	return err;
+	return atomic_notifier_chain_register(&i386die_chain, nb);
 }
 EXPORT_SYMBOL(register_die_notifier);
 
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&i386die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 {
 	return	p > (void *)tinfo &&
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index dabd6c3..7c1ddc8 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -30,19 +30,19 @@ extern spinlock_t timerlist_lock;
 fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
-struct notifier_block *ia64die_chain;
+ATOMIC_NOTIFIER_HEAD(ia64die_chain);
 
 int
 register_die_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&ia64die_chain, nb);
+	return atomic_notifier_chain_register(&ia64die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(register_die_notifier);
 
 int
 unregister_die_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&ia64die_chain, nb);
+	return atomic_notifier_chain_unregister(&ia64die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(unregister_die_notifier);
 
diff --git a/arch/mips/lasat/setup.c b/arch/mips/lasat/setup.c
index 83eb08b..e9e9a89 100644
--- a/arch/mips/lasat/setup.c
+++ b/arch/mips/lasat/setup.c
@@ -165,7 +165,8 @@ void __init plat_setup(void)
 
 	/* Set up panic notifier */
 	for (i = 0; i < sizeof(lasat_panic_block) / sizeof(struct notifier_block); i++)
-		notifier_chain_register(&panic_notifier_list, &lasat_panic_block[i]);
+		atomic_notifier_chain_register(&panic_notifier_list,
+				&lasat_panic_block[i]);
 
 	lasat_reboot_setup();
 
diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c
index 92a3b3c..a9c58e0 100644
--- a/arch/mips/sgi-ip22/ip22-reset.c
+++ b/arch/mips/sgi-ip22/ip22-reset.c
@@ -238,7 +238,7 @@ static int __init reboot_setup(void)
 	request_irq(SGI_PANEL_IRQ, panel_int, 0, "Front Panel", NULL);
 	init_timer(&blink_timer);
 	blink_timer.function = blink_timeout;
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
 	return 0;
 }
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 0c94800..ab9d9ce 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -193,7 +193,7 @@ static __init int ip32_reboot_setup(void)
 
 	init_timer(&blink_timer);
 	blink_timer.function = blink_timeout;
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
 	request_irq(MACEISA_RTC_IRQ, ip32_rtc_int, 0, "rtc", NULL);
 
diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c
index 2a01fe1..0cea695 100644
--- a/arch/parisc/kernel/pdc_chassis.c
+++ b/arch/parisc/kernel/pdc_chassis.c
@@ -150,7 +150,8 @@ void __init parisc_pdc_chassis_init(void)
 
 		if (handle) {
 			/* initialize panic notifier chain */
-			notifier_chain_register(&panic_notifier_list, &pdc_chassis_panic_block);
+			atomic_notifier_chain_register(&panic_notifier_list,
+					&pdc_chassis_panic_block);
 
 			/* initialize reboot notifier chain */
 			register_reboot_notifier(&pdc_chassis_reboot_block);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2f3fdad..e20c1fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -579,7 +579,8 @@ void __init setup_arch(char **cmdline_p)
 	panic_timeout = 180;
 
 	if (ppc_md.panic)
-		notifier_chain_register(&panic_notifier_list, &ppc64_panic_block);
+		atomic_notifier_chain_register(&panic_notifier_list,
+				&ppc64_panic_block);
 
 	init_mm.start_code = PAGE_OFFSET;
 	init_mm.end_code = (unsigned long) _etext;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 98660ae..9763faa 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -74,19 +74,19 @@ EXPORT_SYMBOL(__debugger_dabr_match);
 EXPORT_SYMBOL(__debugger_fault_handler);
 #endif
 
-struct notifier_block *powerpc_die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(powerpc_die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
 {
-	int err = 0;
-	unsigned long flags;
+	return atomic_notifier_chain_register(&powerpc_die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
 
-	spin_lock_irqsave(&die_notifier_lock, flags);
-	err = notifier_chain_register(&powerpc_die_chain, nb);
-	spin_unlock_irqrestore(&die_notifier_lock, flags);
-	return err;
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&powerpc_die_chain, nb);
 }
+EXPORT_SYMBOL(unregister_die_notifier);
 
 /*
  * Trap & Exception support
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 86cfa6e..5ad9067 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -94,16 +94,16 @@ static struct device_node *derive_parent(const char *path)
 	return parent;
 }
 
-static struct notifier_block *pSeries_reconfig_chain;
+static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
 
 int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
-	return notifier_chain_register(&pSeries_reconfig_chain, nb);
+	return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
 }
 
 void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
 {
-	notifier_chain_unregister(&pSeries_reconfig_chain, nb);
+	blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
 }
 
 static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
@@ -131,7 +131,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
 		goto out_err;
 	}
 
-	err = notifier_call_chain(&pSeries_reconfig_chain,
+	err = blocking_notifier_call_chain(&pSeries_reconfig_chain,
 				  PSERIES_RECONFIG_ADD, np);
 	if (err == NOTIFY_BAD) {
 		printk(KERN_ERR "Failed to add device node %s\n", path);
@@ -171,7 +171,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
 
 	remove_node_proc_entries(np);
 
-	notifier_call_chain(&pSeries_reconfig_chain,
+	blocking_notifier_call_chain(&pSeries_reconfig_chain,
 			    PSERIES_RECONFIG_REMOVE, np);
 	of_detach_node(np);
 
diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c
index a0fc628..d95c05d 100644
--- a/arch/ppc/platforms/prep_setup.c
+++ b/arch/ppc/platforms/prep_setup.c
@@ -736,7 +736,7 @@ ibm_statusled_progress(char *s, unsigned short hex)
 		hex = 0xfff;
 		if (!notifier_installed) {
 			++notifier_installed;
-			notifier_chain_register(&panic_notifier_list,
+			atomic_notifier_chain_register(&panic_notifier_list,
 						&ibm_statusled_block);
 		}
 	}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 99182a4..4a0f5a1 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -76,17 +76,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 /*
  * Need to know about CPUs going idle?
  */
-static struct notifier_block *idle_chain;
+static ATOMIC_NOTIFIER_HEAD(idle_chain);
 
 int register_idle_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&idle_chain, nb);
+	return atomic_notifier_chain_register(&idle_chain, nb);
 }
 EXPORT_SYMBOL(register_idle_notifier);
 
 int unregister_idle_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&idle_chain, nb);
+	return atomic_notifier_chain_unregister(&idle_chain, nb);
 }
 EXPORT_SYMBOL(unregister_idle_notifier);
 
@@ -95,7 +95,7 @@ void do_monitor_call(struct pt_regs *regs, long interruption_code)
 	/* disable monitor call class 0 */
 	__ctl_clear_bit(8, 15);
 
-	notifier_call_chain(&idle_chain, CPU_NOT_IDLE,
+	atomic_notifier_call_chain(&idle_chain, CPU_NOT_IDLE,
 			    (void *)(long) smp_processor_id());
 }
 
@@ -116,7 +116,8 @@ static void default_idle(void)
 		return;
 	}
 
-	rc = notifier_call_chain(&idle_chain, CPU_IDLE, (void *)(long) cpu);
+	rc = atomic_notifier_call_chain(&idle_chain,
+			CPU_IDLE, (void *)(long) cpu);
 	if (rc != NOTIFY_OK && rc != NOTIFY_DONE)
 		BUG();
 	if (rc != NOTIFY_OK) {
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index df612e4..ff090bb 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -43,18 +43,19 @@
 #include <linux/kmod.h>
 #endif
 
-struct notifier_block *sparc64die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(sparc64die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
 {
-	int err = 0;
-	unsigned long flags;
-	spin_lock_irqsave(&die_notifier_lock, flags);
-	err = notifier_chain_register(&sparc64die_chain, nb);
-	spin_unlock_irqrestore(&die_notifier_lock, flags);
-	return err;
+	return atomic_notifier_chain_register(&sparc64die_chain, nb);
 }
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&sparc64die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
 
 /* When an irrecoverable trap occurs at tl > 0, the trap entry
  * code logs the trap state registers at every level in the trap
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 54388d1..1488816 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -762,7 +762,8 @@ static struct notifier_block panic_exit_notifier = {
 
 static int add_notifier(void)
 {
-	notifier_chain_register(&panic_notifier_list, &panic_exit_notifier);
+	atomic_notifier_chain_register(&panic_notifier_list,
+			&panic_exit_notifier);
 	return(0);
 }
 
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index bb1c872..7d51dd7 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -477,7 +477,8 @@ static struct notifier_block panic_exit_notifier = {
 
 void __init setup_arch(char **cmdline_p)
 {
-	notifier_chain_register(&panic_notifier_list, &panic_exit_notifier);
+	atomic_notifier_chain_register(&panic_notifier_list,
+			&panic_exit_notifier);
 	paging_init();
         strlcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
  	*cmdline_p = command_line;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 0370720..70dd8e5 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -66,24 +66,17 @@ EXPORT_SYMBOL(boot_option_idle_override);
 void (*pm_idle)(void);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
-static struct notifier_block *idle_notifier;
-static DEFINE_SPINLOCK(idle_notifier_lock);
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
 void idle_notifier_register(struct notifier_block *n)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&idle_notifier_lock, flags);
-	notifier_chain_register(&idle_notifier, n);
-	spin_unlock_irqrestore(&idle_notifier_lock, flags);
+	atomic_notifier_chain_register(&idle_notifier, n);
 }
 EXPORT_SYMBOL_GPL(idle_notifier_register);
 
 void idle_notifier_unregister(struct notifier_block *n)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&idle_notifier_lock, flags);
-	notifier_chain_unregister(&idle_notifier, n);
-	spin_unlock_irqrestore(&idle_notifier_lock, flags);
+	atomic_notifier_chain_unregister(&idle_notifier, n);
 }
 EXPORT_SYMBOL(idle_notifier_unregister);
 
@@ -93,13 +86,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
 void enter_idle(void)
 {
 	__get_cpu_var(idle_state) = CPU_IDLE;
-	notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
 
 static void __exit_idle(void)
 {
 	__get_cpu_var(idle_state) = CPU_NOT_IDLE;
-	notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 }
 
 /* Called from interrupts to signify idle end */
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 7b14830..edaa9fe 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -69,20 +69,20 @@ asmlinkage void alignment_check(void);
 asmlinkage void machine_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 
-struct notifier_block *die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
 {
-	int err = 0;
-	unsigned long flags;
-
 	vmalloc_sync_all();
-	spin_lock_irqsave(&die_notifier_lock, flags);
-	err = notifier_chain_register(&die_chain, nb);
-	spin_unlock_irqrestore(&die_notifier_lock, flags);
-	return err;
+	return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&die_chain, nb);
 }
+EXPORT_SYMBOL(unregister_die_notifier);
 
 static inline void conditional_sti(struct pt_regs *regs)
 {
diff --git a/arch/xtensa/platform-iss/setup.c b/arch/xtensa/platform-iss/setup.c
index 2e6dcbf..23790a5 100644
--- a/arch/xtensa/platform-iss/setup.c
+++ b/arch/xtensa/platform-iss/setup.c
@@ -108,5 +108,5 @@ static struct notifier_block iss_panic_block = {
 
 void __init platform_setup(char **p_cmdline)
 {
-	notifier_chain_register(&panic_notifier_list, &iss_panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block);
 }
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 105a0d6..dd547af 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -47,16 +47,16 @@ static struct kset_uevent_ops memory_uevent_ops = {
 	.uevent		= memory_uevent,
 };
 
-static struct notifier_block *memory_chain;
+static BLOCKING_NOTIFIER_HEAD(memory_chain);
 
 int register_memory_notifier(struct notifier_block *nb)
 {
-        return notifier_chain_register(&memory_chain, nb);
+        return blocking_notifier_chain_register(&memory_chain, nb);
 }
 
 void unregister_memory_notifier(struct notifier_block *nb)
 {
-        notifier_chain_unregister(&memory_chain, nb);
+        blocking_notifier_chain_unregister(&memory_chain, nb);
 }
 
 /*
@@ -140,7 +140,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf)
 
 static inline int memory_notify(unsigned long val, void *v)
 {
-	return notifier_call_chain(&memory_chain, val, v);
+	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
 /*
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index b8fb87c..40eb005 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3744,7 +3744,7 @@ static int ipmi_init_msghandler(void)
 	ipmi_timer.expires = jiffies + IPMI_TIMEOUT_JIFFIES;
 	add_timer(&ipmi_timer);
 
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
 	initialized = 1;
 
@@ -3764,7 +3764,7 @@ static __exit void cleanup_ipmi(void)
 	if (!initialized)
 		return;
 
-	notifier_chain_unregister(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block);
 
 	/* This can't be called if any interfaces exist, so no worry about
 	   shutting down the interfaces. */
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 12f858d..35fbd4d 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -237,10 +237,10 @@ struct smi_info
 
 static int try_smi_init(struct smi_info *smi);
 
-static struct notifier_block *xaction_notifier_list;
+static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list);
 static int register_xaction_notifier(struct notifier_block * nb)
 {
-	return notifier_chain_register(&xaction_notifier_list, nb);
+	return atomic_notifier_chain_register(&xaction_notifier_list, nb);
 }
 
 static void si_restart_short_timer(struct smi_info *smi_info);
@@ -302,7 +302,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 		do_gettimeofday(&t);
 		printk("**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
 #endif
-		err = notifier_call_chain(&xaction_notifier_list, 0, smi_info);
+		err = atomic_notifier_call_chain(&xaction_notifier_list,
+				0, smi_info);
 		if (err & NOTIFY_STOP_MASK) {
 			rv = SI_SM_CALL_WITHOUT_DELAY;
 			goto out;
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 6165393..7ece9f3 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1158,7 +1158,8 @@ static int __init ipmi_wdog_init(void)
 	}
 
 	register_reboot_notifier(&wdog_reboot_notifier);
-	notifier_chain_register(&panic_notifier_list, &wdog_panic_notifier);
+	atomic_notifier_chain_register(&panic_notifier_list,
+			&wdog_panic_notifier);
 
 	printk(KERN_INFO PFX "driver initialized\n");
 
@@ -1176,7 +1177,8 @@ static __exit void ipmi_unregister_watchdog(void)
 		release_nmi(&ipmi_nmi_handler);
 #endif
 
-	notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier);
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+			&wdog_panic_notifier);
 	unregister_reboot_notifier(&wdog_reboot_notifier);
 
 	if (! watchdog_user)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index aed80e6..9b6ae7d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -52,9 +52,8 @@ static void handle_update(void *data);
  * changes to devices when the CPU clock speed changes.
  * The mutex locks both lists.
  */
-static struct notifier_block *cpufreq_policy_notifier_list;
-static struct notifier_block *cpufreq_transition_notifier_list;
-static DECLARE_RWSEM (cpufreq_notifier_rwsem);
+static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
+static BLOCKING_NOTIFIER_HEAD(cpufreq_transition_notifier_list);
 
 
 static LIST_HEAD(cpufreq_governor_list);
@@ -247,8 +246,6 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 	dprintk("notification %u of frequency transition to %u kHz\n",
 		state, freqs->new);
 
-	down_read(&cpufreq_notifier_rwsem);
-
 	policy = cpufreq_cpu_data[freqs->cpu];
 	switch (state) {
 
@@ -266,20 +263,19 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 				freqs->old = policy->cur;
 			}
 		}
-		notifier_call_chain(&cpufreq_transition_notifier_list,
-					CPUFREQ_PRECHANGE, freqs);
+		blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
+				CPUFREQ_PRECHANGE, freqs);
 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
 		break;
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
-		notifier_call_chain(&cpufreq_transition_notifier_list,
-					CPUFREQ_POSTCHANGE, freqs);
+		blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
+				CPUFREQ_POSTCHANGE, freqs);
 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
 			policy->cur = freqs->new;
 		break;
 	}
-	up_read(&cpufreq_notifier_rwsem);
 }
 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
 
@@ -1007,7 +1003,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
 		freqs.old = cpu_policy->cur;
 		freqs.new = cur_freq;
 
-		notifier_call_chain(&cpufreq_transition_notifier_list,
+		blocking_notifier_call_chain(&cpufreq_transition_notifier_list,
 				    CPUFREQ_SUSPENDCHANGE, &freqs);
 		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
 
@@ -1088,7 +1084,8 @@ static int cpufreq_resume(struct sys_device * sysdev)
 			freqs.old = cpu_policy->cur;
 			freqs.new = cur_freq;
 
-			notifier_call_chain(&cpufreq_transition_notifier_list,
+			blocking_notifier_call_chain(
+					&cpufreq_transition_notifier_list,
 					CPUFREQ_RESUMECHANGE, &freqs);
 			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
 
@@ -1125,24 +1122,24 @@ static struct sysdev_driver cpufreq_sysdev_driver = {
  *      changes in cpufreq policy.
  *
  *	This function may sleep, and has the same return conditions as
- *	notifier_chain_register.
+ *	blocking_notifier_chain_register.
  */
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
 {
 	int ret;
 
-	down_write(&cpufreq_notifier_rwsem);
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
-		ret = notifier_chain_register(&cpufreq_transition_notifier_list, nb);
+		ret = blocking_notifier_chain_register(
+				&cpufreq_transition_notifier_list, nb);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
-		ret = notifier_chain_register(&cpufreq_policy_notifier_list, nb);
+		ret = blocking_notifier_chain_register(
+				&cpufreq_policy_notifier_list, nb);
 		break;
 	default:
 		ret = -EINVAL;
 	}
-	up_write(&cpufreq_notifier_rwsem);
 
 	return ret;
 }
@@ -1157,24 +1154,24 @@ EXPORT_SYMBOL(cpufreq_register_notifier);
  *	Remove a driver from the CPU frequency notifier list.
  *
  *	This function may sleep, and has the same return conditions as
- *	notifier_chain_unregister.
+ *	blocking_notifier_chain_unregister.
  */
 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
 {
 	int ret;
 
-	down_write(&cpufreq_notifier_rwsem);
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
-		ret = notifier_chain_unregister(&cpufreq_transition_notifier_list, nb);
+		ret = blocking_notifier_chain_unregister(
+				&cpufreq_transition_notifier_list, nb);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
-		ret = notifier_chain_unregister(&cpufreq_policy_notifier_list, nb);
+		ret = blocking_notifier_chain_unregister(
+				&cpufreq_policy_notifier_list, nb);
 		break;
 	default:
 		ret = -EINVAL;
 	}
-	up_write(&cpufreq_notifier_rwsem);
 
 	return ret;
 }
@@ -1346,29 +1343,23 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_poli
 	if (ret)
 		goto error_out;
 
-	down_read(&cpufreq_notifier_rwsem);
-
 	/* adjust if necessary - all reasons */
-	notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST,
-			    policy);
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_ADJUST, policy);
 
 	/* adjust if necessary - hardware incompatibility*/
-	notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_INCOMPATIBLE,
-			    policy);
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_INCOMPATIBLE, policy);
 
 	/* verify the cpu speed can be set within this limit,
 	   which might be different to the first one */
 	ret = cpufreq_driver->verify(policy);
-	if (ret) {
-		up_read(&cpufreq_notifier_rwsem);
+	if (ret)
 		goto error_out;
-	}
 
 	/* notification of the new policy */
-	notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_NOTIFY,
-			    policy);
-
-	up_read(&cpufreq_notifier_rwsem);
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_NOTIFY, policy);
 
 	data->min = policy->min;
 	data->max = policy->max;
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index d6543fc..339f405 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -484,26 +484,15 @@ static void dcdbas_host_control(void)
 static int dcdbas_reboot_notify(struct notifier_block *nb, unsigned long code,
 				void *unused)
 {
-	static unsigned int notify_cnt = 0;
-
 	switch (code) {
 	case SYS_DOWN:
 	case SYS_HALT:
 	case SYS_POWER_OFF:
 		if (host_control_on_shutdown) {
 			/* firmware is going to perform host control action */
-			if (++notify_cnt == 2) {
-				printk(KERN_WARNING
-				       "Please wait for shutdown "
-				       "action to complete...\n");
-				dcdbas_host_control();
-			}
-			/*
-			 * register again and initiate the host control
-			 * action on the second notification to allow
-			 * everyone that registered to be notified
-			 */
-			register_reboot_notifier(nb);
+			printk(KERN_WARNING "Please wait for shutdown "
+			       "action to complete...\n");
+			dcdbas_host_control();
 		}
 		break;
 	}
@@ -514,7 +503,7 @@ static int dcdbas_reboot_notify(struct notifier_block *nb, unsigned long code,
 static struct notifier_block dcdbas_reboot_nb = {
 	.notifier_call = dcdbas_reboot_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = INT_MIN
 };
 
 static DCDBAS_BIN_ATTR_RW(smi_data);
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index d2ead17..34fcaba 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -80,7 +80,7 @@ static struct adb_driver *adb_driver_list[] = {
 static struct class *adb_dev_class;
 
 struct adb_driver *adb_controller;
-struct notifier_block *adb_client_list = NULL;
+BLOCKING_NOTIFIER_HEAD(adb_client_list);
 static int adb_got_sleep;
 static int adb_inited;
 static pid_t adb_probe_task_pid;
@@ -354,7 +354,8 @@ adb_notify_sleep(struct pmu_sleep_notifier *self, int when)
 		/* Stop autopoll */
 		if (adb_controller->autopoll)
 			adb_controller->autopoll(0);
-		ret = notifier_call_chain(&adb_client_list, ADB_MSG_POWERDOWN, NULL);
+		ret = blocking_notifier_call_chain(&adb_client_list,
+				ADB_MSG_POWERDOWN, NULL);
 		if (ret & NOTIFY_STOP_MASK) {
 			up(&adb_probe_mutex);
 			return PBOOK_SLEEP_REFUSE;
@@ -391,7 +392,8 @@ do_adb_reset_bus(void)
 	if (adb_controller->autopoll)
 		adb_controller->autopoll(0);
 
-	nret = notifier_call_chain(&adb_client_list, ADB_MSG_PRE_RESET, NULL);
+	nret = blocking_notifier_call_chain(&adb_client_list,
+			ADB_MSG_PRE_RESET, NULL);
 	if (nret & NOTIFY_STOP_MASK) {
 		if (adb_controller->autopoll)
 			adb_controller->autopoll(autopoll_devs);
@@ -426,7 +428,8 @@ do_adb_reset_bus(void)
 	}
 	up(&adb_handler_sem);
 
-	nret = notifier_call_chain(&adb_client_list, ADB_MSG_POST_RESET, NULL);
+	nret = blocking_notifier_call_chain(&adb_client_list,
+			ADB_MSG_POST_RESET, NULL);
 	if (nret & NOTIFY_STOP_MASK)
 		return -EBUSY;
 	
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index c0b46bc..f5779a7 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -1214,7 +1214,8 @@ static int __init adbhid_init(void)
 
 	adbhid_probe();
 
-	notifier_chain_register(&adb_client_list, &adbhid_adb_notifier);
+	blocking_notifier_chain_register(&adb_client_list,
+			&adbhid_adb_notifier);
 
 	return 0;
 }
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 4f5f3ab..0b5ff55 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -187,7 +187,7 @@ extern int disable_kernel_backlight;
 
 int __fake_sleep;
 int asleep;
-struct notifier_block *sleep_notifier_list;
+BLOCKING_NOTIFIER_HEAD(sleep_notifier_list);
 
 #ifdef CONFIG_ADB
 static int adb_dev_map = 0;
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index f08e52f..35b7032 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -102,7 +102,7 @@ static int pmu_kind = PMU_UNKNOWN;
 static int pmu_fully_inited = 0;
 
 int asleep;
-struct notifier_block *sleep_notifier_list;
+BLOCKING_NOTIFIER_HEAD(sleep_notifier_list);
 
 static int pmu_probe(void);
 static int pmu_init(void);
@@ -913,7 +913,8 @@ int powerbook_sleep(void)
 	struct adb_request sleep_req;
 
 	/* Notify device drivers */
-	ret = notifier_call_chain(&sleep_notifier_list, PBOOK_SLEEP, NULL);
+	ret = blocking_notifier_call_chain(&sleep_notifier_list,
+			PBOOK_SLEEP, NULL);
 	if (ret & NOTIFY_STOP_MASK)
 		return -EBUSY;
 
@@ -984,7 +985,7 @@ int powerbook_sleep(void)
 			enable_irq(i);
 
 	/* Notify drivers */
-	notifier_call_chain(&sleep_notifier_list, PBOOK_WAKE, NULL);
+	blocking_notifier_call_chain(&sleep_notifier_list, PBOOK_WAKE, NULL);
 
 	/* reenable ADB autopoll */
 	pmu_adb_autopoll(adb_dev_map);
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index 6c0ba04..ab3faa7 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -52,7 +52,7 @@
 static LIST_HEAD(wf_controls);
 static LIST_HEAD(wf_sensors);
 static DEFINE_MUTEX(wf_lock);
-static struct notifier_block *wf_client_list;
+static BLOCKING_NOTIFIER_HEAD(wf_client_list);
 static int wf_client_count;
 static unsigned int wf_overtemp;
 static unsigned int wf_overtemp_counter;
@@ -68,7 +68,7 @@ static struct platform_device wf_platform_device = {
 
 static inline void wf_notify(int event, void *param)
 {
-	notifier_call_chain(&wf_client_list, event, param);
+	blocking_notifier_call_chain(&wf_client_list, event, param);
 }
 
 int wf_critical_overtemp(void)
@@ -398,7 +398,7 @@ int wf_register_client(struct notifier_block *nb)
 	struct wf_sensor *sr;
 
 	mutex_lock(&wf_lock);
-	rc = notifier_chain_register(&wf_client_list, nb);
+	rc = blocking_notifier_chain_register(&wf_client_list, nb);
 	if (rc != 0)
 		goto bail;
 	wf_client_count++;
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(wf_register_client);
 int wf_unregister_client(struct notifier_block *nb)
 {
 	mutex_lock(&wf_lock);
-	notifier_chain_unregister(&wf_client_list, nb);
+	blocking_notifier_chain_unregister(&wf_client_list, nb);
 	wf_client_count++;
 	if (wf_client_count == 0)
 		wf_stop_thread();
diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c
index f295401..7fd7a43 100644
--- a/drivers/misc/ibmasm/heartbeat.c
+++ b/drivers/misc/ibmasm/heartbeat.c
@@ -52,12 +52,13 @@ static struct notifier_block panic_notifier = { panic_happened, NULL, 1 };
 
 void ibmasm_register_panic_notifier(void)
 {
-	notifier_chain_register(&panic_notifier_list, &panic_notifier);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier);
 }
 
 void ibmasm_unregister_panic_notifier(void)
 {
-	notifier_chain_unregister(&panic_notifier_list, &panic_notifier);
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+			&panic_notifier);
 }
 
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2d0ac16..f13a539 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3159,7 +3159,7 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
  * bond_netdev_event: handle netdev notifier chain events.
  *
  * This function receives events for the netdev chain.  The caller (an
- * ioctl handler calling notifier_call_chain) holds the necessary
+ * ioctl handler calling blocking_notifier_call_chain) holds the necessary
  * locks for us to safely manipulate the slave devices (RTNL lock,
  * dev_probe_lock).
  */
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 3627a2d..298f2dd 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -499,11 +499,16 @@ static int led_halt(struct notifier_block *, unsigned long, void *);
 static struct notifier_block led_notifier = {
 	.notifier_call = led_halt,
 };
+static int notifier_disabled = 0;
 
 static int led_halt(struct notifier_block *nb, unsigned long event, void *buf) 
 {
 	char *txt;
-	
+
+	if (notifier_disabled)
+		return NOTIFY_OK;
+
+	notifier_disabled = 1;
 	switch (event) {
 	case SYS_RESTART:	txt = "SYSTEM RESTART";
 				break;
@@ -527,7 +532,6 @@ static int led_halt(struct notifier_block *nb, unsigned long event, void *buf)
 		if (led_func_ptr)
 			led_func_ptr(0xff); /* turn all LEDs ON */
 	
-	unregister_reboot_notifier(&led_notifier);
 	return NOTIFY_OK;
 }
 
@@ -758,6 +762,12 @@ not_found:
 	return 1;
 }
 
+static void __exit led_exit(void)
+{
+	unregister_reboot_notifier(&led_notifier);
+	return;
+}
+
 #ifdef CONFIG_PROC_FS
 module_init(led_create_procfs)
 #endif
diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
index 54b2b7f..0bcab83 100644
--- a/drivers/parisc/power.c
+++ b/drivers/parisc/power.c
@@ -251,7 +251,8 @@ static int __init power_init(void)
 	}
 
 	/* Register a call for panic conditions. */
-	notifier_chain_register(&panic_notifier_list, &parisc_panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list,
+			&parisc_panic_block);
 
 	tasklet_enable(&power_tasklet);
 
@@ -264,7 +265,8 @@ static void __exit power_exit(void)
 		return;
 
 	tasklet_disable(&power_tasklet);
-	notifier_chain_unregister(&panic_notifier_list, &parisc_panic_block);
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+			&parisc_panic_block);
 	power_tasklet.func = NULL;
 	pdc_soft_power_button(0);
 }
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 62e3cda..7f7013e 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -671,7 +671,7 @@ static struct file_operations gdth_fops = {
 static struct notifier_block gdth_notifier = {
     gdth_halt, NULL, 0
 };
-
+static int notifier_disabled = 0;
 
 static void gdth_delay(int milliseconds)
 {
@@ -4595,13 +4595,13 @@ static int __init gdth_detect(struct scsi_host_template *shtp)
         add_timer(&gdth_timer);
 #endif
         major = register_chrdev(0,"gdth",&gdth_fops);
+        notifier_disabled = 0;
         register_reboot_notifier(&gdth_notifier);
     }
     gdth_polling = FALSE;
     return gdth_ctr_vcount;
 }
 
-
 static int gdth_release(struct Scsi_Host *shp)
 {
     int hanum;
@@ -5632,10 +5632,14 @@ static int gdth_halt(struct notifier_block *nb, ulong event, void *buf)
     char            cmnd[MAX_COMMAND_SIZE];   
 #endif
 
+    if (notifier_disabled)
+    	return NOTIFY_OK;
+
     TRACE2(("gdth_halt() event %d\n",(int)event));
     if (event != SYS_RESTART && event != SYS_HALT && event != SYS_POWER_OFF)
         return NOTIFY_DONE;
 
+    notifier_disabled = 1;
     printk("GDT-HA: Flushing all host drives .. ");
     for (hanum = 0; hanum < gdth_ctr_count; ++hanum) {
         gdth_flush(hanum);
@@ -5679,7 +5683,6 @@ static int gdth_halt(struct notifier_block *nb, ulong event, void *buf)
 #ifdef GDTH_STATISTICS
     del_timer(&gdth_timer);
 #endif
-    unregister_reboot_notifier(&gdth_notifier);
     return NOTIFY_OK;
 }
 
diff --git a/drivers/usb/core/notify.c b/drivers/usb/core/notify.c
index 4b55285..fe0ed54 100644
--- a/drivers/usb/core/notify.c
+++ b/drivers/usb/core/notify.c
@@ -16,57 +16,7 @@
 #include <linux/mutex.h>
 #include "usb.h"
 
-
-static struct notifier_block *usb_notifier_list;
-static DEFINE_MUTEX(usb_notifier_lock);
-
-static void usb_notifier_chain_register(struct notifier_block **list,
-					struct notifier_block *n)
-{
-	mutex_lock(&usb_notifier_lock);
-	while (*list) {
-		if (n->priority > (*list)->priority)
-			break;
-		list = &((*list)->next);
-	}
-	n->next = *list;
-	*list = n;
-	mutex_unlock(&usb_notifier_lock);
-}
-
-static void usb_notifier_chain_unregister(struct notifier_block **nl,
-				   struct notifier_block *n)
-{
-	mutex_lock(&usb_notifier_lock);
-	while ((*nl)!=NULL) {
-		if ((*nl)==n) {
-			*nl = n->next;
-			goto exit;
-		}
-		nl=&((*nl)->next);
-	}
-exit:
-	mutex_unlock(&usb_notifier_lock);
-}
-
-static int usb_notifier_call_chain(struct notifier_block **n,
-				   unsigned long val, void *v)
-{
-	int ret=NOTIFY_DONE;
-	struct notifier_block *nb = *n;
-
-	mutex_lock(&usb_notifier_lock);
-	while (nb) {
-		ret = nb->notifier_call(nb,val,v);
-		if (ret&NOTIFY_STOP_MASK) {
-			goto exit;
-		}
-		nb = nb->next;
-	}
-exit:
-	mutex_unlock(&usb_notifier_lock);
-	return ret;
-}
+static BLOCKING_NOTIFIER_HEAD(usb_notifier_list);
 
 /**
  * usb_register_notify - register a notifier callback whenever a usb change happens
@@ -76,7 +26,7 @@ exit:
  */
 void usb_register_notify(struct notifier_block *nb)
 {
-	usb_notifier_chain_register(&usb_notifier_list, nb);
+	blocking_notifier_chain_register(&usb_notifier_list, nb);
 }
 EXPORT_SYMBOL_GPL(usb_register_notify);
 
@@ -89,27 +39,28 @@ EXPORT_SYMBOL_GPL(usb_register_notify);
  */
 void usb_unregister_notify(struct notifier_block *nb)
 {
-	usb_notifier_chain_unregister(&usb_notifier_list, nb);
+	blocking_notifier_chain_unregister(&usb_notifier_list, nb);
 }
 EXPORT_SYMBOL_GPL(usb_unregister_notify);
 
 
 void usb_notify_add_device(struct usb_device *udev)
 {
-	usb_notifier_call_chain(&usb_notifier_list, USB_DEVICE_ADD, udev);
+	blocking_notifier_call_chain(&usb_notifier_list, USB_DEVICE_ADD, udev);
 }
 
 void usb_notify_remove_device(struct usb_device *udev)
 {
-	usb_notifier_call_chain(&usb_notifier_list, USB_DEVICE_REMOVE, udev);
+	blocking_notifier_call_chain(&usb_notifier_list,
+			USB_DEVICE_REMOVE, udev);
 }
 
 void usb_notify_add_bus(struct usb_bus *ubus)
 {
-	usb_notifier_call_chain(&usb_notifier_list, USB_BUS_ADD, ubus);
+	blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_ADD, ubus);
 }
 
 void usb_notify_remove_bus(struct usb_bus *ubus)
 {
-	usb_notifier_call_chain(&usb_notifier_list, USB_BUS_REMOVE, ubus);
+	blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_REMOVE, ubus);
 }
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 07d882b..b1a8dca 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -55,7 +55,7 @@
 
 #define FBPIXMAPSIZE	(1024 * 8)
 
-static struct notifier_block *fb_notifier_list;
+static BLOCKING_NOTIFIER_HEAD(fb_notifier_list);
 struct fb_info *registered_fb[FB_MAX];
 int num_registered_fb;
 
@@ -784,7 +784,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 
 		    event.info = info;
 		    event.data = &mode1;
-		    ret = notifier_call_chain(&fb_notifier_list,
+		    ret = blocking_notifier_call_chain(&fb_notifier_list,
 					      FB_EVENT_MODE_DELETE, &event);
 		}
 
@@ -830,8 +830,8 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 
 				info->flags &= ~FBINFO_MISC_USEREVENT;
 				event.info = info;
-				notifier_call_chain(&fb_notifier_list, evnt,
-						    &event);
+				blocking_notifier_call_chain(&fb_notifier_list,
+						evnt, &event);
 			}
 		}
 	}
@@ -854,7 +854,8 @@ fb_blank(struct fb_info *info, int blank)
 
 		event.info = info;
 		event.data = &blank;
-		notifier_call_chain(&fb_notifier_list, FB_EVENT_BLANK, &event);
+		blocking_notifier_call_chain(&fb_notifier_list,
+				FB_EVENT_BLANK, &event);
 	}
 
  	return ret;
@@ -925,7 +926,7 @@ fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		con2fb.framebuffer = -1;
 		event.info = info;
 		event.data = &con2fb;
-		notifier_call_chain(&fb_notifier_list,
+		blocking_notifier_call_chain(&fb_notifier_list,
 				    FB_EVENT_GET_CONSOLE_MAP, &event);
 		return copy_to_user(argp, &con2fb,
 				    sizeof(con2fb)) ? -EFAULT : 0;
@@ -944,7 +945,7 @@ fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		    return -EINVAL;
 		event.info = info;
 		event.data = &con2fb;
-		return notifier_call_chain(&fb_notifier_list,
+		return blocking_notifier_call_chain(&fb_notifier_list,
 					   FB_EVENT_SET_CONSOLE_MAP,
 					   &event);
 	case FBIOBLANK:
@@ -1324,7 +1325,7 @@ register_framebuffer(struct fb_info *fb_info)
 	devfs_mk_cdev(MKDEV(FB_MAJOR, i),
 			S_IFCHR | S_IRUGO | S_IWUGO, "fb/%d", i);
 	event.info = fb_info;
-	notifier_call_chain(&fb_notifier_list,
+	blocking_notifier_call_chain(&fb_notifier_list,
 			    FB_EVENT_FB_REGISTERED, &event);
 	return 0;
 }
@@ -1366,7 +1367,7 @@ unregister_framebuffer(struct fb_info *fb_info)
  */
 int fb_register_client(struct notifier_block *nb)
 {
-	return notifier_chain_register(&fb_notifier_list, nb);
+	return blocking_notifier_chain_register(&fb_notifier_list, nb);
 }
 
 /**
@@ -1375,7 +1376,7 @@ int fb_register_client(struct notifier_block *nb)
  */
 int fb_unregister_client(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&fb_notifier_list, nb);
+	return blocking_notifier_chain_unregister(&fb_notifier_list, nb);
 }
 
 /**
@@ -1393,11 +1394,13 @@ void fb_set_suspend(struct fb_info *info, int state)
 
 	event.info = info;
 	if (state) {
-		notifier_call_chain(&fb_notifier_list, FB_EVENT_SUSPEND, &event);
+		blocking_notifier_call_chain(&fb_notifier_list,
+				FB_EVENT_SUSPEND, &event);
 		info->state = FBINFO_STATE_SUSPENDED;
 	} else {
 		info->state = FBINFO_STATE_RUNNING;
-		notifier_call_chain(&fb_notifier_list, FB_EVENT_RESUME, &event);
+		blocking_notifier_call_chain(&fb_notifier_list,
+				FB_EVENT_RESUME, &event);
 	}
 }
 
@@ -1469,7 +1472,7 @@ int fb_new_modelist(struct fb_info *info)
 
 	if (!list_empty(&info->modelist)) {
 		event.info = info;
-		err = notifier_call_chain(&fb_notifier_list,
+		err = blocking_notifier_call_chain(&fb_notifier_list,
 					   FB_EVENT_NEW_MODELIST,
 					   &event);
 	}
@@ -1495,7 +1498,7 @@ int fb_con_duit(struct fb_info *info, int event, void *data)
 	evnt.info = info;
 	evnt.data = data;
 
-	return notifier_call_chain(&fb_notifier_list, event, &evnt);
+	return blocking_notifier_call_chain(&fb_notifier_list, event, &evnt);
 }
 EXPORT_SYMBOL(fb_con_duit);
 
diff --git a/include/asm-i386/kdebug.h b/include/asm-i386/kdebug.h
index 316138e..96d0828 100644
--- a/include/asm-i386/kdebug.h
+++ b/include/asm-i386/kdebug.h
@@ -17,11 +17,9 @@ struct die_args {
 	int signr;
 };
 
-/* Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_sched - then free.
-  */
-int register_die_notifier(struct notifier_block *nb);
-extern struct notifier_block *i386die_chain;
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern struct atomic_notifier_head i386die_chain;
 
 
 /* Grossly misnamed. */
@@ -51,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str,
 		.trapnr = trap,
 		.signr = sig
 	};
-	return notifier_call_chain(&i386die_chain, val, &args);
+	return atomic_notifier_call_chain(&i386die_chain, val, &args);
 }
 
 #endif
diff --git a/include/asm-ia64/kdebug.h b/include/asm-ia64/kdebug.h
index 8b01a08..218c458 100644
--- a/include/asm-ia64/kdebug.h
+++ b/include/asm-ia64/kdebug.h
@@ -40,7 +40,7 @@ struct die_args {
 
 extern int register_die_notifier(struct notifier_block *);
 extern int unregister_die_notifier(struct notifier_block *);
-extern struct notifier_block *ia64die_chain;
+extern struct atomic_notifier_head ia64die_chain;
 
 enum die_val {
 	DIE_BREAK = 1,
@@ -81,7 +81,7 @@ static inline int notify_die(enum die_val val, char *str, struct pt_regs *regs,
 		.signr  = sig
 	};
 
-	return notifier_call_chain(&ia64die_chain, val, &args);
+	return atomic_notifier_call_chain(&ia64die_chain, val, &args);
 }
 
 #endif
diff --git a/include/asm-powerpc/kdebug.h b/include/asm-powerpc/kdebug.h
index 7c16265..c01786a 100644
--- a/include/asm-powerpc/kdebug.h
+++ b/include/asm-powerpc/kdebug.h
@@ -16,13 +16,9 @@ struct die_args {
 	int signr;
 };
 
-/*
-   Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_sched -
-   then free.
- */
-int register_die_notifier(struct notifier_block *nb);
-extern struct notifier_block *powerpc_die_chain;
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern struct atomic_notifier_head powerpc_die_chain;
 
 /* Grossly misnamed. */
 enum die_val {
@@ -37,7 +33,7 @@ enum die_val {
 static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
 {
 	struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig };
-	return notifier_call_chain(&powerpc_die_chain, val, &args);
+	return atomic_notifier_call_chain(&powerpc_die_chain, val, &args);
 }
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h
index 6321f5a..4040d12 100644
--- a/include/asm-sparc64/kdebug.h
+++ b/include/asm-sparc64/kdebug.h
@@ -15,12 +15,9 @@ struct die_args {
 	int signr;
 };
 
-/* Note - you should never unregister because that can race with NMIs.
- * If you really want to do it first unregister - then synchronize_sched
- * - then free.
- */
-int register_die_notifier(struct notifier_block *nb);
-extern struct notifier_block *sparc64die_chain;
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern struct atomic_notifier_head sparc64die_chain;
 
 extern void bad_trap(struct pt_regs *, long);
 
@@ -46,7 +43,7 @@ static inline int notify_die(enum die_val val,char *str, struct pt_regs *regs,
 				 .trapnr	= trap,
 				 .signr		= sig };
 
-	return notifier_call_chain(&sparc64die_chain, val, &args);
+	return atomic_notifier_call_chain(&sparc64die_chain, val, &args);
 }
 
 #endif
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index b9ed4c0..cf79563 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -5,21 +5,20 @@
 
 struct pt_regs;
 
-struct die_args { 
+struct die_args {
 	struct pt_regs *regs;
 	const char *str;
-	long err; 
+	long err;
 	int trapnr;
 	int signr;
-}; 
+};
+
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern struct atomic_notifier_head die_chain;
 
-/* Note - you should never unregister because that can race with NMIs.
-   If you really want to do it first unregister - then synchronize_sched - then free.
-  */
-int register_die_notifier(struct notifier_block *nb);
-extern struct notifier_block *die_chain;
 /* Grossly misnamed. */
-enum die_val { 
+enum die_val {
 	DIE_OOPS = 1,
 	DIE_INT3,
 	DIE_DEBUG,
@@ -33,8 +32,8 @@ enum die_val {
 	DIE_CALL,
 	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
-}; 
-	
+};
+
 static inline int notify_die(enum die_val val, const char *str,
 			struct pt_regs *regs, long err, int trap, int sig)
 {
@@ -45,7 +44,7 @@ static inline int notify_die(enum die_val val, const char *str,
 		.trapnr = trap,
 		.signr = sig
 	};
-	return notifier_call_chain(&die_chain, val, &args); 
+	return atomic_notifier_call_chain(&die_chain, val, &args);
 } 
 
 extern int printk_address(unsigned long address);
diff --git a/include/linux/adb.h b/include/linux/adb.h
index e9fdc63..b7305b1 100644
--- a/include/linux/adb.h
+++ b/include/linux/adb.h
@@ -85,7 +85,7 @@ enum adb_message {
     ADB_MSG_POST_RESET	/* Called after resetting the bus (re-do init & register) */
 };
 extern struct adb_driver *adb_controller;
-extern struct notifier_block *adb_client_list;
+extern struct blocking_notifier_head adb_client_list;
 
 int adb_request(struct adb_request *req, void (*done)(struct adb_request *),
 		int flags, int nbytes, ...);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 03d6cfa..a3720f9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -87,7 +87,7 @@ extern int cond_resched(void);
 		(__x < 0) ? -__x : __x;		\
 	})
 
-extern struct notifier_block *panic_notifier_list;
+extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
 	__attribute__ ((NORET_AND format (printf, 1, 2)));
diff --git a/include/linux/memory.h b/include/linux/memory.h
index e251dc4..8f04143 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -77,7 +77,6 @@ extern int remove_memory_block(unsigned long, struct mem_section *, int);
 
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 
-struct notifier_block;
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index f32d75c..d54d7b2 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -308,29 +308,30 @@ DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
 
 #define CONNTRACK_ECACHE(x)	(__get_cpu_var(ip_conntrack_ecache).x)
  
-extern struct notifier_block *ip_conntrack_chain;
-extern struct notifier_block *ip_conntrack_expect_chain;
+extern struct atomic_notifier_head ip_conntrack_chain;
+extern struct atomic_notifier_head ip_conntrack_expect_chain;
 
 static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&ip_conntrack_chain, nb);
+	return atomic_notifier_chain_register(&ip_conntrack_chain, nb);
 }
 
 static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&ip_conntrack_chain, nb);
+	return atomic_notifier_chain_unregister(&ip_conntrack_chain, nb);
 }
 
 static inline int 
 ip_conntrack_expect_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&ip_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_register(&ip_conntrack_expect_chain, nb);
 }
 
 static inline int
 ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&ip_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_unregister(&ip_conntrack_expect_chain,
+			nb);
 }
 
 extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
@@ -355,14 +356,14 @@ static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct)
 {
 	if (is_confirmed(ct) && !is_dying(ct))
-		notifier_call_chain(&ip_conntrack_chain, event, ct);
+		atomic_notifier_call_chain(&ip_conntrack_chain, event, ct);
 }
 
 static inline void 
 ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct ip_conntrack_expect *exp)
 {
-	notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
+	atomic_notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
 }
 #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
 static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 5937dd60..51dbab9 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -10,25 +10,107 @@
 #ifndef _LINUX_NOTIFIER_H
 #define _LINUX_NOTIFIER_H
 #include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
 
-struct notifier_block
-{
-	int (*notifier_call)(struct notifier_block *self, unsigned long, void *);
+/*
+ * Notifier chains are of three types:
+ *
+ *	Atomic notifier chains: Chain callbacks run in interrupt/atomic
+ *		context. Callouts are not allowed to block.
+ *	Blocking notifier chains: Chain callbacks run in process context.
+ *		Callouts are allowed to block.
+ *	Raw notifier chains: There are no restrictions on callbacks,
+ *		registration, or unregistration.  All locking and protection
+ *		must be provided by the caller.
+ *
+ * atomic_notifier_chain_register() may be called from an atomic context,
+ * but blocking_notifier_chain_register() must be called from a process
+ * context.  Ditto for the corresponding _unregister() routines.
+ *
+ * atomic_notifier_chain_unregister() and blocking_notifier_chain_unregister()
+ * _must not_ be called from within the call chain.
+ */
+
+struct notifier_block {
+	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
 	struct notifier_block *next;
 	int priority;
 };
 
+struct atomic_notifier_head {
+	spinlock_t lock;
+	struct notifier_block *head;
+};
+
+struct blocking_notifier_head {
+	struct rw_semaphore rwsem;
+	struct notifier_block *head;
+};
+
+struct raw_notifier_head {
+	struct notifier_block *head;
+};
+
+#define ATOMIC_INIT_NOTIFIER_HEAD(name) do {	\
+		spin_lock_init(&(name)->lock);	\
+		(name)->head = NULL;		\
+	} while (0)
+#define BLOCKING_INIT_NOTIFIER_HEAD(name) do {	\
+		init_rwsem(&(name)->rwsem);	\
+		(name)->head = NULL;		\
+	} while (0)
+#define RAW_INIT_NOTIFIER_HEAD(name) do {	\
+		(name)->head = NULL;		\
+	} while (0)
+
+#define ATOMIC_NOTIFIER_INIT(name) {				\
+		.lock = SPIN_LOCK_UNLOCKED,			\
+		.head = NULL }
+#define BLOCKING_NOTIFIER_INIT(name) {				\
+		.rwsem = __RWSEM_INITIALIZER((name).rwsem),	\
+		.head = NULL }
+#define RAW_NOTIFIER_INIT(name)	{				\
+		.head = NULL }
+
+#define ATOMIC_NOTIFIER_HEAD(name)				\
+	struct atomic_notifier_head name =			\
+		ATOMIC_NOTIFIER_INIT(name)
+#define BLOCKING_NOTIFIER_HEAD(name)				\
+	struct blocking_notifier_head name =			\
+		BLOCKING_NOTIFIER_INIT(name)
+#define RAW_NOTIFIER_HEAD(name)					\
+	struct raw_notifier_head name =				\
+		RAW_NOTIFIER_INIT(name)
 
 #ifdef __KERNEL__
 
-extern int notifier_chain_register(struct notifier_block **list, struct notifier_block *n);
-extern int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n);
-extern int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v);
+extern int atomic_notifier_chain_register(struct atomic_notifier_head *,
+		struct notifier_block *);
+extern int blocking_notifier_chain_register(struct blocking_notifier_head *,
+		struct notifier_block *);
+extern int raw_notifier_chain_register(struct raw_notifier_head *,
+		struct notifier_block *);
+
+extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *,
+		struct notifier_block *);
+extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *,
+		struct notifier_block *);
+extern int raw_notifier_chain_unregister(struct raw_notifier_head *,
+		struct notifier_block *);
+
+extern int atomic_notifier_call_chain(struct atomic_notifier_head *,
+		unsigned long val, void *v);
+extern int blocking_notifier_call_chain(struct blocking_notifier_head *,
+		unsigned long val, void *v);
+extern int raw_notifier_call_chain(struct raw_notifier_head *,
+		unsigned long val, void *v);
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)	/* Bad/Veto action	*/
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
+						/* Bad/Veto action */
 /*
  * Clean way to return from the notifier and stop further calls.
  */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b6f0905..916013c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -300,29 +300,30 @@ DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
 
 #define CONNTRACK_ECACHE(x)	(__get_cpu_var(nf_conntrack_ecache).x)
 
-extern struct notifier_block *nf_conntrack_chain;
-extern struct notifier_block *nf_conntrack_expect_chain;
+extern struct atomic_notifier_head nf_conntrack_chain;
+extern struct atomic_notifier_head nf_conntrack_expect_chain;
 
 static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&nf_conntrack_chain, nb);
+	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
 }
 
 static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&nf_conntrack_chain, nb);
+	return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
 }
 
 static inline int
 nf_conntrack_expect_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&nf_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
 }
 
 static inline int
 nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+	return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain,
+			nb);
 }
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
@@ -347,14 +348,14 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
 				      struct nf_conn *ct)
 {
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-		notifier_call_chain(&nf_conntrack_chain, event, ct);
+		atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
 }
 
 static inline void
 nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct nf_conntrack_expect *exp)
 {
-	notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
+	atomic_notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
 }
 #else /* CONFIG_NF_CONNTRACK_EVENTS */
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8be22bd..fe2b8d0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -18,7 +18,7 @@
 /* This protects CPUs going up and down... */
 static DECLARE_MUTEX(cpucontrol);
 
-static struct notifier_block *cpu_chain;
+static BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
 #ifdef CONFIG_HOTPLUG_CPU
 static struct task_struct *lock_cpu_hotplug_owner;
@@ -71,21 +71,13 @@ EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible);
 /* Need to know about CPUs going up/down? */
 int register_cpu_notifier(struct notifier_block *nb)
 {
-	int ret;
-
-	if ((ret = lock_cpu_hotplug_interruptible()) != 0)
-		return ret;
-	ret = notifier_chain_register(&cpu_chain, nb);
-	unlock_cpu_hotplug();
-	return ret;
+	return blocking_notifier_chain_register(&cpu_chain, nb);
 }
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	lock_cpu_hotplug();
-	notifier_chain_unregister(&cpu_chain, nb);
-	unlock_cpu_hotplug();
+	blocking_notifier_chain_unregister(&cpu_chain, nb);
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
@@ -141,7 +133,7 @@ int cpu_down(unsigned int cpu)
 		goto out;
 	}
 
-	err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
+	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
@@ -159,7 +151,7 @@ int cpu_down(unsigned int cpu)
 	p = __stop_machine_run(take_cpu_down, NULL, cpu);
 	if (IS_ERR(p)) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
-		if (notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
+		if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
 				(void *)(long)cpu) == NOTIFY_BAD)
 			BUG();
 
@@ -182,8 +174,8 @@ int cpu_down(unsigned int cpu)
 	put_cpu();
 
 	/* CPU is completely dead: tell everyone.  Too late to complain. */
-	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
-	    == NOTIFY_BAD)
+	if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD,
+			(void *)(long)cpu) == NOTIFY_BAD)
 		BUG();
 
 	check_for_tasks(cpu);
@@ -211,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu)
 		goto out;
 	}
 
-	ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+	ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
 		printk("%s: attempt to bring up CPU %u failed\n",
 				__FUNCTION__, cpu);
@@ -226,11 +218,12 @@ int __devinit cpu_up(unsigned int cpu)
 	BUG_ON(!cpu_online(cpu));
 
 	/* Now call notifier in preparation. */
-	notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+	blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
 
 out_notify:
 	if (ret != 0)
-		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
+		blocking_notifier_call_chain(&cpu_chain,
+				CPU_UP_CANCELED, hcpu);
 out:
 	unlock_cpu_hotplug();
 	return ret;
diff --git a/kernel/module.c b/kernel/module.c
index ddfe45a..4fafd58 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -64,26 +64,17 @@ static DEFINE_SPINLOCK(modlist_lock);
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
-static DEFINE_MUTEX(notify_mutex);
-static struct notifier_block * module_notify_list;
+static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
 int register_module_notifier(struct notifier_block * nb)
 {
-	int err;
-	mutex_lock(&notify_mutex);
-	err = notifier_chain_register(&module_notify_list, nb);
-	mutex_unlock(&notify_mutex);
-	return err;
+	return blocking_notifier_chain_register(&module_notify_list, nb);
 }
 EXPORT_SYMBOL(register_module_notifier);
 
 int unregister_module_notifier(struct notifier_block * nb)
 {
-	int err;
-	mutex_lock(&notify_mutex);
-	err = notifier_chain_unregister(&module_notify_list, nb);
-	mutex_unlock(&notify_mutex);
-	return err;
+	return blocking_notifier_chain_unregister(&module_notify_list, nb);
 }
 EXPORT_SYMBOL(unregister_module_notifier);
 
@@ -1816,9 +1807,8 @@ sys_init_module(void __user *umod,
 	/* Drop lock so they can recurse */
 	mutex_unlock(&module_mutex);
 
-	mutex_lock(&notify_mutex);
-	notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod);
-	mutex_unlock(&notify_mutex);
+	blocking_notifier_call_chain(&module_notify_list,
+			MODULE_STATE_COMING, mod);
 
 	/* Start the module */
 	if (mod->init != NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index acd95ad..f895c7c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(pause_on_oops_lock);
 int panic_timeout;
 EXPORT_SYMBOL(panic_timeout);
 
-struct notifier_block *panic_notifier_list;
+ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
 EXPORT_SYMBOL(panic_notifier_list);
 
@@ -97,7 +97,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 	smp_send_stop();
 #endif
 
-	notifier_call_chain(&panic_notifier_list, 0, buf);
+	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
 
 	if (!panic_blink)
 		panic_blink = no_blink;
diff --git a/kernel/profile.c b/kernel/profile.c
index ad81f79..5a730fd 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -87,72 +87,52 @@ void __init profile_init(void)
  
 #ifdef CONFIG_PROFILING
  
-static DECLARE_RWSEM(profile_rwsem);
-static DEFINE_RWLOCK(handoff_lock);
-static struct notifier_block * task_exit_notifier;
-static struct notifier_block * task_free_notifier;
-static struct notifier_block * munmap_notifier;
+static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
  
 void profile_task_exit(struct task_struct * task)
 {
-	down_read(&profile_rwsem);
-	notifier_call_chain(&task_exit_notifier, 0, task);
-	up_read(&profile_rwsem);
+	blocking_notifier_call_chain(&task_exit_notifier, 0, task);
 }
  
 int profile_handoff_task(struct task_struct * task)
 {
 	int ret;
-	read_lock(&handoff_lock);
-	ret = notifier_call_chain(&task_free_notifier, 0, task);
-	read_unlock(&handoff_lock);
+	ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
 	return (ret == NOTIFY_OK) ? 1 : 0;
 }
 
 void profile_munmap(unsigned long addr)
 {
-	down_read(&profile_rwsem);
-	notifier_call_chain(&munmap_notifier, 0, (void *)addr);
-	up_read(&profile_rwsem);
+	blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
 }
 
 int task_handoff_register(struct notifier_block * n)
 {
-	int err = -EINVAL;
-
-	write_lock(&handoff_lock);
-	err = notifier_chain_register(&task_free_notifier, n);
-	write_unlock(&handoff_lock);
-	return err;
+	return atomic_notifier_chain_register(&task_free_notifier, n);
 }
 
 int task_handoff_unregister(struct notifier_block * n)
 {
-	int err = -EINVAL;
-
-	write_lock(&handoff_lock);
-	err = notifier_chain_unregister(&task_free_notifier, n);
-	write_unlock(&handoff_lock);
-	return err;
+	return atomic_notifier_chain_unregister(&task_free_notifier, n);
 }
 
 int profile_event_register(enum profile_type type, struct notifier_block * n)
 {
 	int err = -EINVAL;
  
-	down_write(&profile_rwsem);
- 
 	switch (type) {
 		case PROFILE_TASK_EXIT:
-			err = notifier_chain_register(&task_exit_notifier, n);
+			err = blocking_notifier_chain_register(
+					&task_exit_notifier, n);
 			break;
 		case PROFILE_MUNMAP:
-			err = notifier_chain_register(&munmap_notifier, n);
+			err = blocking_notifier_chain_register(
+					&munmap_notifier, n);
 			break;
 	}
  
-	up_write(&profile_rwsem);
- 
 	return err;
 }
 
@@ -161,18 +141,17 @@ int profile_event_unregister(enum profile_type type, struct notifier_block * n)
 {
 	int err = -EINVAL;
  
-	down_write(&profile_rwsem);
- 
 	switch (type) {
 		case PROFILE_TASK_EXIT:
-			err = notifier_chain_unregister(&task_exit_notifier, n);
+			err = blocking_notifier_chain_unregister(
+					&task_exit_notifier, n);
 			break;
 		case PROFILE_MUNMAP:
-			err = notifier_chain_unregister(&munmap_notifier, n);
+			err = blocking_notifier_chain_unregister(
+					&munmap_notifier, n);
 			break;
 	}
 
-	up_write(&profile_rwsem);
 	return err;
 }
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d9b3d58..ced91e1 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -152,5 +152,5 @@ __init void spawn_softlockup_task(void)
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 
-	notifier_chain_register(&panic_notifier_list, &panic_block);
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 38bc73e..c93d37f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -95,99 +95,304 @@ int cad_pid = 1;
  *	and the like. 
  */
 
-static struct notifier_block *reboot_notifier_list;
-static DEFINE_RWLOCK(notifier_lock);
+static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
+
+/*
+ *	Notifier chain core routines.  The exported routines below
+ *	are layered on top of these, with appropriate locking added.
+ */
+
+static int notifier_chain_register(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if (n->priority > (*nl)->priority)
+			break;
+		nl = &((*nl)->next);
+	}
+	n->next = *nl;
+	rcu_assign_pointer(*nl, n);
+	return 0;
+}
+
+static int notifier_chain_unregister(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if ((*nl) == n) {
+			rcu_assign_pointer(*nl, n->next);
+			return 0;
+		}
+		nl = &((*nl)->next);
+	}
+	return -ENOENT;
+}
+
+static int __kprobes notifier_call_chain(struct notifier_block **nl,
+		unsigned long val, void *v)
+{
+	int ret = NOTIFY_DONE;
+	struct notifier_block *nb;
+
+	nb = rcu_dereference(*nl);
+	while (nb) {
+		ret = nb->notifier_call(nb, val, v);
+		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+			break;
+		nb = rcu_dereference(nb->next);
+	}
+	return ret;
+}
+
+/*
+ *	Atomic notifier chain routines.  Registration and unregistration
+ *	use a mutex, and call_chain is synchronized by RCU (no locks).
+ */
 
 /**
- *	notifier_chain_register	- Add notifier to a notifier chain
- *	@list: Pointer to root list pointer
+ *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
  *	@n: New entry in notifier chain
  *
- *	Adds a notifier to a notifier chain.
+ *	Adds a notifier to an atomic notifier chain.
  *
  *	Currently always returns zero.
  */
+
+int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_register(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+
+/**
+ *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an atomic notifier chain.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_unregister(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	synchronize_rcu();
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
+
+/**
+ *	atomic_notifier_call_chain - Call functions in an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an atomic context, so they must not block.
+ *	This routine uses RCU to synchronize with changes to the chain.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
  
-int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
+int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+		unsigned long val, void *v)
 {
-	write_lock(&notifier_lock);
-	while(*list)
-	{
-		if(n->priority > (*list)->priority)
-			break;
-		list= &((*list)->next);
-	}
-	n->next = *list;
-	*list=n;
-	write_unlock(&notifier_lock);
-	return 0;
+	int ret;
+
+	rcu_read_lock();
+	ret = notifier_call_chain(&nh->head, val, v);
+	rcu_read_unlock();
+	return ret;
 }
 
-EXPORT_SYMBOL(notifier_chain_register);
+EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+
+/*
+ *	Blocking notifier chain routines.  All access to the chain is
+ *	synchronized by an rwsem.
+ */
 
 /**
- *	notifier_chain_unregister - Remove notifier from a notifier chain
- *	@nl: Pointer to root list pointer
+ *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
  *	@n: New entry in notifier chain
  *
- *	Removes a notifier from a notifier chain.
+ *	Adds a notifier to a blocking notifier chain.
+ *	Must be called in process context.
  *
- *	Returns zero on success, or %-ENOENT on failure.
+ *	Currently always returns zero.
  */
  
-int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
 {
-	write_lock(&notifier_lock);
-	while((*nl)!=NULL)
-	{
-		if((*nl)==n)
-		{
-			*nl=n->next;
-			write_unlock(&notifier_lock);
-			return 0;
-		}
-		nl=&((*nl)->next);
-	}
-	write_unlock(&notifier_lock);
-	return -ENOENT;
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_register(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
 }
 
-EXPORT_SYMBOL(notifier_chain_unregister);
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
 
 /**
- *	notifier_call_chain - Call functions in a notifier chain
- *	@n: Pointer to root pointer of notifier chain
+ *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a blocking notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_unregister(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
+
+/**
+ *	blocking_notifier_call_chain - Call functions in a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
  *	@val: Value passed unmodified to notifier function
  *	@v: Pointer passed unmodified to notifier function
  *
- *	Calls each function in a notifier chain in turn.
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
  *
- *	If the return value of the notifier can be and'd
- *	with %NOTIFY_STOP_MASK, then notifier_call_chain
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain
  *	will return immediately, with the return value of
  *	the notifier function which halted execution.
- *	Otherwise, the return value is the return value
+ *	Otherwise the return value is the return value
  *	of the last notifier function called.
  */
  
-int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+		unsigned long val, void *v)
 {
-	int ret=NOTIFY_DONE;
-	struct notifier_block *nb = *n;
+	int ret;
 
-	while(nb)
-	{
-		ret=nb->notifier_call(nb,val,v);
-		if(ret&NOTIFY_STOP_MASK)
-		{
-			return ret;
-		}
-		nb=nb->next;
-	}
+	down_read(&nh->rwsem);
+	ret = notifier_call_chain(&nh->head, val, v);
+	up_read(&nh->rwsem);
 	return ret;
 }
 
-EXPORT_SYMBOL(notifier_call_chain);
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
+
+/*
+ *	Raw notifier chain routines.  There is no protection;
+ *	the caller must provide it.  Use at your own risk!
+ */
+
+/**
+ *	raw_notifier_chain_register - Add notifier to a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Currently always returns zero.
+ */
+
+int raw_notifier_chain_register(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_register(&nh->head, n);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
+
+/**
+ *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_unregister(&nh->head, n);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
+
+/**
+ *	raw_notifier_call_chain - Call functions in a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an undefined context.
+ *	All locking must be provided by the caller.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+
+int raw_notifier_call_chain(struct raw_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return notifier_call_chain(&nh->head, val, v);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
 
 /**
  *	register_reboot_notifier - Register function to be called at reboot time
@@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain);
  *	Registers a function with the list of functions
  *	to be called at reboot time.
  *
- *	Currently always returns zero, as notifier_chain_register
+ *	Currently always returns zero, as blocking_notifier_chain_register
  *	always returns zero.
  */
  
 int register_reboot_notifier(struct notifier_block * nb)
 {
-	return notifier_chain_register(&reboot_notifier_list, nb);
+	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
 }
 
 EXPORT_SYMBOL(register_reboot_notifier);
@@ -219,7 +424,7 @@ EXPORT_SYMBOL(register_reboot_notifier);
  
 int unregister_reboot_notifier(struct notifier_block * nb)
 {
-	return notifier_chain_unregister(&reboot_notifier_list, nb);
+	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
 }
 
 EXPORT_SYMBOL(unregister_reboot_notifier);
@@ -380,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart);
 
 void kernel_restart_prepare(char *cmd)
 {
-	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
+	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 	system_state = SYSTEM_RESTART;
 	device_shutdown();
 }
@@ -430,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec);
 
 void kernel_shutdown_prepare(enum system_states state)
 {
-	notifier_call_chain(&reboot_notifier_list,
+	blocking_notifier_call_chain(&reboot_notifier_list,
 		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
 	system_state = state;
 	device_shutdown();
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9106354..a49a697 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -73,23 +73,23 @@ DEFINE_RWLOCK(hci_cb_list_lock);
 struct hci_proto *hci_proto[HCI_MAX_PROTO];
 
 /* HCI notifiers list */
-static struct notifier_block *hci_notifier;
+static ATOMIC_NOTIFIER_HEAD(hci_notifier);
 
 /* ---- HCI notifications ---- */
 
 int hci_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&hci_notifier, nb);
+	return atomic_notifier_chain_register(&hci_notifier, nb);
 }
 
 int hci_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&hci_notifier, nb);
+	return atomic_notifier_chain_unregister(&hci_notifier, nb);
 }
 
 static void hci_notify(struct hci_dev *hdev, int event)
 {
-	notifier_call_chain(&hci_notifier, event, hdev);
+	atomic_notifier_call_chain(&hci_notifier, event, hdev);
 }
 
 /* ---- HCI requests ---- */
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e1dc30..a3ab11f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -193,7 +193,7 @@ static inline struct hlist_head *dev_index_hash(int ifindex)
  *	Our notifier list
  */
 
-static struct notifier_block *netdev_chain;
+static BLOCKING_NOTIFIER_HEAD(netdev_chain);
 
 /*
  *	Device drivers call our routines to queue packets here. We empty the
@@ -736,7 +736,8 @@ int dev_change_name(struct net_device *dev, char *newname)
 	if (!err) {
 		hlist_del(&dev->name_hlist);
 		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
-		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+		blocking_notifier_call_chain(&netdev_chain,
+				NETDEV_CHANGENAME, dev);
 	}
 
 	return err;
@@ -750,7 +751,7 @@ int dev_change_name(struct net_device *dev, char *newname)
  */
 void netdev_features_change(struct net_device *dev)
 {
-	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+	blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 }
 EXPORT_SYMBOL(netdev_features_change);
 
@@ -765,7 +766,8 @@ EXPORT_SYMBOL(netdev_features_change);
 void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
-		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+		blocking_notifier_call_chain(&netdev_chain,
+				NETDEV_CHANGE, dev);
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 	}
 }
@@ -862,7 +864,7 @@ int dev_open(struct net_device *dev)
 		/*
 		 *	... and announce new interface.
 		 */
-		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+		blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 	}
 	return ret;
 }
@@ -885,7 +887,7 @@ int dev_close(struct net_device *dev)
 	 *	Tell people we are going down, so that they can
 	 *	prepare to death, when device is still operating.
 	 */
-	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+	blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 
 	dev_deactivate(dev);
 
@@ -922,7 +924,7 @@ int dev_close(struct net_device *dev)
 	/*
 	 * Tell people we are down
 	 */
-	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+	blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 
 	return 0;
 }
@@ -953,7 +955,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 	int err;
 
 	rtnl_lock();
-	err = notifier_chain_register(&netdev_chain, nb);
+	err = blocking_notifier_chain_register(&netdev_chain, nb);
 	if (!err) {
 		for (dev = dev_base; dev; dev = dev->next) {
 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
@@ -981,7 +983,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	int err;
 
 	rtnl_lock();
-	err = notifier_chain_unregister(&netdev_chain, nb);
+	err = blocking_notifier_chain_unregister(&netdev_chain, nb);
 	rtnl_unlock();
 	return err;
 }
@@ -992,12 +994,12 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
  *      @v:   pointer passed unmodified to notifier function
  *
  *	Call all network notifier blocks.  Parameters and return value
- *	are as for notifier_call_chain().
+ *	are as for blocking_notifier_call_chain().
  */
 
 int call_netdevice_notifiers(unsigned long val, void *v)
 {
-	return notifier_call_chain(&netdev_chain, val, v);
+	return blocking_notifier_call_chain(&netdev_chain, val, v);
 }
 
 /* When > 0 there are consumers of rx skb time stamps */
@@ -2242,7 +2244,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	if (dev->flags & IFF_UP &&
 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
 					  IFF_VOLATILE)))
-		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+		blocking_notifier_call_chain(&netdev_chain,
+				NETDEV_CHANGE, dev);
 
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2286,8 +2289,8 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	else
 		dev->mtu = new_mtu;
 	if (!err && dev->flags & IFF_UP)
-		notifier_call_chain(&netdev_chain,
-				    NETDEV_CHANGEMTU, dev);
+		blocking_notifier_call_chain(&netdev_chain,
+				NETDEV_CHANGEMTU, dev);
 	return err;
 }
 
@@ -2303,7 +2306,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 		return -ENODEV;
 	err = dev->set_mac_address(dev, sa);
 	if (!err)
-		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+		blocking_notifier_call_chain(&netdev_chain,
+				NETDEV_CHANGEADDR, dev);
 	return err;
 }
 
@@ -2359,7 +2363,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
 				return -EINVAL;
 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-			notifier_call_chain(&netdev_chain,
+			blocking_notifier_call_chain(&netdev_chain,
 					    NETDEV_CHANGEADDR, dev);
 			return 0;
 
@@ -2813,7 +2817,7 @@ int register_netdevice(struct net_device *dev)
 	write_unlock_bh(&dev_base_lock);
 
 	/* Notify protocols, that a new device appeared. */
-	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+	blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
 
 	/* Finish registration after unlock */
 	net_set_todo(dev);
@@ -2892,7 +2896,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 			rtnl_lock();
 
 			/* Rebroadcast unregister notification */
-			notifier_call_chain(&netdev_chain,
+			blocking_notifier_call_chain(&netdev_chain,
 					    NETDEV_UNREGISTER, dev);
 
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -3148,7 +3152,7 @@ int unregister_netdevice(struct net_device *dev)
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
 	*/
-	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+	blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
 	
 	/*
 	 *	Flush the multicast chain
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index cc7b9d9..d2ae989 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -68,7 +68,7 @@ __le16 decnet_address = 0;
 
 static DEFINE_RWLOCK(dndev_lock);
 static struct net_device *decnet_default_device;
-static struct notifier_block *dnaddr_chain;
+static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
 
 static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
 static void dn_dev_delete(struct net_device *dev);
@@ -446,7 +446,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
 	}
 
 	rtmsg_ifa(RTM_DELADDR, ifa1);
-	notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
+	blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
 	if (destroy) {
 		dn_dev_free_ifa(ifa1);
 
@@ -481,7 +481,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	dn_db->ifa_list = ifa;
 
 	rtmsg_ifa(RTM_NEWADDR, ifa);
-	notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
+	blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
 }
@@ -1285,12 +1285,12 @@ void dn_dev_devices_on(void)
 
 int register_dnaddr_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&dnaddr_chain, nb);
+	return blocking_notifier_chain_register(&dnaddr_chain, nb);
 }
 
 int unregister_dnaddr_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&dnaddr_chain, nb);
+	return blocking_notifier_chain_unregister(&dnaddr_chain, nb);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44fdf14..81c2f78 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -81,7 +81,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
-static struct notifier_block *inetaddr_chain;
+static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
@@ -267,7 +267,8 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 				*ifap1 = ifa->ifa_next;
 
 				rtmsg_ifa(RTM_DELADDR, ifa);
-				notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
+				blocking_notifier_call_chain(&inetaddr_chain,
+						NETDEV_DOWN, ifa);
 				inet_free_ifa(ifa);
 			} else {
 				promote = ifa;
@@ -291,7 +292,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	   So that, this order is correct.
 	 */
 	rtmsg_ifa(RTM_DELADDR, ifa1);
-	notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
+	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
 
@@ -303,7 +304,8 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
 		rtmsg_ifa(RTM_NEWADDR, promote);
-		notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
+		blocking_notifier_call_chain(&inetaddr_chain,
+				NETDEV_UP, promote);
 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
 			if (ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa))
@@ -366,7 +368,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
 	rtmsg_ifa(RTM_NEWADDR, ifa);
-	notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
+	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
 }
@@ -938,12 +940,12 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
 
 int register_inetaddr_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&inetaddr_chain, nb);
+	return blocking_notifier_chain_register(&inetaddr_chain, nb);
 }
 
 int unregister_inetaddr_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&inetaddr_chain, nb);
+	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
 }
 
 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 9e34034..ceaabc1 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -80,8 +80,8 @@ static int ip_conntrack_vmalloc;
 static unsigned int ip_conntrack_next_id;
 static unsigned int ip_conntrack_expect_next_id;
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-struct notifier_block *ip_conntrack_chain;
-struct notifier_block *ip_conntrack_expect_chain;
+ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
+ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
 
 DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
 
@@ -92,7 +92,7 @@ __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
 {
 	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
 	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
-		notifier_call_chain(&ip_conntrack_chain, ecache->events,
+		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
 				    ecache->ct);
 	ecache->events = 0;
 	ip_conntrack_put(ecache->ct);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 01c62a0..445006e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -143,7 +143,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 				struct prefix_info *pinfo);
 static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
 
-static struct notifier_block *inet6addr_chain;
+static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
 struct ipv6_devconf ipv6_devconf = {
 	.forwarding		= 0,
@@ -593,7 +593,7 @@ out2:
 	read_unlock_bh(&addrconf_lock);
 
 	if (likely(err == 0))
-		notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
 	else {
 		kfree(ifa);
 		ifa = ERR_PTR(err);
@@ -688,7 +688,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
-	notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp);
+	atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
 
 	addrconf_del_timer(ifp);
 
@@ -3767,12 +3767,12 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
 
 int register_inet6addr_notifier(struct notifier_block *nb)
 {
-        return notifier_chain_register(&inet6addr_chain, nb);
+        return atomic_notifier_chain_register(&inet6addr_chain, nb);
 }
 
 int unregister_inet6addr_notifier(struct notifier_block *nb)
 {
-        return notifier_chain_unregister(&inet6addr_chain,nb);
+        return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0ae281d..56389c8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -90,8 +90,8 @@ static int nf_conntrack_vmalloc;
 static unsigned int nf_conntrack_next_id;
 static unsigned int nf_conntrack_expect_next_id;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-struct notifier_block *nf_conntrack_chain;
-struct notifier_block *nf_conntrack_expect_chain;
+ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
+ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
 
 DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
 
@@ -103,7 +103,7 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
 	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
 	    && ecache->events)
-		notifier_call_chain(&nf_conntrack_chain, ecache->events,
+		atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
 				    ecache->ct);
 
 	ecache->events = 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d00a903..2a233ff 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -123,7 +123,7 @@ static void netlink_destroy_callback(struct netlink_callback *cb);
 static DEFINE_RWLOCK(nl_table_lock);
 static atomic_t nl_table_users = ATOMIC_INIT(0);
 
-static struct notifier_block *netlink_chain;
+static ATOMIC_NOTIFIER_HEAD(netlink_chain);
 
 static u32 netlink_group_mask(u32 group)
 {
@@ -469,7 +469,8 @@ static int netlink_release(struct socket *sock)
 						.protocol = sk->sk_protocol,
 						.pid = nlk->pid,
 					  };
-		notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
+		atomic_notifier_call_chain(&netlink_chain,
+				NETLINK_URELEASE, &n);
 	}	
 
 	if (nlk->module)
@@ -1695,12 +1696,12 @@ static struct file_operations netlink_seq_fops = {
 
 int netlink_register_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&netlink_chain, nb);
+	return atomic_notifier_chain_register(&netlink_chain, nb);
 }
 
 int netlink_unregister_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&netlink_chain, nb);
+	return atomic_notifier_chain_unregister(&netlink_chain, nb);
 }
                 
 static const struct proto_ops netlink_ops = {
-- 
cgit v0.10.2


From d23ee8fe6e2176a9d4dbfdd18edfa1b5bc3c79a5 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Mon, 27 Mar 2006 01:16:33 -0800
Subject: [PATCH] mips: fixed collision of rtc function name

Fix the collision of rtc function name.

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/mips/ddb5xxx/common/rtc_ds1386.c b/arch/mips/ddb5xxx/common/rtc_ds1386.c
index 995896a..5dc34da 100644
--- a/arch/mips/ddb5xxx/common/rtc_ds1386.c
+++ b/arch/mips/ddb5xxx/common/rtc_ds1386.c
@@ -165,6 +165,6 @@ rtc_ds1386_init(unsigned long base)
 	WRITE_RTC(0xB, byte);
 
 	/* set the function pointers */
-	rtc_get_time = rtc_ds1386_get_time;
-	rtc_set_time = rtc_ds1386_set_time;
+	rtc_mips_get_time = rtc_ds1386_get_time;
+	rtc_mips_set_time = rtc_ds1386_set_time;
 }
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 1748223..f17d337 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -193,8 +193,8 @@ static void dec_ioasic_hpt_init(unsigned int count)
 
 void __init dec_time_init(void)
 {
-	rtc_get_time = dec_rtc_get_time;
-	rtc_set_mmss = dec_rtc_set_mmss;
+	rtc_mips_get_time = dec_rtc_get_time;
+	rtc_mips_set_mmss = dec_rtc_set_mmss;
 
 	mips_timer_state = dec_timer_state;
 	mips_timer_ack = dec_timer_ack;
diff --git a/arch/mips/ite-boards/generic/time.c b/arch/mips/ite-boards/generic/time.c
index f5d67ee..b79817b 100644
--- a/arch/mips/ite-boards/generic/time.c
+++ b/arch/mips/ite-boards/generic/time.c
@@ -227,8 +227,8 @@ void __init it8172_time_init(void)
 
 	local_irq_restore(flags);
 
-	rtc_get_time = it8172_rtc_get_time;
-	rtc_set_time = it8172_rtc_set_time;
+	rtc_mips_get_time = it8172_rtc_get_time;
+	rtc_mips_set_time = it8172_rtc_set_time;
 }
 
 #define ALLINTS (IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5)
diff --git a/arch/mips/jmr3927/common/rtc_ds1742.c b/arch/mips/jmr3927/common/rtc_ds1742.c
index 9a8bff1..a6bd3f4 100644
--- a/arch/mips/jmr3927/common/rtc_ds1742.c
+++ b/arch/mips/jmr3927/common/rtc_ds1742.c
@@ -159,8 +159,8 @@ rtc_ds1742_init(unsigned long base)
 	db_assert((rtc_base & 0xe0000000) == KSEG1);
 
 	/* set the function pointers */
-	rtc_get_time = rtc_ds1742_get_time;
-	rtc_set_time = rtc_ds1742_set_time;
+	rtc_mips_get_time = rtc_ds1742_get_time;
+	rtc_mips_set_time = rtc_ds1742_set_time;
 
 	/* clear oscillator stop bit */
 	CMOS_WRITE(RTC_READ, RTC_CONTROL);
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 51273b7..5e51a2d 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -65,9 +65,9 @@ static int null_rtc_set_time(unsigned long sec)
 	return 0;
 }
 
-unsigned long (*rtc_get_time)(void) = null_rtc_get_time;
-int (*rtc_set_time)(unsigned long) = null_rtc_set_time;
-int (*rtc_set_mmss)(unsigned long);
+unsigned long (*rtc_mips_get_time)(void) = null_rtc_get_time;
+int (*rtc_mips_set_time)(unsigned long) = null_rtc_set_time;
+int (*rtc_mips_set_mmss)(unsigned long);
 
 
 /* usecs per counter cycle, shifted to left by 32 bits */
@@ -440,14 +440,14 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. rtc_set_time() has to be
+	 * CMOS clock accordingly every ~11 minutes. rtc_mips_set_time() has to be
 	 * called as close as possible to 500 ms before the new second starts.
 	 */
 	if (ntp_synced() &&
 	    xtime.tv_sec > last_rtc_update + 660 &&
 	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
 	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_set_mmss(xtime.tv_sec) == 0) {
+		if (rtc_mips_set_mmss(xtime.tv_sec) == 0) {
 			last_rtc_update = xtime.tv_sec;
 		} else {
 			/* do it again in 60 s */
@@ -565,7 +565,7 @@ asmlinkage void ll_local_timer_interrupt(int irq, struct pt_regs *regs)
  *      b) (optional) calibrate and set the mips_hpt_frequency
  *	    (only needed if you intended to use fixed_rate_gettimeoffset
  *	     or use cpu counter as timer interrupt source)
- * 2) setup xtime based on rtc_get_time().
+ * 2) setup xtime based on rtc_mips_get_time().
  * 3) choose a appropriate gettimeoffset routine.
  * 4) calculate a couple of cached variables for later usage
  * 5) board_timer_setup() -
@@ -633,10 +633,10 @@ void __init time_init(void)
 	if (board_time_init)
 		board_time_init();
 
-	if (!rtc_set_mmss)
-		rtc_set_mmss = rtc_set_time;
+	if (!rtc_mips_set_mmss)
+		rtc_mips_set_mmss = rtc_mips_set_time;
 
-	xtime.tv_sec = rtc_get_time();
+	xtime.tv_sec = rtc_mips_get_time();
 	xtime.tv_nsec = 0;
 
 	set_normalized_timespec(&wall_to_monotonic,
@@ -772,8 +772,8 @@ void to_tm(unsigned long tim, struct rtc_time *tm)
 
 EXPORT_SYMBOL(rtc_lock);
 EXPORT_SYMBOL(to_tm);
-EXPORT_SYMBOL(rtc_set_time);
-EXPORT_SYMBOL(rtc_get_time);
+EXPORT_SYMBOL(rtc_mips_set_time);
+EXPORT_SYMBOL(rtc_mips_get_time);
 
 unsigned long long sched_clock(void)
 {
diff --git a/arch/mips/lasat/setup.c b/arch/mips/lasat/setup.c
index e9e9a89..bb70a82 100644
--- a/arch/mips/lasat/setup.c
+++ b/arch/mips/lasat/setup.c
@@ -175,8 +175,8 @@ void __init plat_setup(void)
 
 #ifdef CONFIG_DS1603
 	ds1603 = &ds_defs[mips_machtype];
-	rtc_get_time = ds1603_read;
-	rtc_set_time = ds1603_set;
+	rtc_mips_get_time = ds1603_read;
+	rtc_mips_set_time = ds1603_set;
 #endif
 
 #ifdef DYNAMIC_SERIAL_INIT
diff --git a/arch/mips/mips-boards/atlas/atlas_setup.c b/arch/mips/mips-boards/atlas/atlas_setup.c
index 873cf31..c20d401 100644
--- a/arch/mips/mips-boards/atlas/atlas_setup.c
+++ b/arch/mips/mips-boards/atlas/atlas_setup.c
@@ -65,7 +65,7 @@ void __init plat_setup(void)
 
 	board_time_init = mips_time_init;
 	board_timer_setup = mips_timer_setup;
-	rtc_get_time = mips_rtc_get_time;
+	rtc_mips_get_time = mips_rtc_get_time;
 }
 
 static void __init serial_init(void)
diff --git a/arch/mips/mips-boards/malta/malta_setup.c b/arch/mips/mips-boards/malta/malta_setup.c
index 2209e8a..b8488aa 100644
--- a/arch/mips/mips-boards/malta/malta_setup.c
+++ b/arch/mips/mips-boards/malta/malta_setup.c
@@ -225,5 +225,5 @@ void __init plat_setup(void)
 
 	board_time_init = mips_time_init;
 	board_timer_setup = mips_timer_setup;
-	rtc_get_time = mips_rtc_get_time;
+	rtc_mips_get_time = mips_rtc_get_time;
 }
diff --git a/arch/mips/momentum/jaguar_atx/setup.c b/arch/mips/momentum/jaguar_atx/setup.c
index 3784c89..91d9637 100644
--- a/arch/mips/momentum/jaguar_atx/setup.c
+++ b/arch/mips/momentum/jaguar_atx/setup.c
@@ -229,8 +229,8 @@ void momenco_time_init(void)
 	mips_hpt_frequency = cpu_clock / 2;
 	board_timer_setup = momenco_timer_setup;
 
-	rtc_get_time = m48t37y_get_time;
-	rtc_set_time = m48t37y_set_time;
+	rtc_mips_get_time = m48t37y_get_time;
+	rtc_mips_set_time = m48t37y_set_time;
 }
 
 static struct resource mv_pci_io_mem0_resource = {
diff --git a/arch/mips/momentum/ocelot_3/setup.c b/arch/mips/momentum/ocelot_3/setup.c
index f95677f..969612e 100644
--- a/arch/mips/momentum/ocelot_3/setup.c
+++ b/arch/mips/momentum/ocelot_3/setup.c
@@ -215,8 +215,8 @@ void momenco_time_init(void)
 	mips_hpt_frequency = cpu_clock / 2;
 	board_timer_setup = momenco_timer_setup;
 
-	rtc_get_time = m48t37y_get_time;
-	rtc_set_time = m48t37y_set_time;
+	rtc_mips_get_time = m48t37y_get_time;
+	rtc_mips_set_time = m48t37y_set_time;
 }
 
 /*
diff --git a/arch/mips/momentum/ocelot_c/setup.c b/arch/mips/momentum/ocelot_c/setup.c
index bd02e60..a3e6f55 100644
--- a/arch/mips/momentum/ocelot_c/setup.c
+++ b/arch/mips/momentum/ocelot_c/setup.c
@@ -227,8 +227,8 @@ void momenco_time_init(void)
 	printk("momenco_time_init cpu_clock=%d\n", cpu_clock);
 	board_timer_setup = momenco_timer_setup;
 
-	rtc_get_time = m48t37y_get_time;
-	rtc_set_time = m48t37y_set_time;
+	rtc_mips_get_time = m48t37y_get_time;
+	rtc_mips_set_time = m48t37y_set_time;
 }
 
 void __init plat_setup(void)
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 8bce711..3f724d6 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -198,8 +198,8 @@ static void __init py_rtc_setup(void)
 	if (!m48t37_base)
 		printk(KERN_ERR "Mapping the RTC failed\n");
 
-	rtc_get_time = m48t37y_get_time;
-	rtc_set_time = m48t37y_set_time;
+	rtc_mips_get_time = m48t37y_get_time;
+	rtc_mips_set_time = m48t37y_set_time;
 
 	write_seqlock(&xtime_lock);
 	xtime.tv_sec = m48t37y_get_time();
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index b7300cc..cca688a 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -212,8 +212,8 @@ static void indy_timer_setup(struct irqaction *irq)
 void __init ip22_time_init(void)
 {
 	/* setup hookup functions */
-	rtc_get_time = indy_rtc_get_time;
-	rtc_set_time = indy_rtc_set_time;
+	rtc_mips_get_time = indy_rtc_get_time;
+	rtc_mips_set_time = indy_rtc_set_time;
 
 	board_time_init = indy_time_init;
 	board_timer_setup = indy_timer_setup;
diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c
index 2f50c79..a2dd8ae 100644
--- a/arch/mips/sgi-ip32/ip32-setup.c
+++ b/arch/mips/sgi-ip32/ip32-setup.c
@@ -91,8 +91,8 @@ void __init plat_setup(void)
 {
 	board_be_init = ip32_be_init;
 
-	rtc_get_time = mc146818_get_cmos_time;
-	rtc_set_mmss = mc146818_set_rtc_mmss;
+	rtc_mips_get_time = mc146818_get_cmos_time;
+	rtc_mips_set_mmss = mc146818_set_rtc_mmss;
 
 	board_time_init = ip32_time_init;
 	board_timer_setup = ip32_timer_setup;
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index b661d24..4b5f74f 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -121,14 +121,14 @@ void __init plat_setup(void)
 
 	if (xicor_probe()) {
 		printk("swarm setup: Xicor 1241 RTC detected.\n");
-		rtc_get_time = xicor_get_time;
-		rtc_set_time = xicor_set_time;
+		rtc_mips_get_time = xicor_get_time;
+		rtc_mips_set_time = xicor_set_time;
 	}
 
 	if (m41t81_probe()) {
 		printk("swarm setup: M41T81 RTC detected.\n");
-		rtc_get_time = m41t81_get_time;
-		rtc_set_time = m41t81_set_time;
+		rtc_mips_get_time = m41t81_get_time;
+		rtc_mips_set_time = m41t81_set_time;
 	}
 
 	printk("This kernel optimized for "
diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c
index 1141fcd..01ba6c5 100644
--- a/arch/mips/sni/setup.c
+++ b/arch/mips/sni/setup.c
@@ -164,8 +164,8 @@ static struct pci_controller sni_controller = {
 
 static inline void sni_pcimt_time_init(void)
 {
-	rtc_get_time = mc146818_get_cmos_time;
-	rtc_set_time = mc146818_set_rtc_mmss;
+	rtc_mips_get_time = mc146818_get_cmos_time;
+	rtc_mips_set_time = mc146818_set_rtc_mmss;
 }
 
 void __init plat_setup(void)
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index 2ad6401..6dcf077 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -1036,8 +1036,8 @@ toshiba_rbtx4927_time_init(void)
 
 #ifdef CONFIG_RTC_DS1742
 
-	rtc_get_time = rtc_ds1742_get_time;
-	rtc_set_time = rtc_ds1742_set_time;
+	rtc_mips_get_time = rtc_ds1742_get_time;
+	rtc_mips_set_time = rtc_ds1742_set_time;
 
 	TOSHIBA_RBTX4927_SETUP_DPRINTK(TOSHIBA_RBTX4927_SETUP_TIME_INIT,
 				       ":rtc_ds1742_init()-\n");
diff --git a/arch/mips/tx4938/common/rtc_rx5c348.c b/arch/mips/tx4938/common/rtc_rx5c348.c
index d249edb..f74295f 100644
--- a/arch/mips/tx4938/common/rtc_rx5c348.c
+++ b/arch/mips/tx4938/common/rtc_rx5c348.c
@@ -197,6 +197,6 @@ rtc_rx5c348_init(int chipid)
 		srtc_24h = 1;
 
 	/* set the function pointers */
-	rtc_get_time = rtc_rx5c348_get_time;
-	rtc_set_time = rtc_rx5c348_set_time;
+	rtc_mips_get_time = rtc_rx5c348_get_time;
+	rtc_mips_set_time = rtc_rx5c348_set_time;
 }
diff --git a/include/asm-mips/time.h b/include/asm-mips/time.h
index 9cc3564..d897c8b 100644
--- a/include/asm-mips/time.h
+++ b/include/asm-mips/time.h
@@ -26,14 +26,14 @@ extern spinlock_t rtc_lock;
 
 /*
  * RTC ops.  By default, they point to no-RTC functions.
- *	rtc_get_time - mktime(year, mon, day, hour, min, sec) in seconds.
- *	rtc_set_time - reverse the above translation and set time to RTC.
- *	rtc_set_mmss - similar to rtc_set_time, but only min and sec need
+ *	rtc_mips_get_time - mktime(year, mon, day, hour, min, sec) in seconds.
+ *	rtc_mips_set_time - reverse the above translation and set time to RTC.
+ *	rtc_mips_set_mmss - similar to rtc_set_time, but only min and sec need
  *			to be set.  Used by RTC sync-up.
  */
-extern unsigned long (*rtc_get_time)(void);
-extern int (*rtc_set_time)(unsigned long);
-extern int (*rtc_set_mmss)(unsigned long);
+extern unsigned long (*rtc_mips_get_time)(void);
+extern int (*rtc_mips_set_time)(unsigned long);
+extern int (*rtc_mips_set_mmss)(unsigned long);
 
 /*
  * Timer interrupt functions.
-- 
cgit v0.10.2


From c58411e95d7f5062dedd1a3064af4d359da1e633 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:34 -0800
Subject: [PATCH] RTC Subsystem: library functions

RTC and date/time related functions.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/Kconfig b/drivers/Kconfig
index bddf431..9f5c0da 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -70,4 +70,6 @@ source "drivers/sn/Kconfig"
 
 source "drivers/edac/Kconfig"
 
+source "drivers/rtc/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 5c69b86..4249552 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_USB_GADGET)	+= usb/gadget/
 obj-$(CONFIG_GAMEPORT)		+= input/gameport/
 obj-$(CONFIG_INPUT)		+= input/
 obj-$(CONFIG_I2O)		+= message/
+obj-$(CONFIG_RTC_LIB)		+= rtc/
 obj-$(CONFIG_I2C)		+= i2c/
 obj-$(CONFIG_W1)		+= w1/
 obj-$(CONFIG_HWMON)		+= hwmon/
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
new file mode 100644
index 0000000..15df7c1
--- /dev/null
+++ b/drivers/rtc/Kconfig
@@ -0,0 +1,6 @@
+#
+# RTC class/drivers configuration
+#
+
+config RTC_LIB
+	tristate
\ No newline at end of file
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
new file mode 100644
index 0000000..eb9ad77
--- /dev/null
+++ b/drivers/rtc/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for RTC class/drivers.
+#
+
+obj-$(CONFIG_RTC_LIB)	+= rtc-lib.o
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
new file mode 100644
index 0000000..cfedc1d
--- /dev/null
+++ b/drivers/rtc/rtc-lib.c
@@ -0,0 +1,101 @@
+/*
+ * rtc and date/time utility functions
+ *
+ * Copyright (C) 2005-06 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * based on arch/arm/common/rtctime.c and other bits
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+
+static const unsigned char rtc_days_in_month[] = {
+	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+#define LEAPS_THRU_END_OF(y) ((y)/4 - (y)/100 + (y)/400)
+#define LEAP_YEAR(year) ((!(year % 4) && (year % 100)) || !(year % 400))
+
+int rtc_month_days(unsigned int month, unsigned int year)
+{
+	return rtc_days_in_month[month] + (LEAP_YEAR(year) && month == 1);
+}
+EXPORT_SYMBOL(rtc_month_days);
+
+/*
+ * Convert seconds since 01-01-1970 00:00:00 to Gregorian date.
+ */
+void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
+{
+	register int days, month, year;
+
+	days = time / 86400;
+	time -= days * 86400;
+
+	/* day of the week, 1970-01-01 was a Thursday */
+	tm->tm_wday = (days + 4) % 7;
+
+	year = 1970 + days / 365;
+	days -= (year - 1970) * 365
+		+ LEAPS_THRU_END_OF(year - 1)
+		- LEAPS_THRU_END_OF(1970 - 1);
+	if (days < 0) {
+		year -= 1;
+		days += 365 + LEAP_YEAR(year);
+	}
+	tm->tm_year = year - 1900;
+	tm->tm_yday = days + 1;
+
+	for (month = 0; month < 11; month++) {
+		int newdays;
+
+		newdays = days - rtc_month_days(month, year);
+		if (newdays < 0)
+			break;
+		days = newdays;
+	}
+	tm->tm_mon = month;
+	tm->tm_mday = days + 1;
+
+	tm->tm_hour = time / 3600;
+	time -= tm->tm_hour * 3600;
+	tm->tm_min = time / 60;
+	tm->tm_sec = time - tm->tm_min * 60;
+}
+EXPORT_SYMBOL(rtc_time_to_tm);
+
+/*
+ * Does the rtc_time represent a valid date/time?
+ */
+int rtc_valid_tm(struct rtc_time *tm)
+{
+	if (tm->tm_year < 70
+		|| tm->tm_mon >= 12
+		|| tm->tm_mday < 1
+		|| tm->tm_mday > rtc_month_days(tm->tm_mon, tm->tm_year + 1900)
+		|| tm->tm_hour >= 24
+		|| tm->tm_min >= 60
+		|| tm->tm_sec >= 60)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(rtc_valid_tm);
+
+/*
+ * Convert Gregorian date to seconds since 01-01-1970 00:00:00.
+ */
+int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
+{
+	*time = mktime(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+			tm->tm_hour, tm->tm_min, tm->tm_sec);
+	return 0;
+}
+EXPORT_SYMBOL(rtc_tm_to_time);
+
+MODULE_LICENSE("GPL");
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b739ac1..8454337 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -95,6 +95,11 @@ struct rtc_pll_info {
 
 #include <linux/interrupt.h>
 
+extern int rtc_month_days(unsigned int month, unsigned int year);
+extern int rtc_valid_tm(struct rtc_time *tm);
+extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time);
+extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
+
 typedef struct rtc_task {
 	void (*func)(void *private_data);
 	void *private_data;
-- 
cgit v0.10.2


From 12b824fb15a37cdcdfa3e92c9e912a07cc6f7a24 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:35 -0800
Subject: [PATCH] RTC subsystem: ARM cleanup

This patch removes from the ARM subsytem some of the rtc-related functions
that have been included in the RTC subsystem.  It also fixes some naming
collisions.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index bf2e726..9731b3f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
 config ARM
 	bool
 	default y
+	select RTC_LIB
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -839,6 +840,8 @@ source "drivers/usb/Kconfig"
 
 source "drivers/mmc/Kconfig"
 
+source "drivers/rtc/Kconfig"
+
 endmenu
 
 source "fs/Kconfig"
diff --git a/arch/arm/common/rtctime.c b/arch/arm/common/rtctime.c
index e851d86..35c9a64 100644
--- a/arch/arm/common/rtctime.c
+++ b/arch/arm/common/rtctime.c
@@ -20,6 +20,7 @@
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <linux/rtc.h>
 
 #include <asm/rtc.h>
 #include <asm/semaphore.h>
@@ -42,89 +43,6 @@ static struct rtc_ops *rtc_ops;
 
 #define rtc_epoch 1900UL
 
-static const unsigned char days_in_month[] = {
-	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
-};
-
-#define LEAPS_THRU_END_OF(y) ((y)/4 - (y)/100 + (y)/400)
-#define LEAP_YEAR(year) ((!(year % 4) && (year % 100)) || !(year % 400))
-
-static int month_days(unsigned int month, unsigned int year)
-{
-	return days_in_month[month] + (LEAP_YEAR(year) && month == 1);
-}
-
-/*
- * Convert seconds since 01-01-1970 00:00:00 to Gregorian date.
- */
-void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
-{
-	int days, month, year;
-
-	days = time / 86400;
-	time -= days * 86400;
-
-	tm->tm_wday = (days + 4) % 7;
-
-	year = 1970 + days / 365;
-	days -= (year - 1970) * 365
-	        + LEAPS_THRU_END_OF(year - 1)
-	        - LEAPS_THRU_END_OF(1970 - 1);
-	if (days < 0) {
-		year -= 1;
-		days += 365 + LEAP_YEAR(year);
-	}
-	tm->tm_year = year - 1900;
-	tm->tm_yday = days + 1;
-
-	for (month = 0; month < 11; month++) {
-		int newdays;
-
-		newdays = days - month_days(month, year);
-		if (newdays < 0)
-			break;
-		days = newdays;
-	}
-	tm->tm_mon = month;
-	tm->tm_mday = days + 1;
-
-	tm->tm_hour = time / 3600;
-	time -= tm->tm_hour * 3600;
-	tm->tm_min = time / 60;
-	tm->tm_sec = time - tm->tm_min * 60;
-}
-EXPORT_SYMBOL(rtc_time_to_tm);
-
-/*
- * Does the rtc_time represent a valid date/time?
- */
-int rtc_valid_tm(struct rtc_time *tm)
-{
-	if (tm->tm_year < 70 ||
-	    tm->tm_mon >= 12 ||
-	    tm->tm_mday < 1 ||
-	    tm->tm_mday > month_days(tm->tm_mon, tm->tm_year + 1900) ||
-	    tm->tm_hour >= 24 ||
-	    tm->tm_min >= 60 ||
-	    tm->tm_sec >= 60)
-		return -EINVAL;
-
-	return 0;
-}
-EXPORT_SYMBOL(rtc_valid_tm);
-
-/*
- * Convert Gregorian date to seconds since 01-01-1970 00:00:00.
- */
-int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
-{
-	*time = mktime(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-		       tm->tm_hour, tm->tm_min, tm->tm_sec);
-
-	return 0;
-}
-EXPORT_SYMBOL(rtc_tm_to_time);
-
 /*
  * Calculate the next alarm time given the requested alarm time mask
  * and the current time.
@@ -151,13 +69,13 @@ void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, struct rtc
 	}
 }
 
-static inline int rtc_read_time(struct rtc_ops *ops, struct rtc_time *tm)
+static inline int rtc_arm_read_time(struct rtc_ops *ops, struct rtc_time *tm)
 {
 	memset(tm, 0, sizeof(struct rtc_time));
 	return ops->read_time(tm);
 }
 
-static inline int rtc_set_time(struct rtc_ops *ops, struct rtc_time *tm)
+static inline int rtc_arm_set_time(struct rtc_ops *ops, struct rtc_time *tm)
 {
 	int ret;
 
@@ -168,7 +86,7 @@ static inline int rtc_set_time(struct rtc_ops *ops, struct rtc_time *tm)
 	return ret;
 }
 
-static inline int rtc_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm)
+static inline int rtc_arm_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm)
 {
 	int ret = -EINVAL;
 	if (ops->read_alarm) {
@@ -178,7 +96,7 @@ static inline int rtc_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm)
 	return ret;
 }
 
-static inline int rtc_set_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm)
+static inline int rtc_arm_set_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm)
 {
 	int ret = -EINVAL;
 	if (ops->set_alarm)
@@ -266,7 +184,7 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 	switch (cmd) {
 	case RTC_ALM_READ:
-		ret = rtc_read_alarm(ops, &alrm);
+		ret = rtc_arm_read_alarm(ops, &alrm);
 		if (ret)
 			break;
 		ret = copy_to_user(uarg, &alrm.time, sizeof(tm));
@@ -288,11 +206,11 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		alrm.time.tm_wday = -1;
 		alrm.time.tm_yday = -1;
 		alrm.time.tm_isdst = -1;
-		ret = rtc_set_alarm(ops, &alrm);
+		ret = rtc_arm_set_alarm(ops, &alrm);
 		break;
 
 	case RTC_RD_TIME:
-		ret = rtc_read_time(ops, &tm);
+		ret = rtc_arm_read_time(ops, &tm);
 		if (ret)
 			break;
 		ret = copy_to_user(uarg, &tm, sizeof(tm));
@@ -310,7 +228,7 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			ret = -EFAULT;
 			break;
 		}
-		ret = rtc_set_time(ops, &tm);
+		ret = rtc_arm_set_time(ops, &tm);
 		break;
 
 	case RTC_EPOCH_SET:
@@ -341,11 +259,11 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			ret = -EFAULT;
 			break;
 		}
-		ret = rtc_set_alarm(ops, &alrm);
+		ret = rtc_arm_set_alarm(ops, &alrm);
 		break;
 
 	case RTC_WKALM_RD:
-		ret = rtc_read_alarm(ops, &alrm);
+		ret = rtc_arm_read_alarm(ops, &alrm);
 		if (ret)
 			break;
 		ret = copy_to_user(uarg, &alrm, sizeof(alrm));
@@ -435,7 +353,7 @@ static int rtc_read_proc(char *page, char **start, off_t off, int count, int *eo
 	struct rtc_time tm;
 	char *p = page;
 
-	if (rtc_read_time(ops, &tm) == 0) {
+	if (rtc_arm_read_time(ops, &tm) == 0) {
 		p += sprintf(p,
 			"rtc_time\t: %02d:%02d:%02d\n"
 			"rtc_date\t: %04d-%02d-%02d\n"
@@ -445,7 +363,7 @@ static int rtc_read_proc(char *page, char **start, off_t off, int count, int *eo
 			rtc_epoch);
 	}
 
-	if (rtc_read_alarm(ops, &alrm) == 0) {
+	if (rtc_arm_read_alarm(ops, &alrm) == 0) {
 		p += sprintf(p, "alrm_time\t: ");
 		if ((unsigned int)alrm.time.tm_hour <= 24)
 			p += sprintf(p, "%02d:", alrm.time.tm_hour);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index facc3f1..73d30bf 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -696,7 +696,7 @@ config NVRAM
 
 config RTC
 	tristate "Enhanced Real Time Clock Support"
-	depends on !PPC && !PARISC && !IA64 && !M68K && (!SPARC || PCI) && !FRV
+	depends on !PPC && !PARISC && !IA64 && !M68K && (!SPARC || PCI) && !FRV && !ARM
 	---help---
 	  If you say Y here and create a character special file /dev/rtc with
 	  major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/include/asm-arm/rtc.h b/include/asm-arm/rtc.h
index 370dfe7..1a5c923 100644
--- a/include/asm-arm/rtc.h
+++ b/include/asm-arm/rtc.h
@@ -25,9 +25,6 @@ struct rtc_ops {
 	int		(*proc)(char *buf);
 };
 
-void rtc_time_to_tm(unsigned long, struct rtc_time *);
-int rtc_tm_to_time(struct rtc_time *, unsigned long *);
-int rtc_valid_tm(struct rtc_time *);
 void rtc_next_alarm_time(struct rtc_time *, struct rtc_time *, struct rtc_time *);
 void rtc_update(unsigned long, unsigned long);
 int register_rtc(struct rtc_ops *);
-- 
cgit v0.10.2


From 4079c39aaab65022f4875609d76e62669ef94c29 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Mon, 27 Mar 2006 01:16:36 -0800
Subject: [PATCH] RTC subsystem: ARM Integrator cleanup

Fix some namespace conflicts between the RTC subsystem and the ARM Integrator
time functions.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c
index 3c22c16..bc07f52 100644
--- a/arch/arm/mach-integrator/time.c
+++ b/arch/arm/mach-integrator/time.c
@@ -40,13 +40,13 @@ static int integrator_set_rtc(void)
 	return 1;
 }
 
-static int rtc_read_alarm(struct rtc_wkalrm *alrm)
+static int integrator_rtc_read_alarm(struct rtc_wkalrm *alrm)
 {
 	rtc_time_to_tm(readl(rtc_base + RTC_MR), &alrm->time);
 	return 0;
 }
 
-static inline int rtc_set_alarm(struct rtc_wkalrm *alrm)
+static inline int integrator_rtc_set_alarm(struct rtc_wkalrm *alrm)
 {
 	unsigned long time;
 	int ret;
@@ -62,7 +62,7 @@ static inline int rtc_set_alarm(struct rtc_wkalrm *alrm)
 	return ret;
 }
 
-static int rtc_read_time(struct rtc_time *tm)
+static int integrator_rtc_read_time(struct rtc_time *tm)
 {
 	rtc_time_to_tm(readl(rtc_base + RTC_DR), tm);
 	return 0;
@@ -76,7 +76,7 @@ static int rtc_read_time(struct rtc_time *tm)
  * edge of the 1Hz clock, we must write the time one second
  * in advance.
  */
-static inline int rtc_set_time(struct rtc_time *tm)
+static inline int integrator_rtc_set_time(struct rtc_time *tm)
 {
 	unsigned long time;
 	int ret;
@@ -90,10 +90,10 @@ static inline int rtc_set_time(struct rtc_time *tm)
 
 static struct rtc_ops rtc_ops = {
 	.owner		= THIS_MODULE,
-	.read_time	= rtc_read_time,
-	.set_time	= rtc_set_time,
-	.read_alarm	= rtc_read_alarm,
-	.set_alarm	= rtc_set_alarm,
+	.read_time	= integrator_rtc_read_time,
+	.set_time	= integrator_rtc_set_time,
+	.read_alarm	= integrator_rtc_read_alarm,
+	.set_alarm	= integrator_rtc_set_alarm,
 };
 
 static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id,
-- 
cgit v0.10.2


From 0c86edc0d4970649f39748c4ce4f2895f728468f Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:37 -0800
Subject: [PATCH] RTC subsystem: class

Add the basic RTC subsystem infrastructure to the kernel.

rtc/class.c - registration facilities for RTC drivers
rtc/interface.c - kernel/rtc interface functions
rtc/hctosys.c - snippet of code that copies hw clock to sw clock
		at bootup, if configured to do so.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/CREDITS b/CREDITS
index c6d69bf..35850d8 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3741,10 +3741,11 @@ D: Mylex DAC960 PCI RAID driver
 D: Miscellaneous kernel fixes
 
 N: Alessandro Zummo
-E: azummo@ita.flashnet.it
-W: http://freepage.logicom.it/azummo/
+E: a.zummo@towertech.it
 D: CMI8330 support is sb_card.c
 D: ISAPnP fixes in sb_card.c
+D: ZyXEL omni.net lcd plus driver
+D: RTC subsystem
 S: Italy
 
 N: Marc Zyngier
diff --git a/MAINTAINERS b/MAINTAINERS
index f278467..e5b051f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2233,6 +2233,12 @@ M:	p_gortmaker@yahoo.com
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
+REAL TIME CLOCK (RTC) SUBSYSTEM
+P:	Alessandro Zummo
+M:	a.zummo@towertech.it
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 REISERFS FILE SYSTEM
 P:	Hans Reiser
 M:	reiserfs-dev@namesys.com
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 15df7c1..a256f67 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1,6 +1,46 @@
-#
+\#
 # RTC class/drivers configuration
 #
 
+menu "Real Time Clock"
+
 config RTC_LIB
-	tristate
\ No newline at end of file
+	tristate
+
+config RTC_CLASS
+	tristate "RTC class"
+	depends on EXPERIMENTAL
+	default n
+	select RTC_LIB
+	help
+	  Generic RTC class support. If you say yes here, you will
+ 	  be allowed to plug one or more RTCs to your system. You will
+	  probably want to enable one of more of the interfaces below.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-class.
+
+config RTC_HCTOSYS
+	bool "Set system time from RTC on startup"
+	depends on RTC_CLASS = y
+	default y
+	help
+	  If you say yes here, the system time will be set using
+	  the value read from the specified RTC device. This is useful
+	  in order to avoid unnecessary fschk runs.
+
+config RTC_HCTOSYS_DEVICE
+	string "The RTC to read the time from"
+	depends on RTC_HCTOSYS = y
+	default "rtc0"
+	help
+	  The RTC device that will be used as the source for
+	  the system time, usually rtc0.
+
+comment "RTC interfaces"
+	depends on RTC_CLASS
+
+comment "RTC drivers"
+	depends on RTC_CLASS
+
+endmenu
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index eb9ad77..7b87f37 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -2,4 +2,7 @@
 # Makefile for RTC class/drivers.
 #
 
-obj-$(CONFIG_RTC_LIB)	+= rtc-lib.o
+obj-$(CONFIG_RTC_LIB)		+= rtc-lib.o
+obj-$(CONFIG_RTC_HCTOSYS)	+= hctosys.o
+obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
+rtc-core-y			:= class.o interface.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
new file mode 100644
index 0000000..8533936
--- /dev/null
+++ b/drivers/rtc/class.c
@@ -0,0 +1,145 @@
+/*
+ * RTC subsystem, base class
+ *
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * class skeleton from drivers/hwmon/hwmon.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/kdev_t.h>
+#include <linux/idr.h>
+
+static DEFINE_IDR(rtc_idr);
+static DEFINE_MUTEX(idr_lock);
+struct class *rtc_class;
+
+static void rtc_device_release(struct class_device *class_dev)
+{
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+	mutex_lock(&idr_lock);
+	idr_remove(&rtc_idr, rtc->id);
+	mutex_unlock(&idr_lock);
+	kfree(rtc);
+}
+
+/**
+ * rtc_device_register - register w/ RTC class
+ * @dev: the device to register
+ *
+ * rtc_device_unregister() must be called when the class device is no
+ * longer needed.
+ *
+ * Returns the pointer to the new struct class device.
+ */
+struct rtc_device *rtc_device_register(const char *name, struct device *dev,
+					struct rtc_class_ops *ops,
+					struct module *owner)
+{
+	struct rtc_device *rtc;
+	int id, err;
+
+	if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+
+	mutex_lock(&idr_lock);
+	err = idr_get_new(&rtc_idr, NULL, &id);
+	mutex_unlock(&idr_lock);
+
+	if (err < 0)
+		goto exit;
+
+	id = id & MAX_ID_MASK;
+
+	rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
+	if (rtc == NULL) {
+		err = -ENOMEM;
+		goto exit_idr;
+	}
+
+	rtc->id = id;
+	rtc->ops = ops;
+	rtc->owner = owner;
+	rtc->class_dev.dev = dev;
+	rtc->class_dev.class = rtc_class;
+	rtc->class_dev.release = rtc_device_release;
+
+	mutex_init(&rtc->ops_lock);
+	spin_lock_init(&rtc->irq_lock);
+	spin_lock_init(&rtc->irq_task_lock);
+
+	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
+	snprintf(rtc->class_dev.class_id, BUS_ID_SIZE, "rtc%d", id);
+
+	err = class_device_register(&rtc->class_dev);
+	if (err)
+		goto exit_kfree;
+
+	dev_info(dev, "rtc core: registered %s as %s\n",
+			rtc->name, rtc->class_dev.class_id);
+
+	return rtc;
+
+exit_kfree:
+	kfree(rtc);
+
+exit_idr:
+	idr_remove(&rtc_idr, id);
+
+exit:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(rtc_device_register);
+
+
+/**
+ * rtc_device_unregister - removes the previously registered RTC class device
+ *
+ * @rtc: the RTC class device to destroy
+ */
+void rtc_device_unregister(struct rtc_device *rtc)
+{
+	mutex_lock(&rtc->ops_lock);
+	rtc->ops = NULL;
+	mutex_unlock(&rtc->ops_lock);
+	class_device_unregister(&rtc->class_dev);
+}
+EXPORT_SYMBOL_GPL(rtc_device_unregister);
+
+int rtc_interface_register(struct class_interface *intf)
+{
+	intf->class = rtc_class;
+	return class_interface_register(intf);
+}
+EXPORT_SYMBOL_GPL(rtc_interface_register);
+
+static int __init rtc_init(void)
+{
+	rtc_class = class_create(THIS_MODULE, "rtc");
+	if (IS_ERR(rtc_class)) {
+		printk(KERN_ERR "%s: couldn't create class\n", __FILE__);
+		return PTR_ERR(rtc_class);
+	}
+	return 0;
+}
+
+static void __exit rtc_exit(void)
+{
+	class_destroy(rtc_class);
+}
+
+module_init(rtc_init);
+module_exit(rtc_exit);
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towerteh.it>");
+MODULE_DESCRIPTION("RTC class support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
new file mode 100644
index 0000000..d02fe9a
--- /dev/null
+++ b/drivers/rtc/hctosys.c
@@ -0,0 +1,69 @@
+/*
+ * RTC subsystem, initialize system time on startup
+ *
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/rtc.h>
+
+/* IMPORTANT: the RTC only stores whole seconds. It is arbitrary
+ * whether it stores the most close value or the value with partial
+ * seconds truncated. However, it is important that we use it to store
+ * the truncated value. This is because otherwise it is necessary,
+ * in an rtc sync function, to read both xtime.tv_sec and
+ * xtime.tv_nsec. On some processors (i.e. ARM), an atomic read
+ * of >32bits is not possible. So storing the most close value would
+ * slow down the sync API. So here we have the truncated value and
+ * the best guess is to add 0.5s.
+ */
+
+static int __init rtc_hctosys(void)
+{
+	int err;
+	struct rtc_time tm;
+	struct class_device *class_dev = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
+
+	if (class_dev == NULL) {
+		printk("%s: unable to open rtc device (%s)\n",
+			__FILE__, CONFIG_RTC_HCTOSYS_DEVICE);
+		return -ENODEV;
+	}
+
+	err = rtc_read_time(class_dev, &tm);
+	if (err == 0) {
+		err = rtc_valid_tm(&tm);
+		if (err == 0) {
+			struct timespec tv;
+
+			tv.tv_nsec = NSEC_PER_SEC >> 1;
+
+			rtc_tm_to_time(&tm, &tv.tv_sec);
+
+			do_settimeofday(&tv);
+
+			dev_info(class_dev->dev,
+				"setting the system clock to "
+				"%d-%02d-%02d %02d:%02d:%02d (%u)\n",
+				tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+				tm.tm_hour, tm.tm_min, tm.tm_sec,
+				(unsigned int) tv.tv_sec);
+		}
+		else
+			dev_err(class_dev->dev,
+				"hctosys: invalid date/time\n");
+	}
+	else
+		dev_err(class_dev->dev,
+			"hctosys: unable to read the hardware clock\n");
+
+	rtc_class_close(class_dev);
+
+	return 0;
+}
+
+late_initcall(rtc_hctosys);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
new file mode 100644
index 0000000..56e4907
--- /dev/null
+++ b/drivers/rtc/interface.c
@@ -0,0 +1,277 @@
+/*
+ * RTC subsystem, interface functions
+ *
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * based on arch/arm/common/rtctime.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/rtc.h>
+
+int rtc_read_time(struct class_device *class_dev, struct rtc_time *tm)
+{
+	int err;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->read_time)
+		err = -EINVAL;
+	else {
+		memset(tm, 0, sizeof(struct rtc_time));
+		err = rtc->ops->read_time(class_dev->dev, tm);
+	}
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_read_time);
+
+int rtc_set_time(struct class_device *class_dev, struct rtc_time *tm)
+{
+	int err;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	err = rtc_valid_tm(tm);
+	if (err != 0)
+		return err;
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->set_time)
+		err = -EINVAL;
+	else
+		err = rtc->ops->set_time(class_dev->dev, tm);
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_set_time);
+
+int rtc_set_mmss(struct class_device *class_dev, unsigned long secs)
+{
+	int err;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (rtc->ops->set_mmss)
+		err = rtc->ops->set_mmss(class_dev->dev, secs);
+	else if (rtc->ops->read_time && rtc->ops->set_time) {
+		struct rtc_time new, old;
+
+		err = rtc->ops->read_time(class_dev->dev, &old);
+		if (err == 0) {
+			rtc_time_to_tm(secs, &new);
+
+			/*
+			 * avoid writing when we're going to change the day of
+			 * the month. We will retry in the next minute. This
+			 * basically means that if the RTC must not drift
+			 * by more than 1 minute in 11 minutes.
+			 */
+			if (!((old.tm_hour == 23 && old.tm_min == 59) ||
+				(new.tm_hour == 23 && new.tm_min == 59)))
+				err = rtc->ops->set_time(class_dev->dev, &new);
+		}
+	}
+	else
+		err = -EINVAL;
+
+	mutex_unlock(&rtc->ops_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_set_mmss);
+
+int rtc_read_alarm(struct class_device *class_dev, struct rtc_wkalrm *alarm)
+{
+	int err;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
+	if (rtc->ops == NULL)
+		err = -ENODEV;
+	else if (!rtc->ops->read_alarm)
+		err = -EINVAL;
+	else {
+		memset(alarm, 0, sizeof(struct rtc_wkalrm));
+		err = rtc->ops->read_alarm(class_dev->dev, alarm);
+	}
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_read_alarm);
+
+int rtc_set_alarm(struct class_device *class_dev, struct rtc_wkalrm *alarm)
+{
+	int err;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->set_alarm)
+		err = -EINVAL;
+	else
+		err = rtc->ops->set_alarm(class_dev->dev, alarm);
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_set_alarm);
+
+void rtc_update_irq(struct class_device *class_dev,
+		unsigned long num, unsigned long events)
+{
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	spin_lock(&rtc->irq_lock);
+	rtc->irq_data = (rtc->irq_data + (num << 8)) | events;
+	spin_unlock(&rtc->irq_lock);
+
+	spin_lock(&rtc->irq_task_lock);
+	if (rtc->irq_task)
+		rtc->irq_task->func(rtc->irq_task->private_data);
+	spin_unlock(&rtc->irq_task_lock);
+
+	wake_up_interruptible(&rtc->irq_queue);
+	kill_fasync(&rtc->async_queue, SIGIO, POLL_IN);
+}
+EXPORT_SYMBOL_GPL(rtc_update_irq);
+
+struct class_device *rtc_class_open(char *name)
+{
+	struct class_device *class_dev = NULL,
+				*class_dev_tmp;
+
+	down(&rtc_class->sem);
+	list_for_each_entry(class_dev_tmp, &rtc_class->children, node) {
+		if (strncmp(class_dev_tmp->class_id, name, BUS_ID_SIZE) == 0) {
+			class_dev = class_dev_tmp;
+			break;
+		}
+	}
+
+	if (class_dev) {
+		if (!try_module_get(to_rtc_device(class_dev)->owner))
+			class_dev = NULL;
+	}
+	up(&rtc_class->sem);
+
+	return class_dev;
+}
+EXPORT_SYMBOL_GPL(rtc_class_open);
+
+void rtc_class_close(struct class_device *class_dev)
+{
+	module_put(to_rtc_device(class_dev)->owner);
+}
+EXPORT_SYMBOL_GPL(rtc_class_close);
+
+int rtc_irq_register(struct class_device *class_dev, struct rtc_task *task)
+{
+	int retval = -EBUSY;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	if (task == NULL || task->func == NULL)
+		return -EINVAL;
+
+	spin_lock(&rtc->irq_task_lock);
+	if (rtc->irq_task == NULL) {
+		rtc->irq_task = task;
+		retval = 0;
+	}
+	spin_unlock(&rtc->irq_task_lock);
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(rtc_irq_register);
+
+void rtc_irq_unregister(struct class_device *class_dev, struct rtc_task *task)
+{
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	spin_lock(&rtc->irq_task_lock);
+	if (rtc->irq_task == task)
+		rtc->irq_task = NULL;
+	spin_unlock(&rtc->irq_task_lock);
+}
+EXPORT_SYMBOL_GPL(rtc_irq_unregister);
+
+int rtc_irq_set_state(struct class_device *class_dev, struct rtc_task *task, int enabled)
+{
+	int err = 0;
+	unsigned long flags;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	spin_lock_irqsave(&rtc->irq_task_lock, flags);
+	if (rtc->irq_task != task)
+		err = -ENXIO;
+	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	if (err == 0)
+		err = rtc->ops->irq_set_state(class_dev->dev, enabled);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_irq_set_state);
+
+int rtc_irq_set_freq(struct class_device *class_dev, struct rtc_task *task, int freq)
+{
+	int err = 0, tmp = 0;
+	unsigned long flags;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	/* allowed range is 2-8192 */
+	if (freq < 2 || freq > 8192)
+		return -EINVAL;
+/*
+	FIXME: this does not belong here, will move where appropriate
+	at a later stage. It cannot hurt right now, trust me :)
+	if ((freq > rtc_max_user_freq) && (!capable(CAP_SYS_RESOURCE)))
+		return -EACCES;
+*/
+	/* check if freq is a power of 2 */
+	while (freq > (1 << tmp))
+		tmp++;
+
+	if (freq != (1 << tmp))
+		return -EINVAL;
+
+	spin_lock_irqsave(&rtc->irq_task_lock, flags);
+	if (rtc->irq_task != task)
+		err = -ENXIO;
+	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	if (err == 0) {
+		err = rtc->ops->irq_set_freq(class_dev->dev, freq);
+		if (err == 0)
+			rtc->irq_freq = freq;
+	}
+	return err;
+}
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8454337..ab61cd1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -91,6 +91,12 @@ struct rtc_pll_info {
 #define RTC_PLL_GET	_IOR('p', 0x11, struct rtc_pll_info)  /* Get PLL correction */
 #define RTC_PLL_SET	_IOW('p', 0x12, struct rtc_pll_info)  /* Set PLL correction */
 
+/* interrupt flags */
+#define RTC_IRQF 0x80 /* any of the following is active */
+#define RTC_PF 0x40
+#define RTC_AF 0x20
+#define RTC_UF 0x10
+
 #ifdef __KERNEL__
 
 #include <linux/interrupt.h>
@@ -100,6 +106,87 @@ extern int rtc_valid_tm(struct rtc_time *tm);
 extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time);
 extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
 
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/mutex.h>
+
+extern struct class *rtc_class;
+
+struct rtc_class_ops {
+	int (*open)(struct device *);
+	void (*release)(struct device *);
+	int (*ioctl)(struct device *, unsigned int, unsigned long);
+	int (*read_time)(struct device *, struct rtc_time *);
+	int (*set_time)(struct device *, struct rtc_time *);
+	int (*read_alarm)(struct device *, struct rtc_wkalrm *);
+	int (*set_alarm)(struct device *, struct rtc_wkalrm *);
+	int (*proc)(struct device *, struct seq_file *);
+	int (*set_mmss)(struct device *, unsigned long secs);
+	int (*irq_set_state)(struct device *, int enabled);
+	int (*irq_set_freq)(struct device *, int freq);
+	int (*read_callback)(struct device *, int data);
+};
+
+#define RTC_DEVICE_NAME_SIZE 20
+struct rtc_task;
+
+struct rtc_device
+{
+	struct class_device class_dev;
+	struct module *owner;
+
+	int id;
+	char name[RTC_DEVICE_NAME_SIZE];
+
+	struct rtc_class_ops *ops;
+	struct mutex ops_lock;
+
+	struct class_device *rtc_dev;
+	struct cdev char_dev;
+	struct mutex char_lock;
+
+	unsigned long irq_data;
+	spinlock_t irq_lock;
+	wait_queue_head_t irq_queue;
+	struct fasync_struct *async_queue;
+
+	struct rtc_task *irq_task;
+	spinlock_t irq_task_lock;
+	int irq_freq;
+};
+#define to_rtc_device(d) container_of(d, struct rtc_device, class_dev)
+
+extern struct rtc_device *rtc_device_register(const char *name,
+					struct device *dev,
+					struct rtc_class_ops *ops,
+					struct module *owner);
+extern void rtc_device_unregister(struct rtc_device *rdev);
+extern int rtc_interface_register(struct class_interface *intf);
+
+extern int rtc_read_time(struct class_device *class_dev, struct rtc_time *tm);
+extern int rtc_set_time(struct class_device *class_dev, struct rtc_time *tm);
+extern int rtc_set_mmss(struct class_device *class_dev, unsigned long secs);
+extern int rtc_read_alarm(struct class_device *class_dev,
+			struct rtc_wkalrm *alrm);
+extern int rtc_set_alarm(struct class_device *class_dev,
+				struct rtc_wkalrm *alrm);
+extern void rtc_update_irq(struct class_device *class_dev,
+			unsigned long num, unsigned long events);
+
+extern struct class_device *rtc_class_open(char *name);
+extern void rtc_class_close(struct class_device *class_dev);
+
+extern int rtc_irq_register(struct class_device *class_dev,
+				struct rtc_task *task);
+extern void rtc_irq_unregister(struct class_device *class_dev,
+				struct rtc_task *task);
+extern int rtc_irq_set_state(struct class_device *class_dev,
+				struct rtc_task *task, int enabled);
+extern int rtc_irq_set_freq(struct class_device *class_dev,
+				struct rtc_task *task, int freq);
+
 typedef struct rtc_task {
 	void (*func)(void *private_data);
 	void *private_data;
-- 
cgit v0.10.2


From 6fc7f10cee28c7fa190920fefda8c696d5bf3074 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:37 -0800
Subject: [PATCH] RTC subsystem: I2C cleanup

This patch, completely optional, removes from drivers/i2c/chips all the
drivers that are implemented in the new RTC subsystem.

It should be noted that none of the current driver is actually integrated,
i.e.  usable without further patches.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index f9fae28..7aa5c38 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -65,15 +65,6 @@ config SENSORS_PCF8591
 	  This driver can also be built as a module.  If so, the module
 	  will be called pcf8591.
 
-config SENSORS_RTC8564
-	tristate "Epson 8564 RTC chip"
-	depends on I2C && EXPERIMENTAL
-	help
-	  If you say yes here you get support for the Epson 8564 RTC chip.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called i2c-rtc8564.
-
 config ISP1301_OMAP
 	tristate "Philips ISP1301 with OMAP OTG"
 	depends on I2C && ARCH_OMAP_OTG
@@ -126,13 +117,4 @@ config SENSORS_MAX6875
 	  This driver can also be built as a module.  If so, the module
 	  will be called max6875.
 
-config RTC_X1205_I2C
-	tristate "Xicor X1205 RTC chip"
-	depends on I2C && EXPERIMENTAL
-	help
-	  If you say yes here you get support for the Xicor X1205 RTC chip.
-
-	  This driver can also be built as a module. If so, the module
-	  will be called x1205.
-
 endmenu
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index 46178b5..779868e 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -10,10 +10,8 @@ obj-$(CONFIG_SENSORS_M41T00)	+= m41t00.o
 obj-$(CONFIG_SENSORS_PCA9539)	+= pca9539.o
 obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
 obj-$(CONFIG_SENSORS_PCF8591)	+= pcf8591.o
-obj-$(CONFIG_SENSORS_RTC8564)	+= rtc8564.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
-obj-$(CONFIG_RTC_X1205_I2C)	+= x1205.o
 
 ifeq ($(CONFIG_I2C_DEBUG_CHIP),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c
deleted file mode 100644
index 0d8699b..0000000
--- a/drivers/i2c/chips/rtc8564.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- *  linux/drivers/i2c/chips/rtc8564.c
- *
- *  Copyright (C) 2002-2004 Stefan Eletzhofer
- *
- *	based on linux/drivers/acron/char/pcf8583.c
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Driver for system3's EPSON RTC 8564 chip
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/bcd.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/rtc.h>		/* get the user-level API */
-#include <linux/init.h>
-
-#include "rtc8564.h"
-
-#ifdef DEBUG
-# define _DBG(x, fmt, args...) do{ if (debug>=x) printk(KERN_DEBUG"%s: " fmt "\n", __FUNCTION__, ##args); } while(0);
-#else
-# define _DBG(x, fmt, args...) do { } while(0);
-#endif
-
-#define _DBGRTCTM(x, rtctm) if (debug>=x) printk("%s: secs=%d, mins=%d, hours=%d, mday=%d, " \
-			"mon=%d, year=%d, wday=%d VL=%d\n", __FUNCTION__, \
-			(rtctm).secs, (rtctm).mins, (rtctm).hours, (rtctm).mday, \
-			(rtctm).mon, (rtctm).year, (rtctm).wday, (rtctm).vl);
-
-struct rtc8564_data {
-	struct i2c_client client;
-	u16 ctrl;
-};
-
-static inline u8 _rtc8564_ctrl1(struct i2c_client *client)
-{
-	struct rtc8564_data *data = i2c_get_clientdata(client);
-	return data->ctrl & 0xff;
-}
-static inline u8 _rtc8564_ctrl2(struct i2c_client *client)
-{
-	struct rtc8564_data *data = i2c_get_clientdata(client);
-	return (data->ctrl & 0xff00) >> 8;
-}
-
-#define CTRL1(c) _rtc8564_ctrl1(c)
-#define CTRL2(c) _rtc8564_ctrl2(c)
-
-static int debug;
-module_param(debug, int, S_IRUGO | S_IWUSR);
-
-static struct i2c_driver rtc8564_driver;
-
-static unsigned short ignore[] = { I2C_CLIENT_END };
-static unsigned short normal_addr[] = { 0x51, I2C_CLIENT_END };
-
-static struct i2c_client_address_data addr_data = {
-	.normal_i2c		= normal_addr,
-	.probe			= ignore,
-	.ignore			= ignore,
-};
-
-static int rtc8564_read_mem(struct i2c_client *client, struct mem *mem);
-static int rtc8564_write_mem(struct i2c_client *client, struct mem *mem);
-
-static int rtc8564_read(struct i2c_client *client, unsigned char adr,
-			unsigned char *buf, unsigned char len)
-{
-	int ret = -EIO;
-	unsigned char addr[1] = { adr };
-	struct i2c_msg msgs[2] = {
-		{client->addr, 0, 1, addr},
-		{client->addr, I2C_M_RD, len, buf}
-	};
-
-	_DBG(1, "client=%p, adr=%d, buf=%p, len=%d", client, adr, buf, len);
-
-	if (!buf) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	ret = i2c_transfer(client->adapter, msgs, 2);
-	if (ret == 2) {
-		ret = 0;
-	}
-
-done:
-	return ret;
-}
-
-static int rtc8564_write(struct i2c_client *client, unsigned char adr,
-			 unsigned char *data, unsigned char len)
-{
-	int ret = 0;
-	unsigned char _data[16];
-	struct i2c_msg wr;
-	int i;
-
-	if (!data || len > 15) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	_DBG(1, "client=%p, adr=%d, buf=%p, len=%d", client, adr, data, len);
-
-	_data[0] = adr;
-	for (i = 0; i < len; i++) {
-		_data[i + 1] = data[i];
-		_DBG(5, "data[%d] = 0x%02x (%d)", i, data[i], data[i]);
-	}
-
-	wr.addr = client->addr;
-	wr.flags = 0;
-	wr.len = len + 1;
-	wr.buf = _data;
-
-	ret = i2c_transfer(client->adapter, &wr, 1);
-	if (ret == 1) {
-		ret = 0;
-	}
-
-done:
-	return ret;
-}
-
-static int rtc8564_attach(struct i2c_adapter *adap, int addr, int kind)
-{
-	int ret;
-	struct i2c_client *new_client;
-	struct rtc8564_data *d;
-	unsigned char data[10];
-	unsigned char ad[1] = { 0 };
-	struct i2c_msg ctrl_wr[1] = {
-		{addr, 0, 2, data}
-	};
-	struct i2c_msg ctrl_rd[2] = {
-		{addr, 0, 1, ad},
-		{addr, I2C_M_RD, 2, data}
-	};
-
-	d = kzalloc(sizeof(struct rtc8564_data), GFP_KERNEL);
-	if (!d) {
-		ret = -ENOMEM;
-		goto done;
-	}
-	new_client = &d->client;
-
-	strlcpy(new_client->name, "RTC8564", I2C_NAME_SIZE);
-	i2c_set_clientdata(new_client, d);
-	new_client->addr = addr;
-	new_client->adapter = adap;
-	new_client->driver = &rtc8564_driver;
-
-	_DBG(1, "client=%p", new_client);
-
-	/* init ctrl1 reg */
-	data[0] = 0;
-	data[1] = 0;
-	ret = i2c_transfer(new_client->adapter, ctrl_wr, 1);
-	if (ret != 1) {
-		printk(KERN_INFO "rtc8564: cant init ctrl1\n");
-		ret = -ENODEV;
-		goto done;
-	}
-
-	/* read back ctrl1 and ctrl2 */
-	ret = i2c_transfer(new_client->adapter, ctrl_rd, 2);
-	if (ret != 2) {
-		printk(KERN_INFO "rtc8564: cant read ctrl\n");
-		ret = -ENODEV;
-		goto done;
-	}
-
-	d->ctrl = data[0] | (data[1] << 8);
-
-	_DBG(1, "RTC8564_REG_CTRL1=%02x, RTC8564_REG_CTRL2=%02x",
-	     data[0], data[1]);
-
-	ret = i2c_attach_client(new_client);
-done:
-	if (ret) {
-		kfree(d);
-	}
-	return ret;
-}
-
-static int rtc8564_probe(struct i2c_adapter *adap)
-{
-	return i2c_probe(adap, &addr_data, rtc8564_attach);
-}
-
-static int rtc8564_detach(struct i2c_client *client)
-{
-	i2c_detach_client(client);
-	kfree(i2c_get_clientdata(client));
-	return 0;
-}
-
-static int rtc8564_get_datetime(struct i2c_client *client, struct rtc_tm *dt)
-{
-	int ret = -EIO;
-	unsigned char buf[15];
-
-	_DBG(1, "client=%p, dt=%p", client, dt);
-
-	if (!dt)
-		return -EINVAL;
-
-	memset(buf, 0, sizeof(buf));
-
-	ret = rtc8564_read(client, 0, buf, 15);
-	if (ret)
-		return ret;
-
-	/* century stored in minute alarm reg */
-	dt->year = BCD2BIN(buf[RTC8564_REG_YEAR]);
-	dt->year += 100 * BCD2BIN(buf[RTC8564_REG_AL_MIN] & 0x3f);
-	dt->mday = BCD2BIN(buf[RTC8564_REG_DAY] & 0x3f);
-	dt->wday = BCD2BIN(buf[RTC8564_REG_WDAY] & 7);
-	dt->mon = BCD2BIN(buf[RTC8564_REG_MON_CENT] & 0x1f);
-
-	dt->secs = BCD2BIN(buf[RTC8564_REG_SEC] & 0x7f);
-	dt->vl = (buf[RTC8564_REG_SEC] & 0x80) == 0x80;
-	dt->mins = BCD2BIN(buf[RTC8564_REG_MIN] & 0x7f);
-	dt->hours = BCD2BIN(buf[RTC8564_REG_HR] & 0x3f);
-
-	_DBGRTCTM(2, *dt);
-
-	return 0;
-}
-
-static int
-rtc8564_set_datetime(struct i2c_client *client, struct rtc_tm *dt, int datetoo)
-{
-	int ret, len = 5;
-	unsigned char buf[15];
-
-	_DBG(1, "client=%p, dt=%p", client, dt);
-
-	if (!dt)
-		return -EINVAL;
-
-	_DBGRTCTM(2, *dt);
-
-	buf[RTC8564_REG_CTRL1] = CTRL1(client) | RTC8564_CTRL1_STOP;
-	buf[RTC8564_REG_CTRL2] = CTRL2(client);
-	buf[RTC8564_REG_SEC] = BIN2BCD(dt->secs);
-	buf[RTC8564_REG_MIN] = BIN2BCD(dt->mins);
-	buf[RTC8564_REG_HR] = BIN2BCD(dt->hours);
-
-	if (datetoo) {
-		len += 5;
-		buf[RTC8564_REG_DAY] = BIN2BCD(dt->mday);
-		buf[RTC8564_REG_WDAY] = BIN2BCD(dt->wday);
-		buf[RTC8564_REG_MON_CENT] = BIN2BCD(dt->mon) & 0x1f;
-		/* century stored in minute alarm reg */
-		buf[RTC8564_REG_YEAR] = BIN2BCD(dt->year % 100);
-		buf[RTC8564_REG_AL_MIN] = BIN2BCD(dt->year / 100);
-	}
-
-	ret = rtc8564_write(client, 0, buf, len);
-	if (ret) {
-		_DBG(1, "error writing data! %d", ret);
-	}
-
-	buf[RTC8564_REG_CTRL1] = CTRL1(client);
-	ret = rtc8564_write(client, 0, buf, 1);
-	if (ret) {
-		_DBG(1, "error writing data! %d", ret);
-	}
-
-	return ret;
-}
-
-static int rtc8564_get_ctrl(struct i2c_client *client, unsigned int *ctrl)
-{
-	struct rtc8564_data *data = i2c_get_clientdata(client);
-
-	if (!ctrl)
-		return -1;
-
-	*ctrl = data->ctrl;
-	return 0;
-}
-
-static int rtc8564_set_ctrl(struct i2c_client *client, unsigned int *ctrl)
-{
-	struct rtc8564_data *data = i2c_get_clientdata(client);
-	unsigned char buf[2];
-
-	if (!ctrl)
-		return -1;
-
-	buf[0] = *ctrl & 0xff;
-	buf[1] = (*ctrl & 0xff00) >> 8;
-	data->ctrl = *ctrl;
-
-	return rtc8564_write(client, 0, buf, 2);
-}
-
-static int rtc8564_read_mem(struct i2c_client *client, struct mem *mem)
-{
-
-	if (!mem)
-		return -EINVAL;
-
-	return rtc8564_read(client, mem->loc, mem->data, mem->nr);
-}
-
-static int rtc8564_write_mem(struct i2c_client *client, struct mem *mem)
-{
-
-	if (!mem)
-		return -EINVAL;
-
-	return rtc8564_write(client, mem->loc, mem->data, mem->nr);
-}
-
-static int
-rtc8564_command(struct i2c_client *client, unsigned int cmd, void *arg)
-{
-
-	_DBG(1, "cmd=%d", cmd);
-
-	switch (cmd) {
-	case RTC_GETDATETIME:
-		return rtc8564_get_datetime(client, arg);
-
-	case RTC_SETTIME:
-		return rtc8564_set_datetime(client, arg, 0);
-
-	case RTC_SETDATETIME:
-		return rtc8564_set_datetime(client, arg, 1);
-
-	case RTC_GETCTRL:
-		return rtc8564_get_ctrl(client, arg);
-
-	case RTC_SETCTRL:
-		return rtc8564_set_ctrl(client, arg);
-
-	case MEM_READ:
-		return rtc8564_read_mem(client, arg);
-
-	case MEM_WRITE:
-		return rtc8564_write_mem(client, arg);
-
-	default:
-		return -EINVAL;
-	}
-}
-
-static struct i2c_driver rtc8564_driver = {
-	.driver = {
-		.name	= "RTC8564",
-	},
-	.id		= I2C_DRIVERID_RTC8564,
-	.attach_adapter = rtc8564_probe,
-	.detach_client	= rtc8564_detach,
-	.command	= rtc8564_command
-};
-
-static __init int rtc8564_init(void)
-{
-	return i2c_add_driver(&rtc8564_driver);
-}
-
-static __exit void rtc8564_exit(void)
-{
-	i2c_del_driver(&rtc8564_driver);
-}
-
-MODULE_AUTHOR("Stefan Eletzhofer <Stefan.Eletzhofer@eletztrick.de>");
-MODULE_DESCRIPTION("EPSON RTC8564 Driver");
-MODULE_LICENSE("GPL");
-
-module_init(rtc8564_init);
-module_exit(rtc8564_exit);
diff --git a/drivers/i2c/chips/rtc8564.h b/drivers/i2c/chips/rtc8564.h
deleted file mode 100644
index e5342d1..0000000
--- a/drivers/i2c/chips/rtc8564.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- *  linux/drivers/i2c/chips/rtc8564.h
- *
- *  Copyright (C) 2002-2004 Stefan Eletzhofer
- *
- *	based on linux/drivers/acron/char/pcf8583.h
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-struct rtc_tm {
-	unsigned char	secs;
-	unsigned char	mins;
-	unsigned char	hours;
-	unsigned char	mday;
-	unsigned char	mon;
-	unsigned short	year; /* xxxx 4 digits :) */
-	unsigned char	wday;
-	unsigned char	vl;
-};
-
-struct mem {
-	unsigned int	loc;
-	unsigned int	nr;
-	unsigned char	*data;
-};
-
-#define RTC_GETDATETIME	0
-#define RTC_SETTIME	1
-#define RTC_SETDATETIME	2
-#define RTC_GETCTRL	3
-#define RTC_SETCTRL	4
-#define MEM_READ	5
-#define MEM_WRITE	6
-
-#define RTC8564_REG_CTRL1		0x0 /* T  0 S 0 | T 0 0 0 */
-#define RTC8564_REG_CTRL2		0x1 /* 0  0 0 TI/TP | AF TF AIE TIE */
-#define RTC8564_REG_SEC			0x2 /* VL 4 2 1 | 8 4 2 1 */
-#define RTC8564_REG_MIN			0x3 /* x  4 2 1 | 8 4 2 1 */
-#define RTC8564_REG_HR			0x4 /* x  x 2 1 | 8 4 2 1 */
-#define RTC8564_REG_DAY			0x5 /* x  x 2 1 | 8 4 2 1 */
-#define RTC8564_REG_WDAY		0x6 /* x  x x x | x 4 2 1 */
-#define RTC8564_REG_MON_CENT	0x7 /* C  x x 1 | 8 4 2 1 */
-#define RTC8564_REG_YEAR		0x8 /* 8  4 2 1 | 8 4 2 1 */
-#define RTC8564_REG_AL_MIN		0x9 /* AE 4 2 1 | 8 4 2 1 */
-#define RTC8564_REG_AL_HR		0xa /* AE 4 2 1 | 8 4 2 1 */
-#define RTC8564_REG_AL_DAY		0xb /* AE x 2 1 | 8 4 2 1 */
-#define RTC8564_REG_AL_WDAY		0xc /* AE x x x | x 4 2 1 */
-#define RTC8564_REG_CLKOUT		0xd /* FE x x x | x x FD1 FD0 */
-#define RTC8564_REG_TCTL		0xe /* TE x x x | x x FD1 FD0 */
-#define RTC8564_REG_TIMER		0xf /* 8 bit binary */
-
-/* Control reg */
-#define RTC8564_CTRL1_TEST1		(1<<3)
-#define RTC8564_CTRL1_STOP		(1<<5)
-#define RTC8564_CTRL1_TEST2		(1<<7)
-
-#define RTC8564_CTRL2_TIE		(1<<0)
-#define RTC8564_CTRL2_AIE		(1<<1)
-#define RTC8564_CTRL2_TF		(1<<2)
-#define RTC8564_CTRL2_AF		(1<<3)
-#define RTC8564_CTRL2_TI_TP		(1<<4)
-
-/* CLKOUT frequencies */
-#define RTC8564_FD_32768HZ		(0x0)
-#define RTC8564_FD_1024HZ		(0x1)
-#define RTC8564_FD_32			(0x2)
-#define RTC8564_FD_1HZ			(0x3)
-
-/* Timer CTRL */
-#define RTC8564_TD_4096HZ		(0x0)
-#define RTC8564_TD_64HZ			(0x1)
-#define RTC8564_TD_1HZ			(0x2)
-#define RTC8564_TD_1_60HZ		(0x3)
-
-#define I2C_DRIVERID_RTC8564 0xf000
diff --git a/drivers/i2c/chips/x1205.c b/drivers/i2c/chips/x1205.c
deleted file mode 100644
index 245fffa..0000000
--- a/drivers/i2c/chips/x1205.c
+++ /dev/null
@@ -1,698 +0,0 @@
-/*
- *  x1205.c - An i2c driver for the Xicor X1205 RTC
- *  Copyright 2004 Karen Spearel
- *  Copyright 2005 Alessandro Zummo
- *
- *  please send all reports to:
- *	kas11 at tampabay dot rr dot com
- *      a dot zummo at towertech dot it
- *
- *  based on the other drivers in this same directory.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/string.h>
-#include <linux/bcd.h>
-#include <linux/rtc.h>
-#include <linux/list.h>
-
-#include <linux/x1205.h>
-
-#define DRV_VERSION "0.9.9"
-
-/* Addresses to scan: none. This chip is located at
- * 0x6f and uses a two bytes register addressing.
- * Two bytes need to be written to read a single register,
- * while most other chips just require one and take the second
- * one as the data to be written. To prevent corrupting
- * unknown chips, the user must explicitely set the probe parameter.
- */
-
-static unsigned short normal_i2c[] = { I2C_CLIENT_END };
-
-/* Insmod parameters */
-I2C_CLIENT_INSMOD;
-I2C_CLIENT_MODULE_PARM(hctosys,
-	"Set the system time from the hardware clock upon initialization");
-
-/* offsets into CCR area */
-
-#define CCR_SEC			0
-#define CCR_MIN			1
-#define CCR_HOUR		2
-#define CCR_MDAY		3
-#define CCR_MONTH		4
-#define CCR_YEAR		5
-#define CCR_WDAY		6
-#define CCR_Y2K			7
-
-#define X1205_REG_SR		0x3F	/* status register */
-#define X1205_REG_Y2K		0x37
-#define X1205_REG_DW		0x36
-#define X1205_REG_YR		0x35
-#define X1205_REG_MO		0x34
-#define X1205_REG_DT		0x33
-#define X1205_REG_HR		0x32
-#define X1205_REG_MN		0x31
-#define X1205_REG_SC		0x30
-#define X1205_REG_DTR		0x13
-#define X1205_REG_ATR		0x12
-#define X1205_REG_INT		0x11
-#define X1205_REG_0		0x10
-#define X1205_REG_Y2K1		0x0F
-#define X1205_REG_DWA1		0x0E
-#define X1205_REG_YRA1		0x0D
-#define X1205_REG_MOA1		0x0C
-#define X1205_REG_DTA1		0x0B
-#define X1205_REG_HRA1		0x0A
-#define X1205_REG_MNA1		0x09
-#define X1205_REG_SCA1		0x08
-#define X1205_REG_Y2K0		0x07
-#define X1205_REG_DWA0		0x06
-#define X1205_REG_YRA0		0x05
-#define X1205_REG_MOA0		0x04
-#define X1205_REG_DTA0		0x03
-#define X1205_REG_HRA0		0x02
-#define X1205_REG_MNA0		0x01
-#define X1205_REG_SCA0		0x00
-
-#define X1205_CCR_BASE		0x30	/* Base address of CCR */
-#define X1205_ALM0_BASE		0x00	/* Base address of ALARM0 */
-
-#define X1205_SR_RTCF		0x01	/* Clock failure */
-#define X1205_SR_WEL		0x02	/* Write Enable Latch */
-#define X1205_SR_RWEL		0x04	/* Register Write Enable */
-
-#define X1205_DTR_DTR0		0x01
-#define X1205_DTR_DTR1		0x02
-#define X1205_DTR_DTR2		0x04
-
-#define X1205_HR_MIL		0x80	/* Set in ccr.hour for 24 hr mode */
-
-/* Prototypes */
-static int x1205_attach(struct i2c_adapter *adapter);
-static int x1205_detach(struct i2c_client *client);
-static int x1205_probe(struct i2c_adapter *adapter, int address, int kind);
-static int x1205_command(struct i2c_client *client, unsigned int cmd,
-	void *arg);
-
-static struct i2c_driver x1205_driver = {
-	.driver = {
-		.name	= "x1205",
-	},
-	.attach_adapter = &x1205_attach,
-	.detach_client	= &x1205_detach,
-};
-
-struct x1205_data {
-	struct i2c_client client;
-	struct list_head list;
-	unsigned int epoch;
-};
-
-static const unsigned char days_in_mo[] =
-	{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
-
-static LIST_HEAD(x1205_clients);
-
-/* Workaround until the I2C subsytem will allow to send
- * commands to a specific client. This function will send the command
- * to the first client.
- */
-int x1205_do_command(unsigned int cmd, void *arg)
-{
-	struct list_head *walk;
-	struct list_head *tmp;
-	struct x1205_data *data;
-
-	list_for_each_safe(walk, tmp, &x1205_clients) {
-		data = list_entry(walk, struct x1205_data, list);
-		return x1205_command(&data->client, cmd, arg);
-	}
-
-	return -ENODEV;
-}
-
-#define is_leap(year) \
-	((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-
-/* make sure the rtc_time values are in bounds */
-static int x1205_validate_tm(struct rtc_time *tm)
-{
-	int year = tm->tm_year + 1900;
-
-	if ((tm->tm_year < 70) || (tm->tm_year > 255))
-		return -EINVAL;
-
-	if ((tm->tm_mon > 11) || (tm->tm_mday == 0))
-		return -EINVAL;
-
-	if (tm->tm_mday > days_in_mo[tm->tm_mon]
-		+ ((tm->tm_mon == 1) && is_leap(year)))
-		return -EINVAL;
-
-	if ((tm->tm_hour >= 24) || (tm->tm_min >= 60) || (tm->tm_sec >= 60))
-		return -EINVAL;
-
-	return 0;
-}
-
-/*
- * In the routines that deal directly with the x1205 hardware, we use
- * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch
- * Epoch is initialized as 2000. Time is set to UTC.
- */
-static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
-				u8 reg_base)
-{
-	unsigned char dt_addr[2] = { 0, reg_base };
-	static unsigned char sr_addr[2] = { 0, X1205_REG_SR };
-
-	unsigned char buf[8], sr;
-
-	struct i2c_msg msgs[] = {
-		{ client->addr, 0, 2, sr_addr },	/* setup read ptr */
-		{ client->addr, I2C_M_RD, 1, &sr }, 	/* read status */
-		{ client->addr, 0, 2, dt_addr },	/* setup read ptr */
-		{ client->addr, I2C_M_RD, 8, buf },	/* read date */
-	};
-
-	struct x1205_data *data = i2c_get_clientdata(client);
-
-	/* read status register */
-	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
-		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
-		return -EIO;
-	}
-
-	/* check for battery failure */
-	if (sr & X1205_SR_RTCF) {
-		dev_warn(&client->dev,
-			"Clock had a power failure, you must set the date.\n");
-		return -EINVAL;
-	}
-
-	/* read date registers */
-	if ((i2c_transfer(client->adapter, &msgs[2], 2)) != 2) {
-		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
-		return -EIO;
-	}
-
-	dev_dbg(&client->dev,
-		"%s: raw read data - sec=%02x, min=%02x, hr=%02x, "
-		"mday=%02x, mon=%02x, year=%02x, wday=%02x, y2k=%02x\n",
-		__FUNCTION__,
-		buf[0], buf[1], buf[2], buf[3],
-		buf[4], buf[5], buf[6], buf[7]);
-
-	tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
-	tm->tm_min = BCD2BIN(buf[CCR_MIN]);
-	tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
-	tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
-	tm->tm_mon = BCD2BIN(buf[CCR_MONTH]);
-	data->epoch = BCD2BIN(buf[CCR_Y2K]) * 100;
-	tm->tm_year = BCD2BIN(buf[CCR_YEAR]) + data->epoch - 1900;
-	tm->tm_wday = buf[CCR_WDAY];
-
-	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
-		"mday=%d, mon=%d, year=%d, wday=%d\n",
-		__FUNCTION__,
-		tm->tm_sec, tm->tm_min, tm->tm_hour,
-		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
-
-	return 0;
-}
-
-static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
-				int datetoo, u8 reg_base)
-{
-	int i, err, xfer;
-
-	unsigned char buf[8];
-
-	static const unsigned char wel[3] = { 0, X1205_REG_SR,
-						X1205_SR_WEL };
-
-	static const unsigned char rwel[3] = { 0, X1205_REG_SR,
-						X1205_SR_WEL | X1205_SR_RWEL };
-
-	static const unsigned char diswe[3] = { 0, X1205_REG_SR, 0 };
-
-	struct x1205_data *data = i2c_get_clientdata(client);
-
-	/* check if all values in the tm struct are correct */
-	if ((err = x1205_validate_tm(tm)) < 0)
-		return err;
-
-	dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, "
-		"mday=%d, mon=%d, year=%d, wday=%d\n",
-		__FUNCTION__,
-		tm->tm_sec, tm->tm_min, tm->tm_hour,
-		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
-
-	buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
-	buf[CCR_MIN] = BIN2BCD(tm->tm_min);
-
-	/* set hour and 24hr bit */
-	buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
-
-	/* should we also set the date? */
-	if (datetoo) {
-		buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
-
-		/* month, 0 - 11 */
-		buf[CCR_MONTH] = BIN2BCD(tm->tm_mon);
-
-		/* year, since 1900 */
-		buf[CCR_YEAR] = BIN2BCD(tm->tm_year + 1900 - data->epoch);
-		buf[CCR_WDAY] = tm->tm_wday & 0x07;
-		buf[CCR_Y2K] = BIN2BCD(data->epoch / 100);
-	}
-
-	/* this sequence is required to unlock the chip */
-	xfer = i2c_master_send(client, wel, 3);
-	if (xfer != 3) {
-		dev_err(&client->dev, "%s: wel - %d\n", __FUNCTION__, xfer);
-		return -EIO;
-	}
-
-	xfer = i2c_master_send(client, rwel, 3);
-	if (xfer != 3) {
-		dev_err(&client->dev, "%s: rwel - %d\n", __FUNCTION__, xfer);
-		return -EIO;
-	}
-
-	/* write register's data */
-	for (i = 0; i < (datetoo ? 8 : 3); i++) {
-		unsigned char rdata[3] = { 0, reg_base + i, buf[i] };
-
-		xfer = i2c_master_send(client, rdata, 3);
-		if (xfer != 3) {
-			dev_err(&client->dev,
-				"%s: xfer=%d addr=%02x, data=%02x\n",
-				__FUNCTION__,
-				 xfer, rdata[1], rdata[2]);
-			return -EIO;
-		}
-	};
-
-	/* disable further writes */
-	xfer = i2c_master_send(client, diswe, 3);
-	if (xfer != 3) {
-		dev_err(&client->dev, "%s: diswe - %d\n", __FUNCTION__, xfer);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int x1205_get_dtrim(struct i2c_client *client, int *trim)
-{
-	unsigned char dtr;
-	static unsigned char dtr_addr[2] = { 0, X1205_REG_DTR };
-
-	struct i2c_msg msgs[] = {
-		{ client->addr, 0, 2, dtr_addr },	/* setup read ptr */
-		{ client->addr, I2C_M_RD, 1, &dtr }, 	/* read dtr */
-	};
-
-	/* read dtr register */
-	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
-		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
-		return -EIO;
-	}
-
-	dev_dbg(&client->dev, "%s: raw dtr=%x\n", __FUNCTION__, dtr);
-
-	*trim = 0;
-
-	if (dtr & X1205_DTR_DTR0)
-		*trim += 20;
-
-	if (dtr & X1205_DTR_DTR1)
-		*trim += 10;
-
-	if (dtr & X1205_DTR_DTR2)
-		*trim = -*trim;
-
-	return 0;
-}
-
-static int x1205_get_atrim(struct i2c_client *client, int *trim)
-{
-	s8 atr;
-	static unsigned char atr_addr[2] = { 0, X1205_REG_ATR };
-
-	struct i2c_msg msgs[] = {
-		{ client->addr, 0, 2, atr_addr },	/* setup read ptr */
-		{ client->addr, I2C_M_RD, 1, &atr }, 	/* read atr */
-	};
-
-	/* read atr register */
-	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
-		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
-		return -EIO;
-	}
-
-	dev_dbg(&client->dev, "%s: raw atr=%x\n", __FUNCTION__, atr);
-
-	/* atr is a two's complement value on 6 bits,
-	 * perform sign extension. The formula is
-	 * Catr = (atr * 0.25pF) + 11.00pF.
-	 */
-	if (atr & 0x20)
-		atr |= 0xC0;
-
-	dev_dbg(&client->dev, "%s: raw atr=%x (%d)\n", __FUNCTION__, atr, atr);
-
-	*trim = (atr * 250) + 11000;
-
-	dev_dbg(&client->dev, "%s: real=%d\n", __FUNCTION__, *trim);
-
-	return 0;
-}
-
-static int x1205_hctosys(struct i2c_client *client)
-{
-	int err;
-
-	struct rtc_time tm;
-	struct timespec tv;
-
-	err = x1205_command(client, X1205_CMD_GETDATETIME, &tm);
-
-	if (err) {
-		dev_err(&client->dev,
-			"Unable to set the system clock\n");
-		return err;
-	}
-
-	/* IMPORTANT: the RTC only stores whole seconds. It is arbitrary
-	 * whether it stores the most close value or the value with partial
-	 * seconds truncated. However, it is important that we use it to store
-	 * the truncated value. This is because otherwise it is necessary,
-	 * in an rtc sync function, to read both xtime.tv_sec and
-	 * xtime.tv_nsec. On some processors (i.e. ARM), an atomic read
-	 * of >32bits is not possible. So storing the most close value would
-	 * slow down the sync API. So here we have the truncated value and
-	 * the best guess is to add 0.5s.
-	 */
-
-	tv.tv_nsec = NSEC_PER_SEC >> 1;
-
-	/* WARNING: this is not the C library 'mktime' call, it is a built in
-	 * inline function from include/linux/time.h.  It expects (requires)
-	 * the month to be in the range 1-12
-	 */
-
-	tv.tv_sec  = mktime(tm.tm_year + 1900, tm.tm_mon + 1,
-				tm.tm_mday, tm.tm_hour,
-				tm.tm_min, tm.tm_sec);
-
-	do_settimeofday(&tv);
-
-	dev_info(&client->dev,
-		"setting the system clock to %d-%d-%d %d:%d:%d\n",
-		tm.tm_year + 1900, tm.tm_mon + 1,
-		tm.tm_mday, tm.tm_hour, tm.tm_min,
-		tm.tm_sec);
-
-	return 0;
-}
-
-struct x1205_limit
-{
-	unsigned char reg;
-	unsigned char mask;
-	unsigned char min;
-	unsigned char max;
-};
-
-static int x1205_validate_client(struct i2c_client *client)
-{
-	int i, xfer;
-
-	/* Probe array. We will read the register at the specified
-	 * address and check if the given bits are zero.
-	 */
-	static const unsigned char probe_zero_pattern[] = {
-		/* register, mask */
-		X1205_REG_SR,	0x18,
-		X1205_REG_DTR,	0xF8,
-		X1205_REG_ATR,	0xC0,
-		X1205_REG_INT,	0x18,
-		X1205_REG_0,	0xFF,
-	};
-
-	static const struct x1205_limit probe_limits_pattern[] = {
-		/* register, mask, min, max */
-		{ X1205_REG_Y2K,	0xFF,	19,	20	},
-		{ X1205_REG_DW,		0xFF,	0,	6	},
-		{ X1205_REG_YR,		0xFF,	0,	99	},
-		{ X1205_REG_MO,		0xFF,	0,	12	},
-		{ X1205_REG_DT,		0xFF,	0,	31	},
-		{ X1205_REG_HR,		0x7F,	0,	23	},
-		{ X1205_REG_MN,		0xFF,	0,	59	},
-		{ X1205_REG_SC,		0xFF,	0,	59	},
-		{ X1205_REG_Y2K1,	0xFF,	19,	20	},
-		{ X1205_REG_Y2K0,	0xFF,	19,	20	},
-	};
-
-	/* check that registers have bits a 0 where expected */
-	for (i = 0; i < ARRAY_SIZE(probe_zero_pattern); i += 2) {
-		unsigned char buf;
-
-		unsigned char addr[2] = { 0, probe_zero_pattern[i] };
-
-		struct i2c_msg msgs[2] = {
-			{ client->addr, 0, 2, addr },
-			{ client->addr, I2C_M_RD, 1, &buf },
-		};
-
-		xfer = i2c_transfer(client->adapter, msgs, 2);
-		if (xfer != 2) {
-			dev_err(&client->adapter->dev,
-				"%s: could not read register %x\n",
-				__FUNCTION__, addr[1]);
-
-			return -EIO;
-		}
-
-		if ((buf & probe_zero_pattern[i+1]) != 0) {
-			dev_err(&client->adapter->dev,
-				"%s: register=%02x, zero pattern=%d, value=%x\n",
-				__FUNCTION__, addr[1], i, buf);
-
-			return -ENODEV;
-		}
-	}
-
-	/* check limits (only registers with bcd values) */
-	for (i = 0; i < ARRAY_SIZE(probe_limits_pattern); i++) {
-		unsigned char reg, value;
-
-		unsigned char addr[2] = { 0, probe_limits_pattern[i].reg };
-
-		struct i2c_msg msgs[2] = {
-			{ client->addr, 0, 2, addr },
-			{ client->addr, I2C_M_RD, 1, &reg },
-		};
-
-		xfer = i2c_transfer(client->adapter, msgs, 2);
-
-		if (xfer != 2) {
-			dev_err(&client->adapter->dev,
-				"%s: could not read register %x\n",
-				__FUNCTION__, addr[1]);
-
-			return -EIO;
-		}
-
-		value = BCD2BIN(reg & probe_limits_pattern[i].mask);
-
-		if (value > probe_limits_pattern[i].max ||
-			value < probe_limits_pattern[i].min) {
-			dev_dbg(&client->adapter->dev,
-				"%s: register=%x, lim pattern=%d, value=%d\n",
-				__FUNCTION__, addr[1], i, value);
-
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
-
-static int x1205_attach(struct i2c_adapter *adapter)
-{
-	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
-
-	return i2c_probe(adapter, &addr_data, x1205_probe);
-}
-
-int x1205_direct_attach(int adapter_id,
-	struct i2c_client_address_data *address_data)
-{
-	int err;
-	struct i2c_adapter *adapter = i2c_get_adapter(adapter_id);
-
-	if (adapter) {
-		err = i2c_probe(adapter,
-			address_data, x1205_probe);
-
-		i2c_put_adapter(adapter);
-
-		return err;
-	}
-
-	return -ENODEV;
-}
-
-static int x1205_probe(struct i2c_adapter *adapter, int address, int kind)
-{
-	struct i2c_client *client;
-	struct x1205_data *data;
-
-	int err = 0;
-
-	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
-		err = -ENODEV;
-		goto exit;
-	}
-
-	if (!(data = kzalloc(sizeof(struct x1205_data), GFP_KERNEL))) {
-		err = -ENOMEM;
-		goto exit;
-	}
-
-	/* Initialize our structures */
-	data->epoch = 2000;
-
-	client = &data->client;
-	client->addr = address;
-	client->driver = &x1205_driver;
-	client->adapter	= adapter;
-
-	strlcpy(client->name, "x1205", I2C_NAME_SIZE);
-
-	i2c_set_clientdata(client, data);
-
-	/* Verify the chip is really an X1205 */
-	if (kind < 0) {
-		if (x1205_validate_client(client) < 0) {
-			err = -ENODEV;
-			goto exit_kfree;
-		}
-	}
-
-	/* Inform the i2c layer */
-	if ((err = i2c_attach_client(client)))
-		goto exit_kfree;
-
-	list_add(&data->list, &x1205_clients);
-
-	dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
-
-	/* If requested, set the system time */
-	if (hctosys)
-		x1205_hctosys(client);
-
-	return 0;
-
-exit_kfree:
-	kfree(data);
-
-exit:
-	return err;
-}
-
-static int x1205_detach(struct i2c_client *client)
-{
-	int err;
-	struct x1205_data *data = i2c_get_clientdata(client);
-
-	dev_dbg(&client->dev, "%s\n", __FUNCTION__);
-
-	if ((err = i2c_detach_client(client)))
-		return err;
-
-	list_del(&data->list);
-
-	kfree(data);
-
-	return 0;
-}
-
-static int x1205_command(struct i2c_client *client, unsigned int cmd,
-	void *param)
-{
-	if (param == NULL)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_TIME))
-		return -EACCES;
-
-	dev_dbg(&client->dev, "%s: cmd=%d\n", __FUNCTION__, cmd);
-
-	switch (cmd) {
-	case X1205_CMD_GETDATETIME:
-		return x1205_get_datetime(client, param, X1205_CCR_BASE);
-
-	case X1205_CMD_SETTIME:
-		return x1205_set_datetime(client, param, 0,
-				X1205_CCR_BASE);
-
-	case X1205_CMD_SETDATETIME:
-		return x1205_set_datetime(client, param, 1,
-				X1205_CCR_BASE);
-
-	case X1205_CMD_GETALARM:
-		return x1205_get_datetime(client, param, X1205_ALM0_BASE);
-
-	case X1205_CMD_SETALARM:
-		return x1205_set_datetime(client, param, 1,
-				X1205_ALM0_BASE);
-
-	case X1205_CMD_GETDTRIM:
-		return x1205_get_dtrim(client, param);
-
-	case X1205_CMD_GETATRIM:
-		return x1205_get_atrim(client, param);
-
-	default:
-		return -EINVAL;
-	}
-}
-
-static int __init x1205_init(void)
-{
-	return i2c_add_driver(&x1205_driver);
-}
-
-static void __exit x1205_exit(void)
-{
-	i2c_del_driver(&x1205_driver);
-}
-
-MODULE_AUTHOR(
-	"Karen Spearel <kas11@tampabay.rr.com>, "
-	"Alessandro Zummo <a.zummo@towertech.it>");
-MODULE_DESCRIPTION("Xicor X1205 RTC driver");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-
-EXPORT_SYMBOL_GPL(x1205_do_command);
-EXPORT_SYMBOL_GPL(x1205_direct_attach);
-
-module_init(x1205_init);
-module_exit(x1205_exit);
diff --git a/include/linux/x1205.h b/include/linux/x1205.h
deleted file mode 100644
index 64fd3af..0000000
--- a/include/linux/x1205.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  x1205.h - defines for drivers/i2c/chips/x1205.c
- *  Copyright 2004 Karen Spearel
- *  Copyright 2005 Alessandro Zummo
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- */
-
-#ifndef __LINUX_X1205_H__
-#define __LINUX_X1205_H__
-
-/* commands */
-
-#define X1205_CMD_GETDATETIME	0
-#define X1205_CMD_SETTIME	1
-#define X1205_CMD_SETDATETIME	2
-#define X1205_CMD_GETALARM	3
-#define X1205_CMD_SETALARM	4
-#define X1205_CMD_GETDTRIM	5
-#define X1205_CMD_SETDTRIM	6
-#define X1205_CMD_GETATRIM	7
-#define X1205_CMD_SETATRIM	8
-
-extern int x1205_do_command(unsigned int cmd, void *arg);
-extern int x1205_direct_attach(int adapter_id,
-	struct i2c_client_address_data *address_data);
-
-#endif /* __LINUX_X1205_H__ */
-- 
cgit v0.10.2


From f7f3682fb2f8bc8a9c912baeea15454416ca1972 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:38 -0800
Subject: [PATCH] RTC subsystem: I2C driver ids

This patch adds the I2C driver ids to i2c-id.h in preparation of the I2C
direct probing method.

This is kept separate so that it can be integrated to

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 679b46a..c8b81f4 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -108,6 +108,10 @@
 #define I2C_DRIVERID_UPD64083	78	/* upd64083 video processor	*/
 #define I2C_DRIVERID_UPD64031A	79	/* upd64031a video processor	*/
 #define I2C_DRIVERID_SAA717X	80	/* saa717x video encoder	*/
+#define I2C_DRIVERID_DS1672	81	/* Dallas/Maxim DS1672 RTC	*/
+#define I2C_DRIVERID_X1205	82	/* Xicor/Intersil X1205 RTC	*/
+#define I2C_DRIVERID_PCF8563	83	/* Philips PCF8563 RTC		*/
+#define I2C_DRIVERID_RS5C372	84	/* Ricoh RS5C372 RTC		*/
 
 #define I2C_DRIVERID_I2CDEV	900
 #define I2C_DRIVERID_ARP        902    /* SMBus ARP Client              */
-- 
cgit v0.10.2


From c5c3e19225217536d90515c494e55e642a21e4fa Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:39 -0800
Subject: [PATCH] RTC subsystem: sysfs interface

This patch adds the sysfs interface to the RTC subsystem.

Each RTC client will have his own entry under /sys/classs/rtc/rtcN .

Within this entry some attributes are exported by the subsystem, like date and
time.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index a256f67..8244999 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -40,6 +40,17 @@ config RTC_HCTOSYS_DEVICE
 comment "RTC interfaces"
 	depends on RTC_CLASS
 
+config RTC_INTF_SYSFS
+	tristate "sysfs"
+	depends on RTC_CLASS && SYSFS
+	default RTC_CLASS
+	help
+	  Say yes here if you want to use your RTC using the sysfs
+	  interface, /sys/class/rtc/rtcX .
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-sysfs.
+
 comment "RTC drivers"
 	depends on RTC_CLASS
 
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 7b87f37..c443633 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -6,3 +6,6 @@ obj-$(CONFIG_RTC_LIB)		+= rtc-lib.o
 obj-$(CONFIG_RTC_HCTOSYS)	+= hctosys.o
 obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
 rtc-core-y			:= class.o interface.o
+
+obj-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
+
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
new file mode 100644
index 0000000..7c1f3d2
--- /dev/null
+++ b/drivers/rtc/rtc-sysfs.c
@@ -0,0 +1,124 @@
+/*
+ * RTC subsystem, sysfs interface
+ *
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+
+/* device attributes */
+
+static ssize_t rtc_sysfs_show_name(struct class_device *dev, char *buf)
+{
+	return sprintf(buf, "%s\n", to_rtc_device(dev)->name);
+}
+static CLASS_DEVICE_ATTR(name, S_IRUGO, rtc_sysfs_show_name, NULL);
+
+static ssize_t rtc_sysfs_show_date(struct class_device *dev, char *buf)
+{
+	ssize_t retval;
+	struct rtc_time tm;
+
+	retval = rtc_read_time(dev, &tm);
+	if (retval == 0) {
+		retval = sprintf(buf, "%04d-%02d-%02d\n",
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+	}
+
+	return retval;
+}
+static CLASS_DEVICE_ATTR(date, S_IRUGO, rtc_sysfs_show_date, NULL);
+
+static ssize_t rtc_sysfs_show_time(struct class_device *dev, char *buf)
+{
+	ssize_t retval;
+	struct rtc_time tm;
+
+	retval = rtc_read_time(dev, &tm);
+	if (retval == 0) {
+		retval = sprintf(buf, "%02d:%02d:%02d\n",
+			tm.tm_hour, tm.tm_min, tm.tm_sec);
+	}
+
+	return retval;
+}
+static CLASS_DEVICE_ATTR(time, S_IRUGO, rtc_sysfs_show_time, NULL);
+
+static ssize_t rtc_sysfs_show_since_epoch(struct class_device *dev, char *buf)
+{
+	ssize_t retval;
+	struct rtc_time tm;
+
+	retval = rtc_read_time(dev, &tm);
+	if (retval == 0) {
+		unsigned long time;
+		rtc_tm_to_time(&tm, &time);
+		retval = sprintf(buf, "%lu\n", time);
+	}
+
+	return retval;
+}
+static CLASS_DEVICE_ATTR(since_epoch, S_IRUGO, rtc_sysfs_show_since_epoch, NULL);
+
+static struct attribute *rtc_attrs[] = {
+	&class_device_attr_name.attr,
+	&class_device_attr_date.attr,
+	&class_device_attr_time.attr,
+	&class_device_attr_since_epoch.attr,
+	NULL,
+};
+
+static struct attribute_group rtc_attr_group = {
+	.attrs = rtc_attrs,
+};
+
+static int __devinit rtc_sysfs_add_device(struct class_device *class_dev,
+					struct class_interface *class_intf)
+{
+	int err;
+
+	dev_info(class_dev->dev, "rtc intf: sysfs\n");
+
+	err = sysfs_create_group(&class_dev->kobj, &rtc_attr_group);
+	if (err)
+		dev_err(class_dev->dev,
+			"failed to create sysfs attributes\n");
+
+	return err;
+}
+
+static void rtc_sysfs_remove_device(struct class_device *class_dev,
+				struct class_interface *class_intf)
+{
+	sysfs_remove_group(&class_dev->kobj, &rtc_attr_group);
+}
+
+/* interface registration */
+
+static struct class_interface rtc_sysfs_interface = {
+	.add = &rtc_sysfs_add_device,
+	.remove = &rtc_sysfs_remove_device,
+};
+
+static int __init rtc_sysfs_init(void)
+{
+	return rtc_interface_register(&rtc_sysfs_interface);
+}
+
+static void __exit rtc_sysfs_exit(void)
+{
+	class_interface_unregister(&rtc_sysfs_interface);
+}
+
+module_init(rtc_sysfs_init);
+module_exit(rtc_sysfs_exit);
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("RTC class sysfs interface");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 728a294787b780130d8eb237518d4cac0afe760c Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:40 -0800
Subject: [PATCH] RTC subsystem: proc interface

Add the proc interface to the RTC subsystem.

The first RTC driver which registers with the class will be accessible by
/proc/driver/rtc .

This is required for compatibility with the standard RTC driver and to avoid
breaking any user space application which may erroneusly rely on this.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8244999..200fb06 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -51,6 +51,17 @@ config RTC_INTF_SYSFS
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-sysfs.
 
+config RTC_INTF_PROC
+	tristate "proc"
+	depends on RTC_CLASS && PROC_FS
+	default RTC_CLASS
+	help
+	  Say yes here if you want to use your RTC using the proc
+	  interface, /proc/driver/rtc .
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-proc.
+
 comment "RTC drivers"
 	depends on RTC_CLASS
 
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index c443633..137aef0 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
 rtc-core-y			:= class.o interface.o
 
 obj-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
-
+obj-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
new file mode 100644
index 0000000..90b8a97
--- /dev/null
+++ b/drivers/rtc/rtc-proc.c
@@ -0,0 +1,162 @@
+/*
+ * RTC subsystem, proc interface
+ *
+ * Copyright (C) 2005-06 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * based on arch/arm/common/rtctime.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static struct class_device *rtc_dev = NULL;
+static DEFINE_MUTEX(rtc_lock);
+
+static int rtc_proc_show(struct seq_file *seq, void *offset)
+{
+	int err;
+	struct class_device *class_dev = seq->private;
+	struct rtc_class_ops *ops = to_rtc_device(class_dev)->ops;
+	struct rtc_wkalrm alrm;
+	struct rtc_time tm;
+
+	err = rtc_read_time(class_dev, &tm);
+	if (err == 0) {
+		seq_printf(seq,
+			"rtc_time\t: %02d:%02d:%02d\n"
+			"rtc_date\t: %04d-%02d-%02d\n",
+			tm.tm_hour, tm.tm_min, tm.tm_sec,
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+	}
+
+	err = rtc_read_alarm(class_dev, &alrm);
+	if (err == 0) {
+		seq_printf(seq, "alrm_time\t: ");
+		if ((unsigned int)alrm.time.tm_hour <= 24)
+			seq_printf(seq, "%02d:", alrm.time.tm_hour);
+		else
+			seq_printf(seq, "**:");
+		if ((unsigned int)alrm.time.tm_min <= 59)
+			seq_printf(seq, "%02d:", alrm.time.tm_min);
+		else
+			seq_printf(seq, "**:");
+		if ((unsigned int)alrm.time.tm_sec <= 59)
+			seq_printf(seq, "%02d\n", alrm.time.tm_sec);
+		else
+			seq_printf(seq, "**\n");
+
+		seq_printf(seq, "alrm_date\t: ");
+		if ((unsigned int)alrm.time.tm_year <= 200)
+			seq_printf(seq, "%04d-", alrm.time.tm_year + 1900);
+		else
+			seq_printf(seq, "****-");
+		if ((unsigned int)alrm.time.tm_mon <= 11)
+			seq_printf(seq, "%02d-", alrm.time.tm_mon + 1);
+		else
+			seq_printf(seq, "**-");
+		if ((unsigned int)alrm.time.tm_mday <= 31)
+			seq_printf(seq, "%02d\n", alrm.time.tm_mday);
+		else
+			seq_printf(seq, "**\n");
+		seq_printf(seq, "alrm_wakeup\t: %s\n",
+				alrm.enabled ? "yes" : "no");
+		seq_printf(seq, "alrm_pending\t: %s\n",
+				alrm.pending ? "yes" : "no");
+	}
+
+	if (ops->proc)
+		ops->proc(class_dev->dev, seq);
+
+	return 0;
+}
+
+static int rtc_proc_open(struct inode *inode, struct file *file)
+{
+	struct class_device *class_dev = PDE(inode)->data;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	return single_open(file, rtc_proc_show, class_dev);
+}
+
+static int rtc_proc_release(struct inode *inode, struct file *file)
+{
+	int res = single_release(inode, file);
+	module_put(THIS_MODULE);
+	return res;
+}
+
+static struct file_operations rtc_proc_fops = {
+	.open		= rtc_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= rtc_proc_release,
+};
+
+static int rtc_proc_add_device(struct class_device *class_dev,
+					struct class_interface *class_intf)
+{
+	mutex_lock(&rtc_lock);
+	if (rtc_dev == NULL) {
+		struct proc_dir_entry *ent;
+
+		rtc_dev = class_dev;
+
+		ent = create_proc_entry("driver/rtc", 0, NULL);
+		if (ent) {
+			struct rtc_device *rtc = to_rtc_device(class_dev);
+
+			ent->proc_fops = &rtc_proc_fops;
+			ent->owner = rtc->owner;
+			ent->data = class_dev;
+
+			dev_info(class_dev->dev, "rtc intf: proc\n");
+		}
+		else
+			rtc_dev = NULL;
+	}
+	mutex_unlock(&rtc_lock);
+
+	return 0;
+}
+
+static void rtc_proc_remove_device(struct class_device *class_dev,
+					struct class_interface *class_intf)
+{
+	mutex_lock(&rtc_lock);
+	if (rtc_dev == class_dev) {
+		remove_proc_entry("driver/rtc", NULL);
+		rtc_dev = NULL;
+	}
+	mutex_unlock(&rtc_lock);
+}
+
+static struct class_interface rtc_proc_interface = {
+	.add = &rtc_proc_add_device,
+	.remove = &rtc_proc_remove_device,
+};
+
+static int __init rtc_proc_init(void)
+{
+	return rtc_interface_register(&rtc_proc_interface);
+}
+
+static void __exit rtc_proc_exit(void)
+{
+	class_interface_unregister(&rtc_proc_interface);
+}
+
+module_init(rtc_proc_init);
+module_exit(rtc_proc_exit);
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("RTC class proc interface");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From e824290e5dcfaf2120da587b16d10dfdff8d5d3e Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:41 -0800
Subject: [PATCH] RTC subsystem: dev interface

Add the dev interface to the RTC subsystem.

Each RTC will be available under /dev/rtcX .  A symlink from /dev/rtc0 to
/dev/rtc cab be obtained with the following udev rule:

KERNEL=="rtc0", SYMLINK+="rtc"

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 200fb06..8e4dc32 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -62,6 +62,17 @@ config RTC_INTF_PROC
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-proc.
 
+config RTC_INTF_DEV
+	tristate "dev"
+	depends on RTC_CLASS
+	default RTC_CLASS
+	help
+	  Say yes here if you want to use your RTC using the dev
+	  interface, /dev/rtc .
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-dev.
+
 comment "RTC drivers"
 	depends on RTC_CLASS
 
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 137aef0..38a9212 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -9,3 +9,4 @@ rtc-core-y			:= class.o interface.o
 
 obj-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
 obj-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
+obj-$(CONFIG_RTC_INTF_DEV)	+= rtc-dev.o
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
new file mode 100644
index 0000000..b1e3e61
--- /dev/null
+++ b/drivers/rtc/rtc-dev.c
@@ -0,0 +1,382 @@
+/*
+ * RTC subsystem, dev interface
+ *
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * based on arch/arm/common/rtctime.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+
+static struct class *rtc_dev_class;
+static dev_t rtc_devt;
+
+#define RTC_DEV_MAX 16 /* 16 RTCs should be enough for everyone... */
+
+static int rtc_dev_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct rtc_device *rtc = container_of(inode->i_cdev,
+					struct rtc_device, char_dev);
+	struct rtc_class_ops *ops = rtc->ops;
+
+	/* We keep the lock as long as the device is in use
+	 * and return immediately if busy
+	 */
+	if (!(mutex_trylock(&rtc->char_lock)))
+		return -EBUSY;
+
+	file->private_data = &rtc->class_dev;
+
+	err = ops->open ? ops->open(rtc->class_dev.dev) : 0;
+	if (err == 0) {
+		spin_lock_irq(&rtc->irq_lock);
+		rtc->irq_data = 0;
+		spin_unlock_irq(&rtc->irq_lock);
+
+		return 0;
+	}
+
+	/* something has gone wrong, release the lock */
+	mutex_unlock(&rtc->char_lock);
+	return err;
+}
+
+
+static ssize_t
+rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct rtc_device *rtc = to_rtc_device(file->private_data);
+
+	DECLARE_WAITQUEUE(wait, current);
+	unsigned long data;
+	ssize_t ret;
+
+	if (count < sizeof(unsigned long))
+		return -EINVAL;
+
+	add_wait_queue(&rtc->irq_queue, &wait);
+	do {
+		__set_current_state(TASK_INTERRUPTIBLE);
+
+		spin_lock_irq(&rtc->irq_lock);
+		data = rtc->irq_data;
+		rtc->irq_data = 0;
+		spin_unlock_irq(&rtc->irq_lock);
+
+		if (data != 0) {
+			ret = 0;
+			break;
+		}
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	} while (1);
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&rtc->irq_queue, &wait);
+
+	if (ret == 0) {
+		/* Check for any data updates */
+		if (rtc->ops->read_callback)
+			data = rtc->ops->read_callback(rtc->class_dev.dev, data);
+
+		ret = put_user(data, (unsigned long __user *)buf);
+		if (ret == 0)
+			ret = sizeof(unsigned long);
+	}
+	return ret;
+}
+
+static unsigned int rtc_dev_poll(struct file *file, poll_table *wait)
+{
+	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	unsigned long data;
+
+	poll_wait(file, &rtc->irq_queue, wait);
+
+	data = rtc->irq_data;
+
+	return (data != 0) ? (POLLIN | POLLRDNORM) : 0;
+}
+
+static int rtc_dev_ioctl(struct inode *inode, struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct class_device *class_dev = file->private_data;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+	struct rtc_class_ops *ops = rtc->ops;
+	struct rtc_time tm;
+	struct rtc_wkalrm alarm;
+	void __user *uarg = (void __user *) arg;
+
+	/* avoid conflicting IRQ users */
+	if (cmd == RTC_PIE_ON || cmd == RTC_PIE_OFF || cmd == RTC_IRQP_SET) {
+		spin_lock(&rtc->irq_task_lock);
+		if (rtc->irq_task)
+			err = -EBUSY;
+		spin_unlock(&rtc->irq_task_lock);
+
+		if (err < 0)
+			return err;
+	}
+
+	/* try the driver's ioctl interface */
+	if (ops->ioctl) {
+		err = ops->ioctl(class_dev->dev, cmd, arg);
+		if (err != -EINVAL)
+			return err;
+	}
+
+	/* if the driver does not provide the ioctl interface
+	 * or if that particular ioctl was not implemented
+	 * (-EINVAL), we will try to emulate here.
+	 */
+
+	switch (cmd) {
+	case RTC_ALM_READ:
+		err = rtc_read_alarm(class_dev, &alarm);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(uarg, &alarm.time, sizeof(tm)))
+			return -EFAULT;
+		break;
+
+	case RTC_ALM_SET:
+		if (copy_from_user(&alarm.time, uarg, sizeof(tm)))
+			return -EFAULT;
+
+		alarm.enabled = 0;
+		alarm.pending = 0;
+		alarm.time.tm_mday = -1;
+		alarm.time.tm_mon = -1;
+		alarm.time.tm_year = -1;
+		alarm.time.tm_wday = -1;
+		alarm.time.tm_yday = -1;
+		alarm.time.tm_isdst = -1;
+		err = rtc_set_alarm(class_dev, &alarm);
+		break;
+
+	case RTC_RD_TIME:
+		err = rtc_read_time(class_dev, &tm);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(uarg, &tm, sizeof(tm)))
+			return -EFAULT;
+		break;
+
+	case RTC_SET_TIME:
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+
+		if (copy_from_user(&tm, uarg, sizeof(tm)))
+			return -EFAULT;
+
+		err = rtc_set_time(class_dev, &tm);
+		break;
+#if 0
+	case RTC_EPOCH_SET:
+#ifndef rtc_epoch
+		/*
+		 * There were no RTC clocks before 1900.
+		 */
+		if (arg < 1900) {
+			err = -EINVAL;
+			break;
+		}
+		if (!capable(CAP_SYS_TIME)) {
+			err = -EACCES;
+			break;
+		}
+		rtc_epoch = arg;
+		err = 0;
+#endif
+		break;
+
+	case RTC_EPOCH_READ:
+		err = put_user(rtc_epoch, (unsigned long __user *)uarg);
+		break;
+#endif
+	case RTC_WKALM_SET:
+		if (copy_from_user(&alarm, uarg, sizeof(alarm)))
+			return -EFAULT;
+
+		err = rtc_set_alarm(class_dev, &alarm);
+		break;
+
+	case RTC_WKALM_RD:
+		err = rtc_read_alarm(class_dev, &alarm);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(uarg, &alarm, sizeof(alarm)))
+			return -EFAULT;
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
+
+static int rtc_dev_release(struct inode *inode, struct file *file)
+{
+	struct rtc_device *rtc = to_rtc_device(file->private_data);
+
+	if (rtc->ops->release)
+		rtc->ops->release(rtc->class_dev.dev);
+
+	mutex_unlock(&rtc->char_lock);
+	return 0;
+}
+
+static int rtc_dev_fasync(int fd, struct file *file, int on)
+{
+	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	return fasync_helper(fd, file, on, &rtc->async_queue);
+}
+
+static struct file_operations rtc_dev_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= rtc_dev_read,
+	.poll		= rtc_dev_poll,
+	.ioctl		= rtc_dev_ioctl,
+	.open		= rtc_dev_open,
+	.release	= rtc_dev_release,
+	.fasync		= rtc_dev_fasync,
+};
+
+/* insertion/removal hooks */
+
+static int rtc_dev_add_device(struct class_device *class_dev,
+				struct class_interface *class_intf)
+{
+	int err = 0;
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	if (rtc->id >= RTC_DEV_MAX) {
+		dev_err(class_dev->dev, "too many RTCs\n");
+		return -EINVAL;
+	}
+
+	mutex_init(&rtc->char_lock);
+	spin_lock_init(&rtc->irq_lock);
+	init_waitqueue_head(&rtc->irq_queue);
+
+	cdev_init(&rtc->char_dev, &rtc_dev_fops);
+	rtc->char_dev.owner = rtc->owner;
+
+	if (cdev_add(&rtc->char_dev, MKDEV(MAJOR(rtc_devt), rtc->id), 1)) {
+		cdev_del(&rtc->char_dev);
+		dev_err(class_dev->dev,
+			"failed to add char device %d:%d\n",
+			MAJOR(rtc_devt), rtc->id);
+		return -ENODEV;
+	}
+
+	rtc->rtc_dev = class_device_create(rtc_dev_class, NULL,
+						MKDEV(MAJOR(rtc_devt), rtc->id),
+						class_dev->dev, "rtc%d", rtc->id);
+	if (IS_ERR(rtc->rtc_dev)) {
+		dev_err(class_dev->dev, "cannot create rtc_dev device\n");
+		err = PTR_ERR(rtc->rtc_dev);
+		goto err_cdev_del;
+	}
+
+	dev_info(class_dev->dev, "rtc intf: dev (%d:%d)\n",
+		MAJOR(rtc->rtc_dev->devt),
+		MINOR(rtc->rtc_dev->devt));
+
+	return 0;
+
+err_cdev_del:
+
+	cdev_del(&rtc->char_dev);
+	return err;
+}
+
+static void rtc_dev_remove_device(struct class_device *class_dev,
+					struct class_interface *class_intf)
+{
+	struct rtc_device *rtc = to_rtc_device(class_dev);
+
+	if (rtc->rtc_dev) {
+		dev_dbg(class_dev->dev, "removing char %d:%d\n",
+			MAJOR(rtc->rtc_dev->devt),
+			MINOR(rtc->rtc_dev->devt));
+
+		class_device_unregister(rtc->rtc_dev);
+		cdev_del(&rtc->char_dev);
+	}
+}
+
+/* interface registration */
+
+static struct class_interface rtc_dev_interface = {
+	.add = &rtc_dev_add_device,
+	.remove = &rtc_dev_remove_device,
+};
+
+static int __init rtc_dev_init(void)
+{
+	int err;
+
+	rtc_dev_class = class_create(THIS_MODULE, "rtc-dev");
+	if (IS_ERR(rtc_dev_class))
+		return PTR_ERR(rtc_dev_class);
+
+	err = alloc_chrdev_region(&rtc_devt, 0, RTC_DEV_MAX, "rtc");
+	if (err < 0) {
+		printk(KERN_ERR "%s: failed to allocate char dev region\n",
+			__FILE__);
+		goto err_destroy_class;
+	}
+
+	err = rtc_interface_register(&rtc_dev_interface);
+	if (err < 0) {
+		printk(KERN_ERR "%s: failed to register the interface\n",
+			__FILE__);
+		goto err_unregister_chrdev;
+	}
+
+	return 0;
+
+err_unregister_chrdev:
+	unregister_chrdev_region(rtc_devt, RTC_DEV_MAX);
+
+err_destroy_class:
+	class_destroy(rtc_dev_class);
+
+	return err;
+}
+
+static void __exit rtc_dev_exit(void)
+{
+	class_interface_unregister(&rtc_dev_interface);
+	class_destroy(rtc_dev_class);
+	unregister_chrdev_region(rtc_devt, RTC_DEV_MAX);
+}
+
+module_init(rtc_dev_init);
+module_exit(rtc_dev_exit);
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("RTC class dev interface");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 1fec7c66ba98fc3a04e15fd14fad6b404e56fc94 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:42 -0800
Subject: [PATCH] RTC subsystem: X1205 driver

A port of the existing x1205 driver under the new RTC subsystem.

It is actually under test within the NSLU2 project
(http://www.nslu2-linux.org) and it is working quite well.

It is the first driver under this new subsystem and should be used as a guide
to port other drivers.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8e4dc32..3de823f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -76,4 +76,14 @@ config RTC_INTF_DEV
 comment "RTC drivers"
 	depends on RTC_CLASS
 
+config RTC_DRV_X1205
+	tristate "Xicor/Intersil X1205"
+	depends on RTC_CLASS && I2C
+	help
+	  If you say yes here you get support for the
+	  Xicor/Intersil X1205 RTC chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-x1205.
+
 endmenu
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 38a9212..eaf89b2 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -10,3 +10,6 @@ rtc-core-y			:= class.o interface.o
 obj-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
 obj-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
 obj-$(CONFIG_RTC_INTF_DEV)	+= rtc-dev.o
+
+obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
+
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
new file mode 100644
index 0000000..621d17a
--- /dev/null
+++ b/drivers/rtc/rtc-x1205.c
@@ -0,0 +1,619 @@
+/*
+ * An i2c driver for the Xicor/Intersil X1205 RTC
+ * Copyright 2004 Karen Spearel
+ * Copyright 2005 Alessandro Zummo
+ *
+ * please send all reports to:
+ * 	Karen Spearel <kas111 at gmail dot com>
+ *	Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * based on a lot of other RTC drivers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+
+#define DRV_VERSION "1.0.6"
+
+/* Addresses to scan: none. This chip is located at
+ * 0x6f and uses a two bytes register addressing.
+ * Two bytes need to be written to read a single register,
+ * while most other chips just require one and take the second
+ * one as the data to be written. To prevent corrupting
+ * unknown chips, the user must explicitely set the probe parameter.
+ */
+
+static unsigned short normal_i2c[] = { I2C_CLIENT_END };
+
+/* Insmod parameters */
+I2C_CLIENT_INSMOD;
+
+/* offsets into CCR area */
+
+#define CCR_SEC			0
+#define CCR_MIN			1
+#define CCR_HOUR		2
+#define CCR_MDAY		3
+#define CCR_MONTH		4
+#define CCR_YEAR		5
+#define CCR_WDAY		6
+#define CCR_Y2K			7
+
+#define X1205_REG_SR		0x3F	/* status register */
+#define X1205_REG_Y2K		0x37
+#define X1205_REG_DW		0x36
+#define X1205_REG_YR		0x35
+#define X1205_REG_MO		0x34
+#define X1205_REG_DT		0x33
+#define X1205_REG_HR		0x32
+#define X1205_REG_MN		0x31
+#define X1205_REG_SC		0x30
+#define X1205_REG_DTR		0x13
+#define X1205_REG_ATR		0x12
+#define X1205_REG_INT		0x11
+#define X1205_REG_0		0x10
+#define X1205_REG_Y2K1		0x0F
+#define X1205_REG_DWA1		0x0E
+#define X1205_REG_YRA1		0x0D
+#define X1205_REG_MOA1		0x0C
+#define X1205_REG_DTA1		0x0B
+#define X1205_REG_HRA1		0x0A
+#define X1205_REG_MNA1		0x09
+#define X1205_REG_SCA1		0x08
+#define X1205_REG_Y2K0		0x07
+#define X1205_REG_DWA0		0x06
+#define X1205_REG_YRA0		0x05
+#define X1205_REG_MOA0		0x04
+#define X1205_REG_DTA0		0x03
+#define X1205_REG_HRA0		0x02
+#define X1205_REG_MNA0		0x01
+#define X1205_REG_SCA0		0x00
+
+#define X1205_CCR_BASE		0x30	/* Base address of CCR */
+#define X1205_ALM0_BASE		0x00	/* Base address of ALARM0 */
+
+#define X1205_SR_RTCF		0x01	/* Clock failure */
+#define X1205_SR_WEL		0x02	/* Write Enable Latch */
+#define X1205_SR_RWEL		0x04	/* Register Write Enable */
+
+#define X1205_DTR_DTR0		0x01
+#define X1205_DTR_DTR1		0x02
+#define X1205_DTR_DTR2		0x04
+
+#define X1205_HR_MIL		0x80	/* Set in ccr.hour for 24 hr mode */
+
+/* Prototypes */
+static int x1205_attach(struct i2c_adapter *adapter);
+static int x1205_detach(struct i2c_client *client);
+static int x1205_probe(struct i2c_adapter *adapter, int address, int kind);
+
+static struct i2c_driver x1205_driver = {
+	.driver		= {
+		.name	= "x1205",
+	},
+	.id		= I2C_DRIVERID_X1205,
+	.attach_adapter = &x1205_attach,
+	.detach_client	= &x1205_detach,
+};
+
+/*
+ * In the routines that deal directly with the x1205 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch
+ * Epoch is initialized as 2000. Time is set to UTC.
+ */
+static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
+				unsigned char reg_base)
+{
+	unsigned char dt_addr[2] = { 0, reg_base };
+
+	unsigned char buf[8];
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 2, dt_addr },	/* setup read ptr */
+		{ client->addr, I2C_M_RD, 8, buf },	/* read date */
+	};
+
+	/* read date registers */
+	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	dev_dbg(&client->dev,
+		"%s: raw read data - sec=%02x, min=%02x, hr=%02x, "
+		"mday=%02x, mon=%02x, year=%02x, wday=%02x, y2k=%02x\n",
+		__FUNCTION__,
+		buf[0], buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6], buf[7]);
+
+	tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
+	tm->tm_min = BCD2BIN(buf[CCR_MIN]);
+	tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
+	tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
+	tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
+	tm->tm_year = BCD2BIN(buf[CCR_YEAR])
+			+ (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900;
+	tm->tm_wday = buf[CCR_WDAY];
+
+	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	return 0;
+}
+
+static int x1205_get_status(struct i2c_client *client, unsigned char *sr)
+{
+	static unsigned char sr_addr[2] = { 0, X1205_REG_SR };
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 2, sr_addr },	/* setup read ptr */
+		{ client->addr, I2C_M_RD, 1, sr },	/* read status */
+	};
+
+	/* read status register */
+	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
+				int datetoo, u8 reg_base)
+{
+	int i, xfer;
+	unsigned char buf[8];
+
+	static const unsigned char wel[3] = { 0, X1205_REG_SR,
+						X1205_SR_WEL };
+
+	static const unsigned char rwel[3] = { 0, X1205_REG_SR,
+						X1205_SR_WEL | X1205_SR_RWEL };
+
+	static const unsigned char diswe[3] = { 0, X1205_REG_SR, 0 };
+
+	dev_dbg(&client->dev,
+		"%s: secs=%d, mins=%d, hours=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour);
+
+	buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
+	buf[CCR_MIN] = BIN2BCD(tm->tm_min);
+
+	/* set hour and 24hr bit */
+	buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
+
+	/* should we also set the date? */
+	if (datetoo) {
+		dev_dbg(&client->dev,
+			"%s: mday=%d, mon=%d, year=%d, wday=%d\n",
+			__FUNCTION__,
+			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+		buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
+
+		/* month, 1 - 12 */
+		buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1);
+
+		/* year, since the rtc epoch*/
+		buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100);
+		buf[CCR_WDAY] = tm->tm_wday & 0x07;
+		buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100);
+	}
+
+	/* this sequence is required to unlock the chip */
+	if ((xfer = i2c_master_send(client, wel, 3)) != 3) {
+		dev_err(&client->dev, "%s: wel - %d\n", __FUNCTION__, xfer);
+		return -EIO;
+	}
+
+	if ((xfer = i2c_master_send(client, rwel, 3)) != 3) {
+		dev_err(&client->dev, "%s: rwel - %d\n", __FUNCTION__, xfer);
+		return -EIO;
+	}
+
+	/* write register's data */
+	for (i = 0; i < (datetoo ? 8 : 3); i++) {
+		unsigned char rdata[3] = { 0, reg_base + i, buf[i] };
+
+		xfer = i2c_master_send(client, rdata, 3);
+		if (xfer != 3) {
+			dev_err(&client->dev,
+				"%s: xfer=%d addr=%02x, data=%02x\n",
+				__FUNCTION__,
+				 xfer, rdata[1], rdata[2]);
+			return -EIO;
+		}
+	};
+
+	/* disable further writes */
+	if ((xfer = i2c_master_send(client, diswe, 3)) != 3) {
+		dev_err(&client->dev, "%s: diswe - %d\n", __FUNCTION__, xfer);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int x1205_fix_osc(struct i2c_client *client)
+{
+	int err;
+	struct rtc_time tm;
+
+	tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+
+	if ((err = x1205_set_datetime(client, &tm, 0, X1205_CCR_BASE)) < 0)
+		dev_err(&client->dev,
+			"unable to restart the oscillator\n");
+
+	return err;
+}
+
+static int x1205_get_dtrim(struct i2c_client *client, int *trim)
+{
+	unsigned char dtr;
+	static unsigned char dtr_addr[2] = { 0, X1205_REG_DTR };
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 2, dtr_addr },	/* setup read ptr */
+		{ client->addr, I2C_M_RD, 1, &dtr }, 	/* read dtr */
+	};
+
+	/* read dtr register */
+	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	dev_dbg(&client->dev, "%s: raw dtr=%x\n", __FUNCTION__, dtr);
+
+	*trim = 0;
+
+	if (dtr & X1205_DTR_DTR0)
+		*trim += 20;
+
+	if (dtr & X1205_DTR_DTR1)
+		*trim += 10;
+
+	if (dtr & X1205_DTR_DTR2)
+		*trim = -*trim;
+
+	return 0;
+}
+
+static int x1205_get_atrim(struct i2c_client *client, int *trim)
+{
+	s8 atr;
+	static unsigned char atr_addr[2] = { 0, X1205_REG_ATR };
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 2, atr_addr },	/* setup read ptr */
+		{ client->addr, I2C_M_RD, 1, &atr }, 	/* read atr */
+	};
+
+	/* read atr register */
+	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	dev_dbg(&client->dev, "%s: raw atr=%x\n", __FUNCTION__, atr);
+
+	/* atr is a two's complement value on 6 bits,
+	 * perform sign extension. The formula is
+	 * Catr = (atr * 0.25pF) + 11.00pF.
+	 */
+	if (atr & 0x20)
+		atr |= 0xC0;
+
+	dev_dbg(&client->dev, "%s: raw atr=%x (%d)\n", __FUNCTION__, atr, atr);
+
+	*trim = (atr * 250) + 11000;
+
+	dev_dbg(&client->dev, "%s: real=%d\n", __FUNCTION__, *trim);
+
+	return 0;
+}
+
+struct x1205_limit
+{
+	unsigned char reg, mask, min, max;
+};
+
+static int x1205_validate_client(struct i2c_client *client)
+{
+	int i, xfer;
+
+	/* Probe array. We will read the register at the specified
+	 * address and check if the given bits are zero.
+	 */
+	static const unsigned char probe_zero_pattern[] = {
+		/* register, mask */
+		X1205_REG_SR,	0x18,
+		X1205_REG_DTR,	0xF8,
+		X1205_REG_ATR,	0xC0,
+		X1205_REG_INT,	0x18,
+		X1205_REG_0,	0xFF,
+	};
+
+	static const struct x1205_limit probe_limits_pattern[] = {
+		/* register, mask, min, max */
+		{ X1205_REG_Y2K,	0xFF,	19,	20	},
+		{ X1205_REG_DW,		0xFF,	0,	6	},
+		{ X1205_REG_YR,		0xFF,	0,	99	},
+		{ X1205_REG_MO,		0xFF,	0,	12	},
+		{ X1205_REG_DT,		0xFF,	0,	31	},
+		{ X1205_REG_HR,		0x7F,	0,	23	},
+		{ X1205_REG_MN,		0xFF,	0,	59	},
+		{ X1205_REG_SC,		0xFF,	0,	59	},
+		{ X1205_REG_Y2K1,	0xFF,	19,	20	},
+		{ X1205_REG_Y2K0,	0xFF,	19,	20	},
+	};
+
+	/* check that registers have bits a 0 where expected */
+	for (i = 0; i < ARRAY_SIZE(probe_zero_pattern); i += 2) {
+		unsigned char buf;
+
+		unsigned char addr[2] = { 0, probe_zero_pattern[i] };
+
+		struct i2c_msg msgs[2] = {
+			{ client->addr, 0, 2, addr },
+			{ client->addr, I2C_M_RD, 1, &buf },
+		};
+
+		if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) {
+			dev_err(&client->adapter->dev,
+				"%s: could not read register %x\n",
+				__FUNCTION__, probe_zero_pattern[i]);
+
+			return -EIO;
+		}
+
+		if ((buf & probe_zero_pattern[i+1]) != 0) {
+			dev_err(&client->adapter->dev,
+				"%s: register=%02x, zero pattern=%d, value=%x\n",
+				__FUNCTION__, probe_zero_pattern[i], i, buf);
+
+			return -ENODEV;
+		}
+	}
+
+	/* check limits (only registers with bcd values) */
+	for (i = 0; i < ARRAY_SIZE(probe_limits_pattern); i++) {
+		unsigned char reg, value;
+
+		unsigned char addr[2] = { 0, probe_limits_pattern[i].reg };
+
+		struct i2c_msg msgs[2] = {
+			{ client->addr, 0, 2, addr },
+			{ client->addr, I2C_M_RD, 1, &reg },
+		};
+
+		if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) {
+			dev_err(&client->adapter->dev,
+				"%s: could not read register %x\n",
+				__FUNCTION__, probe_limits_pattern[i].reg);
+
+			return -EIO;
+		}
+
+		value = BCD2BIN(reg & probe_limits_pattern[i].mask);
+
+		if (value > probe_limits_pattern[i].max ||
+			value < probe_limits_pattern[i].min) {
+			dev_dbg(&client->adapter->dev,
+				"%s: register=%x, lim pattern=%d, value=%d\n",
+				__FUNCTION__, probe_limits_pattern[i].reg,
+				i, value);
+
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static int x1205_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	return x1205_get_datetime(to_i2c_client(dev),
+		&alrm->time, X1205_ALM0_BASE);
+}
+
+static int x1205_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	return x1205_set_datetime(to_i2c_client(dev),
+		&alrm->time, 1, X1205_ALM0_BASE);
+}
+
+static int x1205_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	return x1205_get_datetime(to_i2c_client(dev),
+		tm, X1205_CCR_BASE);
+}
+
+static int x1205_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	return x1205_set_datetime(to_i2c_client(dev),
+		tm, 1, X1205_CCR_BASE);
+}
+
+static int x1205_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	int err, dtrim, atrim;
+
+	seq_printf(seq, "24hr\t\t: yes\n");
+
+	if ((err = x1205_get_dtrim(to_i2c_client(dev), &dtrim)) == 0)
+		seq_printf(seq, "digital_trim\t: %d ppm\n", dtrim);
+
+	if ((err = x1205_get_atrim(to_i2c_client(dev), &atrim)) == 0)
+		seq_printf(seq, "analog_trim\t: %d.%02d pF\n",
+			atrim / 1000, atrim % 1000);
+	return 0;
+}
+
+static struct rtc_class_ops x1205_rtc_ops = {
+	.proc		= x1205_rtc_proc,
+	.read_time	= x1205_rtc_read_time,
+	.set_time	= x1205_rtc_set_time,
+	.read_alarm	= x1205_rtc_read_alarm,
+	.set_alarm	= x1205_rtc_set_alarm,
+};
+
+static ssize_t x1205_sysfs_show_atrim(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int atrim;
+
+	if (x1205_get_atrim(to_i2c_client(dev), &atrim) == 0)
+		return sprintf(buf, "%d.%02d pF\n",
+			atrim / 1000, atrim % 1000);
+	return 0;
+}
+static DEVICE_ATTR(atrim, S_IRUGO, x1205_sysfs_show_atrim, NULL);
+
+static ssize_t x1205_sysfs_show_dtrim(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int dtrim;
+
+	if (x1205_get_dtrim(to_i2c_client(dev), &dtrim) == 0)
+		return sprintf(buf, "%d ppm\n", dtrim);
+
+	return 0;
+}
+static DEVICE_ATTR(dtrim, S_IRUGO, x1205_sysfs_show_dtrim, NULL);
+
+static int x1205_attach(struct i2c_adapter *adapter)
+{
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+	return i2c_probe(adapter, &addr_data, x1205_probe);
+}
+
+static int x1205_probe(struct i2c_adapter *adapter, int address, int kind)
+{
+	int err = 0;
+	unsigned char sr;
+	struct i2c_client *client;
+	struct rtc_device *rtc;
+
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!(client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	/* I2C client */
+	client->addr = address;
+	client->driver = &x1205_driver;
+	client->adapter	= adapter;
+
+	strlcpy(client->name, x1205_driver.driver.name, I2C_NAME_SIZE);
+
+	/* Verify the chip is really an X1205 */
+	if (kind < 0) {
+		if (x1205_validate_client(client) < 0) {
+			err = -ENODEV;
+			goto exit_kfree;
+		}
+	}
+
+	/* Inform the i2c layer */
+	if ((err = i2c_attach_client(client)))
+		goto exit_kfree;
+
+	dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+	rtc = rtc_device_register(x1205_driver.driver.name, &client->dev,
+				&x1205_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		err = PTR_ERR(rtc);
+		dev_err(&client->dev,
+			"unable to register the class device\n");
+		goto exit_detach;
+	}
+
+	i2c_set_clientdata(client, rtc);
+
+	/* Check for power failures and eventualy enable the osc */
+	if ((err = x1205_get_status(client, &sr)) == 0) {
+		if (sr & X1205_SR_RTCF) {
+			dev_err(&client->dev,
+				"power failure detected, "
+				"please set the clock\n");
+			udelay(50);
+			x1205_fix_osc(client);
+		}
+	}
+	else
+		dev_err(&client->dev, "couldn't read status\n");
+
+	device_create_file(&client->dev, &dev_attr_atrim);
+	device_create_file(&client->dev, &dev_attr_dtrim);
+
+	return 0;
+
+exit_detach:
+	i2c_detach_client(client);
+
+exit_kfree:
+	kfree(client);
+
+exit:
+	return err;
+}
+
+static int x1205_detach(struct i2c_client *client)
+{
+	int err;
+	struct rtc_device *rtc = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __FUNCTION__);
+
+ 	if (rtc)
+		rtc_device_unregister(rtc);
+
+	if ((err = i2c_detach_client(client)))
+		return err;
+
+	kfree(client);
+
+	return 0;
+}
+
+static int __init x1205_init(void)
+{
+	return i2c_add_driver(&x1205_driver);
+}
+
+static void __exit x1205_exit(void)
+{
+	i2c_del_driver(&x1205_driver);
+}
+
+MODULE_AUTHOR(
+	"Karen Spearel <kas111 at gmail dot com>, "
+	"Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("Xicor/Intersil X1205 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(x1205_init);
+module_exit(x1205_exit);
-- 
cgit v0.10.2


From a95579cd4b40a4e062e187d931f498145551ee29 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:42 -0800
Subject: [PATCH] RTC subsystem: test device/driver

Interrupts can be generated by

echo "alarm|tick|update" >/sys/class/rtc/rtcX/device/irq

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 3de823f..7eb1368 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -86,4 +86,19 @@ config RTC_DRV_X1205
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-x1205.
 
+config RTC_DRV_TEST
+	tristate "Test driver/device"
+	depends on RTC_CLASS
+	help
+	  If you say yes here you get support for the
+	  RTC test driver. It's a software RTC which can be
+	  used to test the RTC subsystem APIs. It gets
+	  the time from the system clock.
+	  You want this driver only if you are doing development
+	  on the RTC subsystem. Please read the source code
+	  for further details.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-test.
+
 endmenu
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index eaf89b2..d5ea0ed 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -12,4 +12,5 @@ obj-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
 obj-$(CONFIG_RTC_INTF_DEV)	+= rtc-dev.o
 
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
+obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
new file mode 100644
index 0000000..43d1074
--- /dev/null
+++ b/drivers/rtc/rtc-test.c
@@ -0,0 +1,204 @@
+/*
+ * An RTC test device/driver
+ * Copyright (C) 2005 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+static struct platform_device *test0 = NULL, *test1 = NULL;
+
+static int test_rtc_read_alarm(struct device *dev,
+	struct rtc_wkalrm *alrm)
+{
+	return 0;
+}
+
+static int test_rtc_set_alarm(struct device *dev,
+	struct rtc_wkalrm *alrm)
+{
+	return 0;
+}
+
+static int test_rtc_read_time(struct device *dev,
+	struct rtc_time *tm)
+{
+	rtc_time_to_tm(get_seconds(), tm);
+	return 0;
+}
+
+static int test_rtc_set_time(struct device *dev,
+	struct rtc_time *tm)
+{
+	return 0;
+}
+
+static int test_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+	return 0;
+}
+
+static int test_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+
+	seq_printf(seq, "24hr\t\t: yes\n");
+	seq_printf(seq, "test\t\t: yes\n");
+	seq_printf(seq, "id\t\t: %d\n", plat_dev->id);
+
+	return 0;
+}
+
+static int test_rtc_ioctl(struct device *dev, unsigned int cmd,
+	unsigned long arg)
+{
+	/* We do support interrupts, they're generated
+	 * using the sysfs interface.
+	 */
+	switch (cmd) {
+	case RTC_PIE_ON:
+	case RTC_PIE_OFF:
+	case RTC_UIE_ON:
+	case RTC_UIE_OFF:
+	case RTC_AIE_ON:
+	case RTC_AIE_OFF:
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static struct rtc_class_ops test_rtc_ops = {
+	.proc = test_rtc_proc,
+	.read_time = test_rtc_read_time,
+	.set_time = test_rtc_set_time,
+	.read_alarm = test_rtc_read_alarm,
+	.set_alarm = test_rtc_set_alarm,
+	.set_mmss = test_rtc_set_mmss,
+	.ioctl = test_rtc_ioctl,
+};
+
+static ssize_t test_irq_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", 42);
+}
+static ssize_t test_irq_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int retval;
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct rtc_device *rtc = platform_get_drvdata(plat_dev);
+
+	retval = count;
+	if (strncmp(buf, "tick", 4) == 0)
+		rtc_update_irq(&rtc->class_dev, 1, RTC_PF | RTC_IRQF);
+	else if (strncmp(buf, "alarm", 5) == 0)
+		rtc_update_irq(&rtc->class_dev, 1, RTC_AF | RTC_IRQF);
+	else if (strncmp(buf, "update", 6) == 0)
+		rtc_update_irq(&rtc->class_dev, 1, RTC_UF | RTC_IRQF);
+	else
+		retval = -EINVAL;
+
+	return retval;
+}
+static DEVICE_ATTR(irq, S_IRUGO | S_IWUSR, test_irq_show, test_irq_store);
+
+static int test_probe(struct platform_device *plat_dev)
+{
+	int err;
+	struct rtc_device *rtc = rtc_device_register("test", &plat_dev->dev,
+						&test_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		err = PTR_ERR(rtc);
+		dev_err(&plat_dev->dev,
+			"unable to register the class device\n");
+		return err;
+	}
+	device_create_file(&plat_dev->dev, &dev_attr_irq);
+
+	platform_set_drvdata(plat_dev, rtc);
+
+	return 0;
+}
+
+static int __devexit test_remove(struct platform_device *plat_dev)
+{
+	struct rtc_device *rtc = platform_get_drvdata(plat_dev);
+
+	rtc_device_unregister(rtc);
+	device_remove_file(&plat_dev->dev, &dev_attr_irq);
+
+	return 0;
+}
+
+static struct platform_driver test_drv = {
+	.probe	= test_probe,
+	.remove = __devexit_p(test_remove),
+	.driver = {
+		.name = "rtc-test",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init test_init(void)
+{
+	int err;
+
+	if ((err = platform_driver_register(&test_drv)))
+		return err;
+
+	if ((test0 = platform_device_alloc("rtc-test", 0)) == NULL) {
+		err = -ENOMEM;
+		goto exit_driver_unregister;
+	}
+
+	if ((test1 = platform_device_alloc("rtc-test", 1)) == NULL) {
+		err = -ENOMEM;
+		goto exit_free_test0;
+	}
+
+	if ((err = platform_device_add(test0)))
+		goto exit_free_test1;
+
+	if ((err = platform_device_add(test1)))
+		goto exit_device_unregister;
+
+	return 0;
+
+exit_device_unregister:
+	platform_device_unregister(test0);
+
+exit_free_test1:
+	platform_device_put(test1);
+
+exit_free_test0:
+	platform_device_put(test0);
+
+exit_driver_unregister:
+	platform_driver_unregister(&test_drv);
+	return err;
+}
+
+static void __exit test_exit(void)
+{
+	platform_device_unregister(test0);
+	platform_device_unregister(test1);
+	platform_driver_unregister(&test_drv);
+}
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("RTC test driver/device");
+MODULE_LICENSE("GPL");
+
+module_init(test_init);
+module_exit(test_exit);
-- 
cgit v0.10.2


From edf1aaa31fc52ade0da7b6d1f2dffc17f0bdb9ff Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:43 -0800
Subject: [PATCH] RTC subsystem: DS1672 driver

Driver for the Dallas/Maxim DS1672 chip, found on the Loft
(http://www.giantshoulderinc.com).

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 7eb1368..de33526 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -86,6 +86,16 @@ config RTC_DRV_X1205
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-x1205.
 
+config RTC_DRV_DS1672
+	tristate "Dallas/Maxim DS1672"
+	depends on RTC_CLASS && I2C
+	help
+	  If you say yes here you get support for the
+	  Dallas/Maxim DS1672 timekeeping chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-ds1672.
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	depends on RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index d5ea0ed..f3fd5b0 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -13,4 +13,5 @@ obj-$(CONFIG_RTC_INTF_DEV)	+= rtc-dev.o
 
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
+obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 
diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
new file mode 100644
index 0000000..358695a
--- /dev/null
+++ b/drivers/rtc/rtc-ds1672.c
@@ -0,0 +1,233 @@
+/*
+ * An rtc/i2c driver for the Dallas DS1672
+ * Copyright 2005 Alessandro Zummo
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/rtc.h>
+
+#define DRV_VERSION "0.2"
+
+/* Addresses to scan: none. This chip cannot be detected. */
+static unsigned short normal_i2c[] = { I2C_CLIENT_END };
+
+/* Insmod parameters */
+I2C_CLIENT_INSMOD;
+
+/* Registers */
+
+#define DS1672_REG_CNT_BASE	0
+#define DS1672_REG_CONTROL	4
+#define DS1672_REG_TRICKLE	5
+
+
+/* Prototypes */
+static int ds1672_probe(struct i2c_adapter *adapter, int address, int kind);
+
+/*
+ * In the routines that deal directly with the ds1672 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch
+ * Epoch is initialized as 2000. Time is set to UTC.
+ */
+static int ds1672_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	unsigned long time;
+	unsigned char addr = DS1672_REG_CNT_BASE;
+	unsigned char buf[4];
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 1, &addr },		/* setup read ptr */
+		{ client->addr, I2C_M_RD, 4, buf },	/* read date */
+	};
+
+	/* read date registers */
+	if ((i2c_transfer(client->adapter, &msgs[0], 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	dev_dbg(&client->dev,
+		"%s: raw read data - counters=%02x,%02x,%02x,%02x\n"
+		__FUNCTION__,
+		buf[0], buf[1], buf[2], buf[3]);
+
+	time = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
+
+	rtc_time_to_tm(time, tm);
+
+	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	return 0;
+}
+
+static int ds1672_set_mmss(struct i2c_client *client, unsigned long secs)
+{
+	int xfer;
+	unsigned char buf[5];
+
+	buf[0] = DS1672_REG_CNT_BASE;
+	buf[1] = secs & 0x000000FF;
+	buf[2] = (secs & 0x0000FF00) >> 8;
+	buf[3] = (secs & 0x00FF0000) >> 16;
+	buf[4] = (secs & 0xFF000000) >> 24;
+
+	xfer = i2c_master_send(client, buf, 5);
+	if (xfer != 5) {
+		dev_err(&client->dev, "%s: send: %d\n", __FUNCTION__, xfer);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int ds1672_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	unsigned long secs;
+
+	dev_dbg(&client->dev,
+		"%s: secs=%d, mins=%d, hours=%d, ",
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	rtc_tm_to_time(tm, &secs);
+
+	return ds1672_set_mmss(client, secs);
+}
+
+static int ds1672_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	return ds1672_get_datetime(to_i2c_client(dev), tm);
+}
+
+static int ds1672_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	return ds1672_set_datetime(to_i2c_client(dev), tm);
+}
+
+static int ds1672_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+	return ds1672_set_mmss(to_i2c_client(dev), secs);
+}
+
+static struct rtc_class_ops ds1672_rtc_ops = {
+	.read_time	= ds1672_rtc_read_time,
+	.set_time	= ds1672_rtc_set_time,
+	.set_mmss	= ds1672_rtc_set_mmss,
+};
+
+static int ds1672_attach(struct i2c_adapter *adapter)
+{
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+	return i2c_probe(adapter, &addr_data, ds1672_probe);
+}
+
+static int ds1672_detach(struct i2c_client *client)
+{
+	int err;
+	struct rtc_device *rtc = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __FUNCTION__);
+
+ 	if (rtc)
+		rtc_device_unregister(rtc);
+
+	if ((err = i2c_detach_client(client)))
+		return err;
+
+	kfree(client);
+
+	return 0;
+}
+
+static struct i2c_driver ds1672_driver = {
+	.driver		= {
+		.name	= "ds1672",
+	},
+	.id		= I2C_DRIVERID_DS1672,
+	.attach_adapter = &ds1672_attach,
+	.detach_client	= &ds1672_detach,
+};
+
+static int ds1672_probe(struct i2c_adapter *adapter, int address, int kind)
+{
+	int err = 0;
+	struct i2c_client *client;
+	struct rtc_device *rtc;
+
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!(client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	/* I2C client */
+	client->addr = address;
+	client->driver = &ds1672_driver;
+	client->adapter	= adapter;
+
+	strlcpy(client->name, ds1672_driver.driver.name, I2C_NAME_SIZE);
+
+	/* Inform the i2c layer */
+	if ((err = i2c_attach_client(client)))
+		goto exit_kfree;
+
+	dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+	rtc = rtc_device_register(ds1672_driver.driver.name, &client->dev,
+				&ds1672_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		err = PTR_ERR(rtc);
+		dev_err(&client->dev,
+			"unable to register the class device\n");
+		goto exit_detach;
+	}
+
+	i2c_set_clientdata(client, rtc);
+
+	return 0;
+
+exit_detach:
+	i2c_detach_client(client);
+
+exit_kfree:
+	kfree(client);
+
+exit:
+	return err;
+}
+
+static int __init ds1672_init(void)
+{
+	return i2c_add_driver(&ds1672_driver);
+}
+
+static void __exit ds1672_exit(void)
+{
+	i2c_del_driver(&ds1672_driver);
+}
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("Dallas/Maxim DS1672 timekeeper driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(ds1672_init);
+module_exit(ds1672_exit);
-- 
cgit v0.10.2


From b5a82d628d4491558c109fbeabc2993d6686e89c Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:44 -0800
Subject: [PATCH] RTC subsystem: PCF8563 driver

An RTC class aware driver for the Philips PCF8563 RTC and Epson RTC8564 chips.

This chip is used on the Iomega NAS100D.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index de33526..0309fb3 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -96,6 +96,17 @@ config RTC_DRV_DS1672
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ds1672.
 
+config RTC_DRV_PCF8563
+	tristate "Philips PCF8563/Epson RTC8564"
+	depends on RTC_CLASS && I2C
+	help
+	  If you say yes here you get support for the
+	  Philips PCF8563 RTC chip. The Epson RTC8564
+	  should work as well.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-pcf8563.
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	depends on RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index f3fd5b0..bdf7e49 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -14,4 +14,5 @@ obj-$(CONFIG_RTC_INTF_DEV)	+= rtc-dev.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
+obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
new file mode 100644
index 0000000..d857d45
--- /dev/null
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -0,0 +1,353 @@
+/*
+ * An I2C driver for the Philips PCF8563 RTC
+ * Copyright 2005-06 Tower Technologies
+ *
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ * Maintainers: http://www.nslu2-linux.org/
+ *
+ * based on the other drivers in this same directory.
+ *
+ * http://www.semiconductors.philips.com/acrobat/datasheets/PCF8563-04.pdf
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+
+#define DRV_VERSION "0.4.2"
+
+/* Addresses to scan: none
+ * This chip cannot be reliably autodetected. An empty eeprom
+ * located at 0x51 will pass the validation routine due to
+ * the way the registers are implemented.
+ */
+static unsigned short normal_i2c[] = { I2C_CLIENT_END };
+
+/* Module parameters */
+I2C_CLIENT_INSMOD;
+
+#define PCF8563_REG_ST1		0x00 /* status */
+#define PCF8563_REG_ST2		0x01
+
+#define PCF8563_REG_SC		0x02 /* datetime */
+#define PCF8563_REG_MN		0x03
+#define PCF8563_REG_HR		0x04
+#define PCF8563_REG_DM		0x05
+#define PCF8563_REG_DW		0x06
+#define PCF8563_REG_MO		0x07
+#define PCF8563_REG_YR		0x08
+
+#define PCF8563_REG_AMN		0x09 /* alarm */
+#define PCF8563_REG_AHR		0x0A
+#define PCF8563_REG_ADM		0x0B
+#define PCF8563_REG_ADW		0x0C
+
+#define PCF8563_REG_CLKO	0x0D /* clock out */
+#define PCF8563_REG_TMRC	0x0E /* timer control */
+#define PCF8563_REG_TMR		0x0F /* timer */
+
+#define PCF8563_SC_LV		0x80 /* low voltage */
+#define PCF8563_MO_C		0x80 /* century */
+
+static int pcf8563_probe(struct i2c_adapter *adapter, int address, int kind);
+static int pcf8563_detach(struct i2c_client *client);
+
+/*
+ * In the routines that deal directly with the pcf8563 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
+ */
+static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	unsigned char buf[13] = { PCF8563_REG_ST1 };
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 1, buf },	/* setup read ptr */
+		{ client->addr, I2C_M_RD, 13, buf },	/* read status + date */
+	};
+
+	/* read registers */
+	if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	if (buf[PCF8563_REG_SC] & PCF8563_SC_LV)
+		dev_info(&client->dev,
+			"low voltage detected, date/time is not reliable.\n");
+
+	dev_dbg(&client->dev,
+		"%s: raw data is st1=%02x, st2=%02x, sec=%02x, min=%02x, hr=%02x, "
+		"mday=%02x, wday=%02x, mon=%02x, year=%02x\n",
+		__FUNCTION__,
+		buf[0], buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6], buf[7],
+		buf[8]);
+
+
+	tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
+	tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
+	tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+	tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+	tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
+	tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+	tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR])
+		+ (buf[PCF8563_REG_MO] & PCF8563_MO_C ? 100 : 0);
+
+	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	/* the clock can give out invalid datetime, but we cannot return
+	 * -EINVAL otherwise hwclock will refuse to set the time on bootup.
+	 */
+	if (rtc_valid_tm(tm) < 0)
+		dev_err(&client->dev, "retrieved date/time is not valid.\n");
+
+	return 0;
+}
+
+static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	int i, err;
+	unsigned char buf[9];
+
+	dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, "
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	/* hours, minutes and seconds */
+	buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
+	buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
+	buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+
+	buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+
+	/* month, 1 - 12 */
+	buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+
+	/* year and century */
+	buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+	if (tm->tm_year / 100)
+		buf[PCF8563_REG_MO] |= PCF8563_MO_C;
+
+	buf[PCF8563_REG_DW] = tm->tm_wday & 0x07;
+
+	/* write register's data */
+	for (i = 0; i < 7; i++) {
+		unsigned char data[2] = { PCF8563_REG_SC + i,
+						buf[PCF8563_REG_SC + i] };
+
+		err = i2c_master_send(client, data, sizeof(data));
+		if (err != sizeof(data)) {
+			dev_err(&client->dev,
+				"%s: err=%d addr=%02x, data=%02x\n",
+				__FUNCTION__, err, data[0], data[1]);
+			return -EIO;
+		}
+	};
+
+	return 0;
+}
+
+struct pcf8563_limit
+{
+	unsigned char reg;
+	unsigned char mask;
+	unsigned char min;
+	unsigned char max;
+};
+
+static int pcf8563_validate_client(struct i2c_client *client)
+{
+	int i;
+
+	static const struct pcf8563_limit pattern[] = {
+		/* register, mask, min, max */
+		{ PCF8563_REG_SC,	0x7F,	0,	59	},
+		{ PCF8563_REG_MN,	0x7F,	0,	59	},
+		{ PCF8563_REG_HR,	0x3F,	0,	23	},
+		{ PCF8563_REG_DM,	0x3F,	0,	31	},
+		{ PCF8563_REG_MO,	0x1F,	0,	12	},
+	};
+
+	/* check limits (only registers with bcd values) */
+	for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+		int xfer;
+		unsigned char value;
+		unsigned char buf = pattern[i].reg;
+
+		struct i2c_msg msgs[] = {
+			{ client->addr, 0, 1, &buf },
+			{ client->addr, I2C_M_RD, 1, &buf },
+		};
+
+		xfer = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+
+		if (xfer != ARRAY_SIZE(msgs)) {
+			dev_err(&client->adapter->dev,
+				"%s: could not read register 0x%02X\n",
+				__FUNCTION__, pattern[i].reg);
+
+			return -EIO;
+		}
+
+		value = BCD2BIN(buf & pattern[i].mask);
+
+		if (value > pattern[i].max ||
+			value < pattern[i].min) {
+			dev_dbg(&client->adapter->dev,
+				"%s: pattern=%d, reg=%x, mask=0x%02x, min=%d, "
+				"max=%d, value=%d, raw=0x%02X\n",
+				__FUNCTION__, i, pattern[i].reg, pattern[i].mask,
+				pattern[i].min, pattern[i].max,
+				value, buf);
+
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static int pcf8563_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	return pcf8563_get_datetime(to_i2c_client(dev), tm);
+}
+
+static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	return pcf8563_set_datetime(to_i2c_client(dev), tm);
+}
+
+static int pcf8563_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	seq_printf(seq, "24hr\t\t: yes\n");
+	return 0;
+}
+
+static struct rtc_class_ops pcf8563_rtc_ops = {
+	.proc		= pcf8563_rtc_proc,
+	.read_time	= pcf8563_rtc_read_time,
+	.set_time	= pcf8563_rtc_set_time,
+};
+
+static int pcf8563_attach(struct i2c_adapter *adapter)
+{
+	return i2c_probe(adapter, &addr_data, pcf8563_probe);
+}
+
+static struct i2c_driver pcf8563_driver = {
+	.driver		= {
+		.name	= "pcf8563",
+	},
+	.id		= I2C_DRIVERID_PCF8563,
+	.attach_adapter = &pcf8563_attach,
+	.detach_client	= &pcf8563_detach,
+};
+
+static int pcf8563_probe(struct i2c_adapter *adapter, int address, int kind)
+{
+	struct i2c_client *client;
+	struct rtc_device *rtc;
+
+	int err = 0;
+
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!(client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	client->addr = address;
+	client->driver = &pcf8563_driver;
+	client->adapter	= adapter;
+
+	strlcpy(client->name, pcf8563_driver.driver.name, I2C_NAME_SIZE);
+
+	/* Verify the chip is really an PCF8563 */
+	if (kind < 0) {
+		if (pcf8563_validate_client(client) < 0) {
+			err = -ENODEV;
+			goto exit_kfree;
+		}
+	}
+
+	/* Inform the i2c layer */
+	if ((err = i2c_attach_client(client)))
+		goto exit_kfree;
+
+	dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+	rtc = rtc_device_register(pcf8563_driver.driver.name, &client->dev,
+				&pcf8563_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		err = PTR_ERR(rtc);
+		dev_err(&client->dev,
+			"unable to register the class device\n");
+		goto exit_detach;
+	}
+
+	i2c_set_clientdata(client, rtc);
+
+	return 0;
+
+exit_detach:
+	i2c_detach_client(client);
+
+exit_kfree:
+	kfree(client);
+
+exit:
+	return err;
+}
+
+static int pcf8563_detach(struct i2c_client *client)
+{
+	int err;
+	struct rtc_device *rtc = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __FUNCTION__);
+
+	if (rtc)
+		rtc_device_unregister(rtc);
+
+	if ((err = i2c_detach_client(client)))
+		return err;
+
+	kfree(client);
+
+	return 0;
+}
+
+static int __init pcf8563_init(void)
+{
+	return i2c_add_driver(&pcf8563_driver);
+}
+
+static void __exit pcf8563_exit(void)
+{
+	i2c_del_driver(&pcf8563_driver);
+}
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("Philips PCF8563/Epson RTC8564 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(pcf8563_init);
+module_exit(pcf8563_exit);
-- 
cgit v0.10.2


From 7520b94debdc61620e1582fb4f5cca4a830f91cd Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:45 -0800
Subject: [PATCH] RTC subsystem: RS5C372 driver

RTC class aware driver for the Ricoh RS5C372 chip used, among others, on the
Synology DS101.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0309fb3..609f555 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -107,6 +107,16 @@ config RTC_DRV_PCF8563
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-pcf8563.
 
+config RTC_DRV_RS5C372
+	tristate "Ricoh RS5C372A/B"
+	depends on RTC_CLASS && I2C
+	help
+	  If you say yes here you get support for the
+	  Ricoh RS5C372A and RS5C372B RTC chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-rs5c372.
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	depends on RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index bdf7e49..d2b4b17 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
+obj-$(CONFIG_RTC_DRV_RS5C372)	+= rtc-rs5c372.o
 
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
new file mode 100644
index 0000000..396c868
--- /dev/null
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -0,0 +1,294 @@
+/*
+ * An I2C driver for the Ricoh RS5C372 RTC
+ *
+ * Copyright (C) 2005 Pavel Mironchik <pmironchik@optifacio.net>
+ * Copyright (C) 2006 Tower Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+
+#define DRV_VERSION "0.2"
+
+/* Addresses to scan */
+static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END };
+
+/* Insmod parameters */
+I2C_CLIENT_INSMOD;
+
+#define RS5C372_REG_SECS	0
+#define RS5C372_REG_MINS	1
+#define RS5C372_REG_HOURS	2
+#define RS5C372_REG_WDAY	3
+#define RS5C372_REG_DAY		4
+#define RS5C372_REG_MONTH	5
+#define RS5C372_REG_YEAR	6
+#define RS5C372_REG_TRIM	7
+
+#define RS5C372_TRIM_XSL	0x80
+#define RS5C372_TRIM_MASK	0x7F
+
+#define RS5C372_REG_BASE	0
+
+static int rs5c372_attach(struct i2c_adapter *adapter);
+static int rs5c372_detach(struct i2c_client *client);
+static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind);
+
+static struct i2c_driver rs5c372_driver = {
+	.driver		= {
+		.name	= "rs5c372",
+	},
+	.attach_adapter	= &rs5c372_attach,
+	.detach_client	= &rs5c372_detach,
+};
+
+static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	unsigned char buf[7] = { RS5C372_REG_BASE };
+
+	/* this implements the 1st reading method, according
+	 * to the datasheet. buf[0] is initialized with
+	 * address ptr and transmission format register.
+	 */
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 1, buf },
+		{ client->addr, I2C_M_RD, 7, buf },
+	};
+
+	if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	tm->tm_sec = BCD2BIN(buf[RS5C372_REG_SECS] & 0x7f);
+	tm->tm_min = BCD2BIN(buf[RS5C372_REG_MINS] & 0x7f);
+	tm->tm_hour = BCD2BIN(buf[RS5C372_REG_HOURS] & 0x3f);
+	tm->tm_wday = BCD2BIN(buf[RS5C372_REG_WDAY] & 0x07);
+	tm->tm_mday = BCD2BIN(buf[RS5C372_REG_DAY] & 0x3f);
+
+	/* tm->tm_mon is zero-based */
+	tm->tm_mon = BCD2BIN(buf[RS5C372_REG_MONTH] & 0x1f) - 1;
+
+	/* year is 1900 + tm->tm_year */
+	tm->tm_year = BCD2BIN(buf[RS5C372_REG_YEAR]) + 100;
+
+	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__,
+		tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	return 0;
+}
+
+static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+	unsigned char buf[8] = { RS5C372_REG_BASE };
+
+	dev_dbg(&client->dev,
+		"%s: secs=%d, mins=%d, hours=%d ",
+		"mday=%d, mon=%d, year=%d, wday=%d\n",
+		__FUNCTION__, tm->tm_sec, tm->tm_min, tm->tm_hour,
+		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	buf[1] = BIN2BCD(tm->tm_sec);
+	buf[2] = BIN2BCD(tm->tm_min);
+	buf[3] = BIN2BCD(tm->tm_hour);
+	buf[4] = BIN2BCD(tm->tm_wday);
+	buf[5] = BIN2BCD(tm->tm_mday);
+	buf[6] = BIN2BCD(tm->tm_mon + 1);
+	buf[7] = BIN2BCD(tm->tm_year - 100);
+
+	if ((i2c_master_send(client, buf, 8)) != 8) {
+		dev_err(&client->dev, "%s: write error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
+{
+	unsigned char buf = RS5C372_REG_TRIM;
+
+	struct i2c_msg msgs[] = {
+		{ client->addr, 0, 1, &buf },
+		{ client->addr, I2C_M_RD, 1, &buf },
+	};
+
+	if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
+		dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
+		return -EIO;
+	}
+
+	dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, trim);
+
+	if (osc)
+		*osc = (buf & RS5C372_TRIM_XSL) ? 32000 : 32768;
+
+	if (trim)
+		*trim = buf & RS5C372_TRIM_MASK;
+
+	return 0;
+}
+
+static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	return rs5c372_get_datetime(to_i2c_client(dev), tm);
+}
+
+static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	return rs5c372_set_datetime(to_i2c_client(dev), tm);
+}
+
+static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	int err, osc, trim;
+
+	seq_printf(seq, "24hr\t\t: yes\n");
+
+	if ((err = rs5c372_get_trim(to_i2c_client(dev), &osc, &trim)) == 0) {
+		seq_printf(seq, "%d.%03d KHz\n", osc / 1000, osc % 1000);
+		seq_printf(seq, "trim\t: %d\n", trim);
+	}
+
+	return 0;
+}
+
+static struct rtc_class_ops rs5c372_rtc_ops = {
+	.proc		= rs5c372_rtc_proc,
+	.read_time	= rs5c372_rtc_read_time,
+	.set_time	= rs5c372_rtc_set_time,
+};
+
+static ssize_t rs5c372_sysfs_show_trim(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int trim;
+
+	if (rs5c372_get_trim(to_i2c_client(dev), NULL, &trim) == 0)
+		return sprintf(buf, "0x%2x\n", trim);
+
+	return 0;
+}
+static DEVICE_ATTR(trim, S_IRUGO, rs5c372_sysfs_show_trim, NULL);
+
+static ssize_t rs5c372_sysfs_show_osc(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int osc;
+
+	if (rs5c372_get_trim(to_i2c_client(dev), &osc, NULL) == 0)
+		return sprintf(buf, "%d.%03d KHz\n", osc / 1000, osc % 1000);
+
+	return 0;
+}
+static DEVICE_ATTR(osc, S_IRUGO, rs5c372_sysfs_show_osc, NULL);
+
+static int rs5c372_attach(struct i2c_adapter *adapter)
+{
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+	return i2c_probe(adapter, &addr_data, rs5c372_probe);
+}
+
+static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
+{
+	int err = 0;
+	struct i2c_client *client;
+	struct rtc_device *rtc;
+
+	dev_dbg(&adapter->dev, "%s\n", __FUNCTION__);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!(client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	/* I2C client */
+	client->addr = address;
+	client->driver = &rs5c372_driver;
+	client->adapter = adapter;
+
+	strlcpy(client->name, rs5c372_driver.driver.name, I2C_NAME_SIZE);
+
+	/* Inform the i2c layer */
+	if ((err = i2c_attach_client(client)))
+		goto exit_kfree;
+
+	dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+	rtc = rtc_device_register(rs5c372_driver.driver.name, &client->dev,
+				&rs5c372_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		err = PTR_ERR(rtc);
+		dev_err(&client->dev,
+			"unable to register the class device\n");
+		goto exit_detach;
+	}
+
+	i2c_set_clientdata(client, rtc);
+
+	device_create_file(&client->dev, &dev_attr_trim);
+	device_create_file(&client->dev, &dev_attr_osc);
+
+	return 0;
+
+exit_detach:
+	i2c_detach_client(client);
+
+exit_kfree:
+	kfree(client);
+
+exit:
+	return err;
+}
+
+static int rs5c372_detach(struct i2c_client *client)
+{
+	int err;
+	struct rtc_device *rtc = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __FUNCTION__);
+
+	if (rtc)
+		rtc_device_unregister(rtc);
+
+	if ((err = i2c_detach_client(client)))
+		return err;
+
+	kfree(client);
+
+	return 0;
+}
+
+static __init int rs5c372_init(void)
+{
+	return i2c_add_driver(&rs5c372_driver);
+}
+
+static __exit void rs5c372_exit(void)
+{
+	i2c_del_driver(&rs5c372_driver);
+}
+
+module_init(rs5c372_init);
+module_exit(rs5c372_exit);
+
+MODULE_AUTHOR(
+		"Pavel Mironchik <pmironchik@optifacio.net>, "
+		"Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("Ricoh RS5C372 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
-- 
cgit v0.10.2


From fd507e2ff3a5adaccbefa05f4bc9f58f44e930db Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:45 -0800
Subject: [PATCH] RTC subsystem: EP93XX driver

This patch adds a driver for the RTC embedded in the Cirrus Logic EP93XX
family of processors.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 609f555..b399259 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -117,6 +117,17 @@ config RTC_DRV_RS5C372
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-rs5c372.
 
+config RTC_DRV_EP93XX
+	tristate "Cirrus Logic EP93XX"
+	depends on RTC_CLASS && ARCH_EP93XX
+	help
+	  If you say yes here you get support for the
+	  RTC embedded in the Cirrus Logic EP93XX processors.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-ep93xx.
+
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	depends on RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index d2b4b17..38b4d87 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -16,4 +16,4 @@ obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_RS5C372)	+= rtc-rs5c372.o
-
+obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
new file mode 100644
index 0000000..0dd80ea
--- /dev/null
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -0,0 +1,162 @@
+/*
+ * A driver for the RTC embedded in the Cirrus Logic EP93XX processors
+ * Copyright (c) 2006 Tower Technologies
+ *
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+#include <asm/hardware.h>
+
+#define EP93XX_RTC_REG(x)	(EP93XX_RTC_BASE + (x))
+#define EP93XX_RTC_DATA		EP93XX_RTC_REG(0x0000)
+#define EP93XX_RTC_LOAD		EP93XX_RTC_REG(0x000C)
+#define EP93XX_RTC_SWCOMP	EP93XX_RTC_REG(0x0108)
+
+#define DRV_VERSION "0.2"
+
+static int ep93xx_get_swcomp(struct device *dev, unsigned short *preload,
+				unsigned short *delete)
+{
+	unsigned short comp = __raw_readl(EP93XX_RTC_SWCOMP);
+
+	if (preload)
+		*preload = comp & 0xffff;
+
+	if (delete)
+		*delete = (comp >> 16) & 0x1f;
+
+	return 0;
+}
+
+static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long time = __raw_readl(EP93XX_RTC_DATA);
+
+	rtc_time_to_tm(time, tm);
+	return 0;
+}
+
+static int ep93xx_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+	__raw_writel(secs + 1, EP93XX_RTC_LOAD);
+	return 0;
+}
+
+static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	int err;
+	unsigned long secs;
+
+	err = rtc_tm_to_time(tm, &secs);
+	if (err != 0)
+		return err;
+
+	return ep93xx_rtc_set_mmss(dev, secs);
+}
+
+static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	unsigned short preload, delete;
+
+	ep93xx_get_swcomp(dev, &preload, &delete);
+
+	seq_printf(seq, "24hr\t\t: yes\n");
+	seq_printf(seq, "preload\t\t: %d\n", preload);
+	seq_printf(seq, "delete\t\t: %d\n", delete);
+
+	return 0;
+}
+
+static struct rtc_class_ops ep93xx_rtc_ops = {
+	.read_time	= ep93xx_rtc_read_time,
+	.set_time	= ep93xx_rtc_set_time,
+	.set_mmss	= ep93xx_rtc_set_mmss,
+	.proc		= ep93xx_rtc_proc,
+};
+
+static ssize_t ep93xx_sysfs_show_comp_preload(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	unsigned short preload;
+
+	ep93xx_get_swcomp(dev, &preload, NULL);
+
+	return sprintf(buf, "%d\n", preload);
+}
+static DEVICE_ATTR(comp_preload, S_IRUGO, ep93xx_sysfs_show_comp_preload, NULL);
+
+static ssize_t ep93xx_sysfs_show_comp_delete(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	unsigned short delete;
+
+	ep93xx_get_swcomp(dev, NULL, &delete);
+
+	return sprintf(buf, "%d\n", delete);
+}
+static DEVICE_ATTR(comp_delete, S_IRUGO, ep93xx_sysfs_show_comp_delete, NULL);
+
+
+static int __devinit ep93xx_rtc_probe(struct platform_device *dev)
+{
+	struct rtc_device *rtc = rtc_device_register("ep93xx",
+				&dev->dev, &ep93xx_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		dev_err(&dev->dev, "unable to register\n");
+		return PTR_ERR(rtc);
+	}
+
+	platform_set_drvdata(dev, rtc);
+
+	device_create_file(&dev->dev, &dev_attr_comp_preload);
+	device_create_file(&dev->dev, &dev_attr_comp_delete);
+
+	return 0;
+}
+
+static int __devexit ep93xx_rtc_remove(struct platform_device *dev)
+{
+	struct rtc_device *rtc = platform_get_drvdata(dev);
+
+ 	if (rtc)
+		rtc_device_unregister(rtc);
+
+	platform_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver ep93xx_rtc_platform_driver = {
+	.driver		= {
+		.name	= "ep93xx-rtc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= ep93xx_rtc_probe,
+	.remove		= __devexit_p(ep93xx_rtc_remove),
+};
+
+static int __init ep93xx_rtc_init(void)
+{
+	return platform_driver_register(&ep93xx_rtc_platform_driver);
+}
+
+static void __exit ep93xx_rtc_exit(void)
+{
+	platform_driver_unregister(&ep93xx_rtc_platform_driver);
+}
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("EP93XX RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(ep93xx_rtc_init);
+module_exit(ep93xx_rtc_exit);
-- 
cgit v0.10.2


From e842f1c8ff8a88f290e26d1139e89aad02c4e0c3 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Mon, 27 Mar 2006 01:16:46 -0800
Subject: [PATCH] RTC subsystem: SA1100/PXA2XX driver

Add an RTC subsystem driver for the ARM SA1100/PXA2XX processor RTC.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c
index 9b48a90..5efa847 100644
--- a/arch/arm/mach-pxa/generic.c
+++ b/arch/arm/mach-pxa/generic.c
@@ -319,6 +319,11 @@ void __init pxa_set_ficp_info(struct pxaficp_platform_data *info)
 	pxaficp_device.dev.platform_data = info;
 }
 
+static struct platform_device pxartc_device = {
+	.name		= "sa1100-rtc",
+	.id		= -1,
+};
+
 static struct platform_device *devices[] __initdata = {
 	&pxamci_device,
 	&udc_device,
@@ -329,6 +334,7 @@ static struct platform_device *devices[] __initdata = {
 	&pxaficp_device,
 	&i2c_device,
 	&i2s_device,
+	&pxartc_device,
 };
 
 static int __init pxa_init(void)
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 2abdc41..9ea7155 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -324,6 +324,11 @@ void sa11x0_set_irda_data(struct irda_platform_data *irda)
 	sa11x0ir_device.dev.platform_data = irda;
 }
 
+static struct platform_device sa11x0rtc_device = {
+	.name		= "sa1100-rtc",
+	.id		= -1,
+};
+
 static struct platform_device *sa11x0_devices[] __initdata = {
 	&sa11x0udc_device,
 	&sa11x0uart1_device,
@@ -333,6 +338,7 @@ static struct platform_device *sa11x0_devices[] __initdata = {
 	&sa11x0pcmcia_device,
 	&sa11x0fb_device,
 	&sa11x0mtd_device,
+	&sa11x0rtc_device,
 };
 
 static int __init sa1100_init(void)
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index b399259..166232a 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -127,6 +127,15 @@ config RTC_DRV_EP93XX
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ep93xx.
 
+config RTC_DRV_SA1100
+	tristate "SA11x0/PXA2xx"
+	depends on RTC_CLASS && (ARCH_SA1100 || ARCH_PXA)
+	help
+	  If you say Y here you will get access to the real time clock
+	  built into your SA11x0 or PXA2xx CPU.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rtc-sa1100.
 
 config RTC_DRV_TEST
 	tristate "Test driver/device"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 38b4d87..56820d1 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_RS5C372)	+= rtc-rs5c372.o
 obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
+obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
new file mode 100644
index 0000000..83b2bb4
--- /dev/null
+++ b/drivers/rtc/rtc-sa1100.c
@@ -0,0 +1,388 @@
+/*
+ * Real Time Clock interface for StrongARM SA1x00 and XScale PXA2xx
+ *
+ * Copyright (c) 2000 Nils Faerber
+ *
+ * Based on rtc.c by Paul Gortmaker
+ *
+ * Original Driver by Nils Faerber <nils@kernelconcepts.de>
+ *
+ * Modifications from:
+ *   CIH <cih@coventive.com>
+ *   Nicolas Pitre <nico@cam.org>
+ *   Andrew Christian <andrew.christian@hp.com>
+ *
+ * Converted to the RTC subsystem and Driver Model
+ *   by Richard Purdie <rpurdie@rpsys.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/pm.h>
+
+#include <asm/bitops.h>
+#include <asm/hardware.h>
+#include <asm/irq.h>
+#include <asm/rtc.h>
+
+#ifdef CONFIG_ARCH_PXA
+#include <asm/arch/pxa-regs.h>
+#endif
+
+#define TIMER_FREQ		CLOCK_TICK_RATE
+#define RTC_DEF_DIVIDER		32768 - 1
+#define RTC_DEF_TRIM		0
+
+static unsigned long rtc_freq = 1024;
+static struct rtc_time rtc_alarm;
+static spinlock_t sa1100_rtc_lock = SPIN_LOCK_UNLOCKED;
+
+static int rtc_update_alarm(struct rtc_time *alrm)
+{
+	struct rtc_time alarm_tm, now_tm;
+	unsigned long now, time;
+	int ret;
+
+	do {
+		now = RCNR;
+		rtc_time_to_tm(now, &now_tm);
+		rtc_next_alarm_time(&alarm_tm, &now_tm, alrm);
+		ret = rtc_tm_to_time(&alarm_tm, &time);
+		if (ret != 0)
+			break;
+
+		RTSR = RTSR & (RTSR_HZE|RTSR_ALE|RTSR_AL);
+		RTAR = time;
+	} while (now != RCNR);
+
+	return ret;
+}
+
+static irqreturn_t sa1100_rtc_interrupt(int irq, void *dev_id,
+		struct pt_regs *regs)
+{
+	struct platform_device *pdev = to_platform_device(dev_id);
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+	unsigned int rtsr;
+	unsigned long events = 0;
+
+	spin_lock(&sa1100_rtc_lock);
+
+	rtsr = RTSR;
+	/* clear interrupt sources */
+	RTSR = 0;
+	RTSR = (RTSR_AL | RTSR_HZ) & (rtsr >> 2);
+
+	/* clear alarm interrupt if it has occurred */
+	if (rtsr & RTSR_AL)
+		rtsr &= ~RTSR_ALE;
+	RTSR = rtsr & (RTSR_ALE | RTSR_HZE);
+
+	/* update irq data & counter */
+	if (rtsr & RTSR_AL)
+		events |= RTC_AF | RTC_IRQF;
+	if (rtsr & RTSR_HZ)
+		events |= RTC_UF | RTC_IRQF;
+
+	rtc_update_irq(&rtc->class_dev, 1, events);
+
+	if (rtsr & RTSR_AL && rtc_periodic_alarm(&rtc_alarm))
+		rtc_update_alarm(&rtc_alarm);
+
+	spin_unlock(&sa1100_rtc_lock);
+
+	return IRQ_HANDLED;
+}
+
+static int rtc_timer1_count;
+
+static irqreturn_t timer1_interrupt(int irq, void *dev_id,
+		struct pt_regs *regs)
+{
+	struct platform_device *pdev = to_platform_device(dev_id);
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+
+	/*
+	 * If we match for the first time, rtc_timer1_count will be 1.
+	 * Otherwise, we wrapped around (very unlikely but
+	 * still possible) so compute the amount of missed periods.
+	 * The match reg is updated only when the data is actually retrieved
+	 * to avoid unnecessary interrupts.
+	 */
+	OSSR = OSSR_M1;	/* clear match on timer1 */
+
+	rtc_update_irq(&rtc->class_dev, rtc_timer1_count, RTC_PF | RTC_IRQF);
+
+	if (rtc_timer1_count == 1)
+		rtc_timer1_count = (rtc_freq * ((1<<30)/(TIMER_FREQ>>2)));
+
+	return IRQ_HANDLED;
+}
+
+static int sa1100_rtc_read_callback(struct device *dev, int data)
+{
+	if (data & RTC_PF) {
+		/* interpolate missed periods and set match for the next */
+		unsigned long period = TIMER_FREQ/rtc_freq;
+		unsigned long oscr = OSCR;
+		unsigned long osmr1 = OSMR1;
+		unsigned long missed = (oscr - osmr1)/period;
+		data += missed << 8;
+		OSSR = OSSR_M1;	/* clear match on timer 1 */
+		OSMR1 = osmr1 + (missed + 1)*period;
+		/* Ensure we didn't miss another match in the mean time.
+		 * Here we compare (match - OSCR) 8 instead of 0 --
+		 * see comment in pxa_timer_interrupt() for explanation.
+		 */
+		while( (signed long)((osmr1 = OSMR1) - OSCR) <= 8 ) {
+			data += 0x100;
+			OSSR = OSSR_M1;	/* clear match on timer 1 */
+			OSMR1 = osmr1 + period;
+		}
+	}
+	return data;
+}
+
+static int sa1100_rtc_open(struct device *dev)
+{
+	int ret;
+
+	ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, SA_INTERRUPT,
+				"rtc 1Hz", dev);
+	if (ret) {
+		printk(KERN_ERR "rtc: IRQ%d already in use.\n", IRQ_RTC1Hz);
+		goto fail_ui;
+	}
+	ret = request_irq(IRQ_RTCAlrm, sa1100_rtc_interrupt, SA_INTERRUPT,
+				"rtc Alrm", dev);
+	if (ret) {
+		printk(KERN_ERR "rtc: IRQ%d already in use.\n", IRQ_RTCAlrm);
+		goto fail_ai;
+	}
+	ret = request_irq(IRQ_OST1, timer1_interrupt, SA_INTERRUPT,
+				"rtc timer", dev);
+	if (ret) {
+		printk(KERN_ERR "rtc: IRQ%d already in use.\n", IRQ_OST1);
+		goto fail_pi;
+	}
+	return 0;
+
+ fail_pi:
+	free_irq(IRQ_RTCAlrm, NULL);
+ fail_ai:
+	free_irq(IRQ_RTC1Hz, NULL);
+ fail_ui:
+	return ret;
+}
+
+static void sa1100_rtc_release(struct device *dev)
+{
+	spin_lock_irq(&sa1100_rtc_lock);
+	RTSR = 0;
+	OIER &= ~OIER_E1;
+	OSSR = OSSR_M1;
+	spin_unlock_irq(&sa1100_rtc_lock);
+
+	free_irq(IRQ_OST1, dev);
+	free_irq(IRQ_RTCAlrm, dev);
+	free_irq(IRQ_RTC1Hz, dev);
+}
+
+
+static int sa1100_rtc_ioctl(struct device *dev, unsigned int cmd,
+		unsigned long arg)
+{
+	switch(cmd) {
+	case RTC_AIE_OFF:
+		spin_lock_irq(&sa1100_rtc_lock);
+		RTSR &= ~RTSR_ALE;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_AIE_ON:
+		spin_lock_irq(&sa1100_rtc_lock);
+		RTSR |= RTSR_ALE;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_UIE_OFF:
+		spin_lock_irq(&sa1100_rtc_lock);
+		RTSR &= ~RTSR_HZE;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_UIE_ON:
+		spin_lock_irq(&sa1100_rtc_lock);
+		RTSR |= RTSR_HZE;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_PIE_OFF:
+		spin_lock_irq(&sa1100_rtc_lock);
+		OIER &= ~OIER_E1;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_PIE_ON:
+		if ((rtc_freq > 64) && !capable(CAP_SYS_RESOURCE))
+			return -EACCES;
+		spin_lock_irq(&sa1100_rtc_lock);
+		OSMR1 = TIMER_FREQ/rtc_freq + OSCR;
+		OIER |= OIER_E1;
+		rtc_timer1_count = 1;
+		spin_unlock_irq(&sa1100_rtc_lock);
+		return 0;
+	case RTC_IRQP_READ:
+		return put_user(rtc_freq, (unsigned long *)arg);
+	case RTC_IRQP_SET:
+		if (arg < 1 || arg > TIMER_FREQ)
+			return -EINVAL;
+		if ((arg > 64) && (!capable(CAP_SYS_RESOURCE)))
+			return -EACCES;
+		rtc_freq = arg;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int sa1100_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	rtc_time_to_tm(RCNR, tm);
+	return 0;
+}
+
+static int sa1100_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long time;
+	int ret;
+
+	ret = rtc_tm_to_time(tm, &time);
+	if (ret == 0)
+		RCNR = time;
+	return ret;
+}
+
+static int sa1100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	memcpy(&alrm->time, &rtc_alarm, sizeof(struct rtc_time));
+	alrm->pending = RTSR & RTSR_AL ? 1 : 0;
+	return 0;
+}
+
+static int sa1100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	int ret;
+
+	spin_lock_irq(&sa1100_rtc_lock);
+	ret = rtc_update_alarm(&alrm->time);
+	if (ret == 0) {
+		memcpy(&rtc_alarm, &alrm->time, sizeof(struct rtc_time));
+
+		if (alrm->enabled)
+			enable_irq_wake(IRQ_RTCAlrm);
+		else
+			disable_irq_wake(IRQ_RTCAlrm);
+	}
+	spin_unlock_irq(&sa1100_rtc_lock);
+
+	return ret;
+}
+
+static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	seq_printf(seq, "trim/divider\t: 0x%08x\n", RTTR);
+	seq_printf(seq, "alarm_IRQ\t: %s\n",
+			(RTSR & RTSR_ALE) ? "yes" : "no" );
+	seq_printf(seq, "update_IRQ\t: %s\n",
+			(RTSR & RTSR_HZE) ? "yes" : "no");
+	seq_printf(seq, "periodic_IRQ\t: %s\n",
+			(OIER & OIER_E1) ? "yes" : "no");
+	seq_printf(seq, "periodic_freq\t: %ld\n", rtc_freq);
+
+	return 0;
+}
+
+static struct rtc_class_ops sa1100_rtc_ops = {
+	.open = sa1100_rtc_open,
+	.read_callback = sa1100_rtc_read_callback,
+	.release = sa1100_rtc_release,
+	.ioctl = sa1100_rtc_ioctl,
+	.read_time = sa1100_rtc_read_time,
+	.set_time = sa1100_rtc_set_time,
+	.read_alarm = sa1100_rtc_read_alarm,
+	.set_alarm = sa1100_rtc_set_alarm,
+	.proc = sa1100_rtc_proc,
+};
+
+static int sa1100_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtc;
+
+	/*
+	 * According to the manual we should be able to let RTTR be zero
+	 * and then a default diviser for a 32.768KHz clock is used.
+	 * Apparently this doesn't work, at least for my SA1110 rev 5.
+	 * If the clock divider is uninitialized then reset it to the
+	 * default value to get the 1Hz clock.
+	 */
+	if (RTTR == 0) {
+		RTTR = RTC_DEF_DIVIDER + (RTC_DEF_TRIM << 16);
+		printk(KERN_WARNING "rtc: warning: initializing default clock divider/trim value\n");
+		/* The current RTC value probably doesn't make sense either */
+		RCNR = 0;
+	}
+
+	rtc = rtc_device_register(pdev->name, &pdev->dev, &sa1100_rtc_ops,
+				THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		dev_err(&pdev->dev, "Unable to register the RTC device\n");
+		return PTR_ERR(rtc);
+	}
+
+	platform_set_drvdata(pdev, rtc);
+
+	dev_info(&pdev->dev, "SA11xx/PXA2xx RTC Registered\n");
+
+	return 0;
+}
+
+static int sa1100_rtc_remove(struct platform_device *pdev)
+{
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+
+ 	if (rtc)
+		rtc_device_unregister(rtc);
+
+	return 0;
+}
+
+static struct platform_driver sa1100_rtc_driver = {
+	.probe		= sa1100_rtc_probe,
+	.remove		= sa1100_rtc_remove,
+	.driver		= {
+		.name		= "sa1100-rtc",
+	},
+};
+
+static int __init sa1100_rtc_init(void)
+{
+	return platform_driver_register(&sa1100_rtc_driver);
+}
+
+static void __exit sa1100_rtc_exit(void)
+{
+	platform_driver_unregister(&sa1100_rtc_driver);
+}
+
+module_init(sa1100_rtc_init);
+module_exit(sa1100_rtc_exit);
+
+MODULE_AUTHOR("Richard Purdie <rpurdie@rpsys.net>");
+MODULE_DESCRIPTION("SA11x0/PXA2xx Realtime Clock Driver (RTC)");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 1d98af87270cc08bb8251e004b9dc63cc838f24b Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Mon, 27 Mar 2006 01:16:47 -0800
Subject: [PATCH] RTC subsystem: M48T86 driver

Add a driver for the ST M48T86 / Dallas DS12887 RTC.

This is a platform driver.  The platform device must provide I/O routines to
access the RTC.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 166232a..929dd80 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -117,6 +117,16 @@ config RTC_DRV_RS5C372
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-rs5c372.
 
+config RTC_DRV_M48T86
+	tristate "ST M48T86/Dallas DS12887"
+	depends on RTC_CLASS
+	help
+	  If you say Y here you will get support for the
+	  ST M48T86 and Dallas DS12887 RTC chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-m48t86.
+
 config RTC_DRV_EP93XX
 	tristate "Cirrus Logic EP93XX"
 	depends on RTC_CLASS && ARCH_EP93XX
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 56820d1..8d4c7fe 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -16,5 +16,6 @@ obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_RS5C372)	+= rtc-rs5c372.o
+obj-$(CONFIG_RTC_DRV_M48T86)	+= rtc-m48t86.o
 obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
new file mode 100644
index 0000000..db445c8
--- /dev/null
+++ b/drivers/rtc/rtc-m48t86.c
@@ -0,0 +1,209 @@
+/*
+ * ST M48T86 / Dallas DS12887 RTC driver
+ * Copyright (c) 2006 Tower Technologies
+ *
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This drivers only supports the clock running in BCD and 24H mode.
+ * If it will be ever adapted to binary and 12H mode, care must be taken
+ * to not introduce bugs.
+ */
+
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+#include <linux/m48t86.h>
+#include <linux/bcd.h>
+
+#define M48T86_REG_SEC		0x00
+#define M48T86_REG_SECALRM	0x01
+#define M48T86_REG_MIN		0x02
+#define M48T86_REG_MINALRM	0x03
+#define M48T86_REG_HOUR	0x04
+#define M48T86_REG_HOURALRM	0x05
+#define M48T86_REG_DOW		0x06 /* 1 = sunday */
+#define M48T86_REG_DOM		0x07
+#define M48T86_REG_MONTH	0x08 /* 1 - 12 */
+#define M48T86_REG_YEAR		0x09 /* 0 - 99 */
+#define M48T86_REG_A		0x0A
+#define M48T86_REG_B		0x0B
+#define M48T86_REG_C		0x0C
+#define M48T86_REG_D		0x0D
+
+#define M48T86_REG_B_H24	(1 << 1)
+#define M48T86_REG_B_DM		(1 << 2)
+#define M48T86_REG_B_SET	(1 << 7)
+#define M48T86_REG_D_VRT	(1 << 7)
+
+#define DRV_VERSION "0.1"
+
+
+static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char reg;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct m48t86_ops *ops = pdev->dev.platform_data;
+
+	reg = ops->readb(M48T86_REG_B);
+
+	if (reg & M48T86_REG_B_DM) {
+		/* data (binary) mode */
+		tm->tm_sec	= ops->readb(M48T86_REG_SEC);
+		tm->tm_min	= ops->readb(M48T86_REG_MIN);
+		tm->tm_hour	= ops->readb(M48T86_REG_HOUR) & 0x3F;
+		tm->tm_mday	= ops->readb(M48T86_REG_DOM);
+		/* tm_mon is 0-11 */
+		tm->tm_mon	= ops->readb(M48T86_REG_MONTH) - 1;
+		tm->tm_year	= ops->readb(M48T86_REG_YEAR) + 100;
+		tm->tm_wday	= ops->readb(M48T86_REG_DOW);
+	} else {
+		/* bcd mode */
+		tm->tm_sec	= BCD2BIN(ops->readb(M48T86_REG_SEC));
+		tm->tm_min	= BCD2BIN(ops->readb(M48T86_REG_MIN));
+		tm->tm_hour	= BCD2BIN(ops->readb(M48T86_REG_HOUR) & 0x3F);
+		tm->tm_mday	= BCD2BIN(ops->readb(M48T86_REG_DOM));
+		/* tm_mon is 0-11 */
+		tm->tm_mon	= BCD2BIN(ops->readb(M48T86_REG_MONTH)) - 1;
+		tm->tm_year	= BCD2BIN(ops->readb(M48T86_REG_YEAR)) + 100;
+		tm->tm_wday	= BCD2BIN(ops->readb(M48T86_REG_DOW));
+	}
+
+	/* correct the hour if the clock is in 12h mode */
+	if (!(reg & M48T86_REG_B_H24))
+		if (ops->readb(M48T86_REG_HOUR) & 0x80)
+			tm->tm_hour += 12;
+
+	return 0;
+}
+
+static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned char reg;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct m48t86_ops *ops = pdev->dev.platform_data;
+
+	reg = ops->readb(M48T86_REG_B);
+
+	/* update flag and 24h mode */
+	reg |= M48T86_REG_B_SET | M48T86_REG_B_H24;
+	ops->writeb(reg, M48T86_REG_B);
+
+	if (reg & M48T86_REG_B_DM) {
+		/* data (binary) mode */
+		ops->writeb(tm->tm_sec, M48T86_REG_SEC);
+		ops->writeb(tm->tm_min, M48T86_REG_MIN);
+		ops->writeb(tm->tm_hour, M48T86_REG_HOUR);
+		ops->writeb(tm->tm_mday, M48T86_REG_DOM);
+		ops->writeb(tm->tm_mon + 1, M48T86_REG_MONTH);
+		ops->writeb(tm->tm_year % 100, M48T86_REG_YEAR);
+		ops->writeb(tm->tm_wday, M48T86_REG_DOW);
+	} else {
+		/* bcd mode */
+		ops->writeb(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
+		ops->writeb(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
+		ops->writeb(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
+		ops->writeb(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
+		ops->writeb(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
+		ops->writeb(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
+		ops->writeb(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+	}
+
+	/* update ended */
+	reg &= ~M48T86_REG_B_SET;
+	ops->writeb(reg, M48T86_REG_B);
+
+	return 0;
+}
+
+static int m48t86_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	unsigned char reg;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct m48t86_ops *ops = pdev->dev.platform_data;
+
+	reg = ops->readb(M48T86_REG_B);
+
+	seq_printf(seq, "24hr\t\t: %s\n",
+		 (reg & M48T86_REG_B_H24) ? "yes" : "no");
+
+	seq_printf(seq, "mode\t\t: %s\n",
+		 (reg & M48T86_REG_B_DM) ? "binary" : "bcd");
+
+	reg = ops->readb(M48T86_REG_D);
+
+	seq_printf(seq, "battery\t\t: %s\n",
+		 (reg & M48T86_REG_D_VRT) ? "ok" : "exhausted");
+
+	return 0;
+}
+
+static struct rtc_class_ops m48t86_rtc_ops = {
+	.read_time	= m48t86_rtc_read_time,
+	.set_time	= m48t86_rtc_set_time,
+	.proc		= m48t86_rtc_proc,
+};
+
+static int __devinit m48t86_rtc_probe(struct platform_device *dev)
+{
+	unsigned char reg;
+	struct m48t86_ops *ops = dev->dev.platform_data;
+	struct rtc_device *rtc = rtc_device_register("m48t86",
+				&dev->dev, &m48t86_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		dev_err(&dev->dev, "unable to register\n");
+		return PTR_ERR(rtc);
+	}
+
+	platform_set_drvdata(dev, rtc);
+
+	/* read battery status */
+	reg = ops->readb(M48T86_REG_D);
+	dev_info(&dev->dev, "battery %s\n",
+		(reg & M48T86_REG_D_VRT) ? "ok" : "exhausted");
+
+	return 0;
+}
+
+static int __devexit m48t86_rtc_remove(struct platform_device *dev)
+{
+	struct rtc_device *rtc = platform_get_drvdata(dev);
+
+ 	if (rtc)
+		rtc_device_unregister(rtc);
+
+	platform_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver m48t86_rtc_platform_driver = {
+	.driver		= {
+		.name	= "rtc-m48t86",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= m48t86_rtc_probe,
+	.remove		= __devexit_p(m48t86_rtc_remove),
+};
+
+static int __init m48t86_rtc_init(void)
+{
+	return platform_driver_register(&m48t86_rtc_platform_driver);
+}
+
+static void __exit m48t86_rtc_exit(void)
+{
+	platform_driver_unregister(&m48t86_rtc_platform_driver);
+}
+
+MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
+MODULE_DESCRIPTION("M48T86 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(m48t86_rtc_init);
+module_exit(m48t86_rtc_exit);
diff --git a/include/linux/m48t86.h b/include/linux/m48t86.h
new file mode 100644
index 0000000..9065199
--- /dev/null
+++ b/include/linux/m48t86.h
@@ -0,0 +1,16 @@
+/*
+ * ST M48T86 / Dallas DS12887 RTC driver
+ * Copyright (c) 2006 Tower Technologies
+ *
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+struct m48t86_ops
+{
+	void (*writeb)(unsigned char value, unsigned long addr);
+	unsigned char (*readb)(unsigned long addr);
+};
-- 
cgit v0.10.2


From 7597fee389b18e5ed3c8bd58f0012674beb279ed Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:02 -0800
Subject: [PATCH] pnp: parport: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

parport_pc_init() does nothing with "count", so remove it.  Then nobody uses
the return value of parport_pc_find_ports(), so make it void.  Finally, update
pnp_register_driver() usage.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 9302b8f..d589002 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -3126,9 +3126,9 @@ parport_pc_find_isa_ports (int autoirq, int autodma)
  * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
  * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
  */
-static int __init parport_pc_find_ports (int autoirq, int autodma)
+static void __init parport_pc_find_ports (int autoirq, int autodma)
 {
-	int count = 0, r;
+	int count = 0, err;
 
 #ifdef CONFIG_PARPORT_PC_SUPERIO
 	detect_and_report_winbond ();
@@ -3140,23 +3140,17 @@ static int __init parport_pc_find_ports (int autoirq, int autodma)
 
 	/* PnP ports, skip detection if SuperIO already found them */
 	if (!count) {
-		r = pnp_register_driver (&parport_pc_pnp_driver);
-		if (r >= 0) {
+		err = pnp_register_driver (&parport_pc_pnp_driver);
+		if (!err)
 			pnp_registered_parport = 1;
-			count += r;
-		}
 	}
 
 	/* ISA ports and whatever (see asm/parport.h). */
-	count += parport_pc_find_nonpci_ports (autoirq, autodma);
-
-	r = pci_register_driver (&parport_pc_pci_driver);
-	if (r)
-		return r;
-	pci_registered_parport = 1;
-	count += 1;
+	parport_pc_find_nonpci_ports (autoirq, autodma);
 
-	return count;
+	err = pci_register_driver (&parport_pc_pci_driver);
+	if (!err)
+		pci_registered_parport = 1;
 }
 
 /*
@@ -3381,8 +3375,6 @@ __setup("parport_init_mode=",parport_init_mode_setup);
 
 static int __init parport_pc_init(void)
 {
-	int count = 0;
-
 	if (parse_parport_params())
 		return -EINVAL;
 
@@ -3395,12 +3387,11 @@ static int __init parport_pc_init(void)
 				break;
 			if ((io_hi[i]) == PARPORT_IOHI_AUTO)
 			       io_hi[i] = 0x400 + io[i];
-			if (parport_pc_probe_port(io[i], io_hi[i],
-						  irqval[i], dmaval[i], NULL))
-				count++;
+			parport_pc_probe_port(io[i], io_hi[i],
+						  irqval[i], dmaval[i], NULL);
 		}
 	} else
-		count += parport_pc_find_ports (irqval[0], dmaval[0]);
+		parport_pc_find_ports (irqval[0], dmaval[0]);
 
 	return 0;
 }
-- 
cgit v0.10.2


From f301ae6a690abe7edd2f92fa6df0b1b06986a6ad Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:04 -0800
Subject: [PATCH] pnp: mpu401: adjust pnp_register_driver signature

This series of patches removes the assumption that pnp_register_driver()
returns the number of devices claimed.  Returning the count is unreliable
because devices may be hot-plugged in the future.  (Many devices don't support
hot-plug, of course, but PNP in general does.)

This changes the convention to "zero for success, or a negative error value,"
which matches pci_register_driver(), acpi_bus_register_driver(), and
platform_driver_register().

If drivers need to know the number of devices, they can count calls to their
.probe() methods.

This patch:

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c
index 9d10d79..9ea3059 100644
--- a/sound/drivers/mpu401/mpu401.c
+++ b/sound/drivers/mpu401/mpu401.c
@@ -59,7 +59,8 @@ module_param_array(irq, int, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ # for MPU-401 device.");
 
 static struct platform_device *platform_devices[SNDRV_CARDS];
-static int pnp_registered = 0;
+static int pnp_registered;
+static unsigned int snd_mpu401_devices;
 
 static int snd_mpu401_create(int dev, struct snd_card **rcard)
 {
@@ -197,6 +198,7 @@ static int __devinit snd_mpu401_pnp_probe(struct pnp_dev *pnp_dev,
 		}
 		snd_card_set_dev(card, &pnp_dev->dev);
 		pnp_set_drvdata(pnp_dev, card);
+		snd_mpu401_devices++;
 		++dev;
 		return 0;
 	}
@@ -234,12 +236,11 @@ static void __init_or_module snd_mpu401_unregister_all(void)
 
 static int __init alsa_card_mpu401_init(void)
 {
-	int i, err, devices;
+	int i, err;
 
 	if ((err = platform_driver_register(&snd_mpu401_driver)) < 0)
 		return err;
 
-	devices = 0;
 	for (i = 0; i < SNDRV_CARDS; i++) {
 		struct platform_device *device;
 		if (! enable[i])
@@ -255,14 +256,13 @@ static int __init alsa_card_mpu401_init(void)
 			goto errout;
 		}
 		platform_devices[i] = device;
-		devices++;
+		snd_mpu401_devices++;
 	}
-	if ((err = pnp_register_driver(&snd_mpu401_pnp_driver)) >= 0) {
+	err = pnp_register_driver(&snd_mpu401_pnp_driver);
+	if (!err)
 		pnp_registered = 1;
-		devices += err;
-	}
 
-	if (!devices) {
+	if (!snd_mpu401_devices) {
 #ifdef MODULE
 		printk(KERN_ERR "MPU-401 device not found or device busy\n");
 #endif
-- 
cgit v0.10.2


From ed7cb1913168fdce6d084f2f5cd6818851313383 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:04 -0800
Subject: [PATCH] pnp: cs4236: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c
index 4060918..382bb17 100644
--- a/sound/isa/cs423x/cs4236.c
+++ b/sound/isa/cs423x/cs4236.c
@@ -133,6 +133,7 @@ static int pnpc_registered;
 static int pnp_registered;
 #endif
 #endif /* CONFIG_PNP */
+static unsigned int snd_cs423x_devices;
 
 struct snd_card_cs4236 {
 	struct snd_cs4231 *chip;
@@ -564,7 +565,7 @@ static int __init snd_cs423x_nonpnp_probe(struct platform_device *pdev)
 		snd_card_free(card);
 		return err;
 	}
-	
+
 	platform_set_drvdata(pdev, card);
 	return 0;
 }
@@ -650,6 +651,7 @@ static int __devinit snd_cs4232_pnpbios_detect(struct pnp_dev *pdev,
 	}
 	pnp_set_drvdata(pdev, card);
 	dev++;
+	snd_cs423x_devices++;
 	return 0;
 }
 
@@ -713,6 +715,7 @@ static int __devinit snd_cs423x_pnpc_detect(struct pnp_card_link *pcard,
 	}
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	snd_cs423x_devices++;
 	return 0;
 }
 
@@ -721,7 +724,7 @@ static void __devexit snd_cs423x_pnpc_remove(struct pnp_card_link * pcard)
 	snd_card_free(pnp_get_card_drvdata(pcard));
 	pnp_set_card_drvdata(pcard, NULL);
 }
-                        
+
 #ifdef CONFIG_PM
 static int snd_cs423x_pnpc_suspend(struct pnp_card_link *pcard, pm_message_t state)
 {
@@ -766,7 +769,7 @@ static void __init_or_module snd_cs423x_unregister_all(void)
 
 static int __init alsa_card_cs423x_init(void)
 {
-	int i, err, cards = 0;
+	int i, err;
 
 	if ((err = platform_driver_register(&cs423x_nonpnp_driver)) < 0)
 		return err;
@@ -782,24 +785,20 @@ static int __init alsa_card_cs423x_init(void)
 			goto errout;
 		}
 		platform_devices[i] = device;
-		cards++;
+		snd_cs423x_devices++;
 	}
 #ifdef CONFIG_PNP
 #ifdef CS4232
-	i = pnp_register_driver(&cs4232_pnp_driver);
-	if (i >= 0) {
+	err = pnp_register_driver(&cs4232_pnp_driver);
+	if (!err)
 		pnp_registered = 1;
-		cards += i;
-	}
 #endif
-	i = pnp_register_card_driver(&cs423x_pnpc_driver);
-	if (i >= 0) {
+	err = pnp_register_card_driver(&cs423x_pnpc_driver);
+	if (!err)
 		pnpc_registered = 1;
-		cards += i;
-	}
 #endif /* CONFIG_PNP */
 
-	if (!cards) {
+	if (!snd_cs423x_devices) {
 #ifdef MODULE
 		printk(KERN_ERR IDENT " soundcard not found or device busy\n");
 #endif
-- 
cgit v0.10.2


From 8c59c4a2216583cf79eaaaf71ef1dd5757e12854 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:05 -0800
Subject: [PATCH] pnp: opl3sa2: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c
index 56fcd8a..c906e20 100644
--- a/sound/isa/opl3sa2.c
+++ b/sound/isa/opl3sa2.c
@@ -95,6 +95,7 @@ static struct platform_device *platform_devices[SNDRV_CARDS];
 static int pnp_registered;
 static int pnpc_registered;
 #endif
+static unsigned int snd_opl3sa2_devices;
 
 /* control ports */
 #define OPL3SA2_PM_CTRL		0x01
@@ -760,6 +761,7 @@ static int __devinit snd_opl3sa2_pnp_detect(struct pnp_dev *pdev,
 	}
 	pnp_set_drvdata(pdev, card);
 	dev++;
+	snd_opl3sa2_devices++;
 	return 0;
 }
 
@@ -826,6 +828,7 @@ static int __devinit snd_opl3sa2_pnp_cdetect(struct pnp_card_link *pcard,
 	}
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	snd_opl3sa2_devices++;
 	return 0;
 }
 
@@ -944,7 +947,7 @@ static void __init_or_module snd_opl3sa2_unregister_all(void)
 
 static int __init alsa_card_opl3sa2_init(void)
 {
-	int i, err, cards = 0;
+	int i, err;
 
 	if ((err = platform_driver_register(&snd_opl3sa2_nonpnp_driver)) < 0)
 		return err;
@@ -964,23 +967,19 @@ static int __init alsa_card_opl3sa2_init(void)
 			goto errout;
 		}
 		platform_devices[i] = device;
-		cards++;
+		snd_opl3sa2_devices++;
 	}
 
 #ifdef CONFIG_PNP
 	err = pnp_register_driver(&opl3sa2_pnp_driver);
-	if (err >= 0) {
+	if (!err)
 		pnp_registered = 1;
-		cards += err;
-	}
 	err = pnp_register_card_driver(&opl3sa2_pnpc_driver);
-	if (err >= 0) {
+	if (!err)
 		pnpc_registered = 1;
-		cards += err;
-	}
 #endif
 
-	if (!cards) {
+	if (!snd_opl3sa2_devices) {
 #ifdef MODULE
 		snd_printk(KERN_ERR "Yamaha OPL3-SA soundcard not found or device busy\n");
 #endif
-- 
cgit v0.10.2


From 803d0abb3dcfc93701c8a8dc7f2968a47271214c Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:06 -0800
Subject: [PATCH] pnp: IRDA: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 83141a3..9aa074b 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -207,7 +207,7 @@ static int __init nsc_ircc_init(void)
  	/* Register with PnP subsystem to detect disable ports */
 	ret = pnp_register_driver(&nsc_ircc_pnp_driver);
 
- 	if (ret >= 0)
+ 	if (!ret)
  		pnp_registered = 1;
 
 	ret = -ENODEV;
-- 
cgit v0.10.2


From 070c6999831dc4cfd9b07c74c2fea1964d7adfec Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:07 -0800
Subject: [PATCH] pnp: cs4232: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/cs4232.c b/sound/oss/cs4232.c
index 7c59e2d..c7f86f0 100644
--- a/sound/oss/cs4232.c
+++ b/sound/oss/cs4232.c
@@ -360,6 +360,8 @@ static int __initdata synthio	= -1;
 static int __initdata synthirq	= -1;
 static int __initdata isapnp	= 1;
 
+static unsigned int cs4232_devices;
+
 MODULE_DESCRIPTION("CS4232 based soundcard driver"); 
 MODULE_AUTHOR("Hannu Savolainen, Paul Barton-Davis"); 
 MODULE_LICENSE("GPL");
@@ -421,6 +423,7 @@ static int cs4232_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev
 		return -ENODEV;
 	}
 	pnp_set_drvdata(dev,isapnpcfg);
+	cs4232_devices++;
 	return 0;
 }
 
@@ -455,10 +458,11 @@ static int __init init_cs4232(void)
 #endif
 	cfg.irq = -1;
 
-	if (isapnp &&
-	    (pnp_register_driver(&cs4232_driver) > 0)
-	)
-		return 0;
+	if (isapnp) {
+		pnp_register_driver(&cs4232_driver);
+		if (cs4232_devices)
+			return 0;
+	}
 
 	if(io==-1||irq==-1||dma==-1)
 	{
@@ -503,7 +507,8 @@ static int __init setup_cs4232(char *str)
 	int ints[7];
 
 	/* If we have isapnp cards, no need for options */
-	if (pnp_register_driver(&cs4232_driver) > 0)
+	pnp_register_driver(&cs4232_driver);
+	if (cs4232_devices)
 		return 1;
 	
 	str = get_options(str, ARRAY_SIZE(ints), ints);
-- 
cgit v0.10.2


From 982c609448b9d724e1c3a0d5aeee388c064479f0 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:08 -0800
Subject: [PATCH] pnp: PNP: adjust pnp_register_driver signature

Remove the assumption that pnp_register_driver() returns the number of devices
claimed.  Returning the count is unreliable because devices may be hot-plugged
in the future.

This changes the convention to "zero for success, or a negative error value,"
which matches pci_register_driver(), acpi_bus_register_driver(), and
platform_driver_register().

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
index af0f6ea..9529c9c 100644
--- a/Documentation/pnp.txt
+++ b/Documentation/pnp.txt
@@ -115,6 +115,9 @@ pnp_unregister_protocol
 pnp_register_driver
 - adds a PnP driver to the Plug and Play Layer
 - this includes driver model integration
+- returns zero for success or a negative error number for failure; count
+  calls to the .add() method if you need to know how many devices bind to
+  the driver
 
 pnp_unregister_driver
 - removes a PnP driver from the Plug and Play Layer
diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index b68eef25..bb19c64 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -47,7 +47,7 @@ static void card_remove(struct pnp_dev * dev)
 {
 	dev->card_link = NULL;
 }
- 
+
 static void card_remove_first(struct pnp_dev * dev)
 {
 	struct pnp_card_driver * drv = to_pnp_card_driver(dev->driver);
@@ -361,7 +361,7 @@ static int card_resume(struct pnp_dev *dev)
 
 int pnp_register_card_driver(struct pnp_card_driver * drv)
 {
-	int count;
+	int error;
 	struct list_head *pos, *temp;
 
 	drv->link.name = drv->name;
@@ -372,21 +372,19 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
 	drv->link.suspend = drv->suspend ? card_suspend : NULL;
 	drv->link.resume = drv->resume ? card_resume : NULL;
 
-	count = pnp_register_driver(&drv->link);
-	if (count < 0)
-		return count;
+	error = pnp_register_driver(&drv->link);
+	if (error < 0)
+		return error;
 
 	spin_lock(&pnp_lock);
 	list_add_tail(&drv->global_list, &pnp_card_drivers);
 	spin_unlock(&pnp_lock);
 
-	count = 0;
-
 	list_for_each_safe(pos,temp,&pnp_cards){
 		struct pnp_card *card = list_entry(pos, struct pnp_card, global_list);
-		count += card_probe(card,drv);
+		card_probe(card,drv);
 	}
-	return count;
+	return 0;
 }
 
 /**
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 7cafacd..e54c153 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -201,31 +201,14 @@ struct bus_type pnp_bus_type = {
 	.resume = pnp_bus_resume,
 };
 
-
-static int count_devices(struct device * dev, void * c)
-{
-	int * count = c;
-	(*count)++;
-	return 0;
-}
-
 int pnp_register_driver(struct pnp_driver *drv)
 {
-	int count;
-
 	pnp_dbg("the driver '%s' has been registered", drv->name);
 
 	drv->driver.name = drv->name;
 	drv->driver.bus = &pnp_bus_type;
 
-	count = driver_register(&drv->driver);
-
-	/* get the number of initial matches */
-	if (count >= 0){
-		count = 0;
-		driver_for_each_device(&drv->driver, NULL, &count, count_devices);
-	}
-	return count;
+	return driver_register(&drv->driver);
 }
 
 void pnp_unregister_driver(struct pnp_driver *drv)
-- 
cgit v0.10.2


From 5f53f4e2107fcfdd33da1cc34a86c9aef5e76af6 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:08 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: ad1816a

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c
index 7051f77..31f299a 100644
--- a/sound/isa/ad1816a/ad1816a.c
+++ b/sound/isa/ad1816a/ad1816a.c
@@ -262,6 +262,8 @@ static int __devinit snd_card_ad1816a_probe(int dev, struct pnp_card_link *pcard
 	return 0;
 }
 
+static unsigned int __devinitdata ad1816a_devices;
+
 static int __devinit snd_ad1816a_pnp_detect(struct pnp_card_link *card,
 					    const struct pnp_card_device_id *id)
 {
@@ -275,6 +277,7 @@ static int __devinit snd_ad1816a_pnp_detect(struct pnp_card_link *card,
 		if (res < 0)
 			return res;
 		dev++;
+		ad1816a_devices++;
 		return 0;
 	}
         return -ENODEV;
@@ -297,10 +300,13 @@ static struct pnp_card_driver ad1816a_pnpc_driver = {
 
 static int __init alsa_card_ad1816a_init(void)
 {
-	int cards;
+	int err;
+
+	err = pnp_register_card_driver(&ad1816a_pnpc_driver);
+	if (err)
+		return err;
 
-	cards = pnp_register_card_driver(&ad1816a_pnpc_driver);
-	if (cards <= 0) {
+	if (!ad1816a_devices) {
 		pnp_unregister_card_driver(&ad1816a_pnpc_driver);
 #ifdef MODULE
 		printk(KERN_ERR "no AD1816A based soundcards found.\n");
-- 
cgit v0.10.2


From 51427ec0f222cb73b21f3849416a95d751bdd742 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:09 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: als100

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix a __init/__devinit issue.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/als100.c b/sound/isa/als100.c
index 9b77c17..a52bd8a 100644
--- a/sound/isa/als100.c
+++ b/sound/isa/als100.c
@@ -199,7 +199,7 @@ static int __devinit snd_card_als100_pnp(int dev, struct snd_card_als100 *acard,
 	return 0;
 }
 
-static int __init snd_card_als100_probe(int dev,
+static int __devinit snd_card_als100_probe(int dev,
 					struct pnp_card_link *pcard,
 					const struct pnp_card_device_id *pid)
 {
@@ -281,6 +281,8 @@ static int __init snd_card_als100_probe(int dev,
 	return 0;
 }
 
+static unsigned int __devinitdata als100_devices;
+
 static int __devinit snd_als100_pnp_detect(struct pnp_card_link *card,
 					   const struct pnp_card_device_id *id)
 {
@@ -294,6 +296,7 @@ static int __devinit snd_als100_pnp_detect(struct pnp_card_link *card,
 		if (res < 0)
 			return res;
 		dev++;
+		als100_devices++;
 		return 0;
 	}
 	return -ENODEV;
@@ -345,10 +348,13 @@ static struct pnp_card_driver als100_pnpc_driver = {
 
 static int __init alsa_card_als100_init(void)
 {
-	int cards;
+	int err;
+
+	err = pnp_register_card_driver(&als100_pnpc_driver);
+	if (err)
+		return err;
 
-	cards = pnp_register_card_driver(&als100_pnpc_driver);
-	if (cards <= 0) {
+	if (!als100_devices) {
 		pnp_unregister_card_driver(&als100_pnpc_driver);
 #ifdef MODULE
 		snd_printk(KERN_ERR "no ALS100 based soundcards found\n");
-- 
cgit v0.10.2


From db2735eb9076c5176ec9dcbbaefd38e7d82f0e47 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:10 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: azt2320

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/azt2320.c b/sound/isa/azt2320.c
index a530691..15e5928 100644
--- a/sound/isa/azt2320.c
+++ b/sound/isa/azt2320.c
@@ -310,6 +310,8 @@ static int __devinit snd_card_azt2320_probe(int dev,
 	return 0;
 }
 
+static unsigned int __devinitdata azt2320_devices;
+
 static int __devinit snd_azt2320_pnp_detect(struct pnp_card_link *card,
 					    const struct pnp_card_device_id *id)
 {
@@ -323,6 +325,7 @@ static int __devinit snd_azt2320_pnp_detect(struct pnp_card_link *card,
 		if (res < 0)
 			return res;
 		dev++;
+		azt2320_devices++;
 		return 0;
 	}
         return -ENODEV;
@@ -372,10 +375,13 @@ static struct pnp_card_driver azt2320_pnpc_driver = {
 
 static int __init alsa_card_azt2320_init(void)
 {
-	int cards;
+	int err;
+
+	err = pnp_register_card_driver(&azt2320_pnpc_driver);
+	if (err)
+		return err;
 
-	cards = pnp_register_card_driver(&azt2320_pnpc_driver);
-	if (cards <= 0) {
+	if (!azt2320_devices) {
 		pnp_unregister_card_driver(&azt2320_pnpc_driver);
 #ifdef MODULE
 		snd_printk(KERN_ERR "no AZT2320 based soundcards found\n");
-- 
cgit v0.10.2


From 07d58ad06b46e630a5ff6b10e9b81f370b175b56 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:11 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: cmi8330

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c
index fd9bb25..fa63048a 100644
--- a/sound/isa/cmi8330.c
+++ b/sound/isa/cmi8330.c
@@ -175,7 +175,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_cmi8330_pnpids);
 #endif
 
 
-static struct ad1848_mix_elem snd_cmi8330_controls[] __initdata = {
+static struct ad1848_mix_elem snd_cmi8330_controls[] __devinitdata = {
 AD1848_DOUBLE("Master Playback Volume", 0, CMI8330_MASTVOL, CMI8330_MASTVOL, 4, 0, 15, 0),
 AD1848_SINGLE("Loud Playback Switch", 0, CMI8330_MUTEMUX, 6, 1, 1),
 AD1848_DOUBLE("PCM Playback Switch", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 7, 7, 1, 1),
@@ -204,7 +204,7 @@ AD1848_SINGLE(SNDRV_CTL_NAME_IEC958("Input ",PLAYBACK,SWITCH), 0, CMI8330_MUTEMU
 };
 
 #ifdef ENABLE_SB_MIXER
-static struct sbmix_elem cmi8330_sb_mixers[] __initdata = {
+static struct sbmix_elem cmi8330_sb_mixers[] __devinitdata = {
 SB_DOUBLE("SB Master Playback Volume", SB_DSP4_MASTER_DEV, (SB_DSP4_MASTER_DEV + 1), 3, 3, 31),
 SB_DOUBLE("Tone Control - Bass", SB_DSP4_BASS_DEV, (SB_DSP4_BASS_DEV + 1), 4, 4, 15),
 SB_DOUBLE("Tone Control - Treble", SB_DSP4_TREBLE_DEV, (SB_DSP4_TREBLE_DEV + 1), 4, 4, 15),
@@ -222,7 +222,7 @@ SB_DOUBLE("SB Playback Volume", SB_DSP4_OGAIN_DEV, (SB_DSP4_OGAIN_DEV + 1), 6, 6
 SB_SINGLE("SB Mic Auto Gain", SB_DSP4_MIC_AGC, 0, 1),
 };
 
-static unsigned char cmi8330_sb_init_values[][2] __initdata = {
+static unsigned char cmi8330_sb_init_values[][2] __devinitdata = {
 	{ SB_DSP4_MASTER_DEV + 0, 0 },
 	{ SB_DSP4_MASTER_DEV + 1, 0 },
 	{ SB_DSP4_PCM_DEV + 0, 0 },
@@ -545,7 +545,7 @@ static int __devinit snd_cmi8330_probe(struct snd_card *card, int dev)
 	return snd_card_register(card);
 }
 
-static int __init snd_cmi8330_nonpnp_probe(struct platform_device *pdev)
+static int __devinit snd_cmi8330_nonpnp_probe(struct platform_device *pdev)
 {
 	struct snd_card *card;
 	int err;
@@ -607,6 +607,8 @@ static struct platform_driver snd_cmi8330_driver = {
 
 
 #ifdef CONFIG_PNP
+static unsigned int __devinitdata cmi8330_pnp_devices;
+
 static int __devinit snd_cmi8330_pnp_detect(struct pnp_card_link *pcard,
 					    const struct pnp_card_device_id *pid)
 {
@@ -636,6 +638,7 @@ static int __devinit snd_cmi8330_pnp_detect(struct pnp_card_link *pcard,
 	}
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	cmi8330_pnp_devices++;
 	return 0;
 }
 
@@ -706,9 +709,9 @@ static int __init alsa_card_cmi8330_init(void)
 
 #ifdef CONFIG_PNP
 	err = pnp_register_card_driver(&cmi8330_pnpc_driver);
-	if (err >= 0) {
+	if (!err) {
 		pnp_registered = 1;
-		cards += err;
+		cards += cmi8330_pnp_devices;
 	}
 #endif
 
-- 
cgit v0.10.2


From 6736a6587b991477aae927c37176e8cab8689f9e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:12 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: dt019x

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/dt019x.c b/sound/isa/dt019x.c
index 50e7bc5..0acb4e5 100644
--- a/sound/isa/dt019x.c
+++ b/sound/isa/dt019x.c
@@ -272,6 +272,8 @@ static int __devinit snd_card_dt019x_probe(int dev, struct pnp_card_link *pcard,
 	return 0;
 }
 
+static unsigned int __devinitdata dt019x_devices;
+
 static int __devinit snd_dt019x_pnp_probe(struct pnp_card_link *card,
 					  const struct pnp_card_device_id *pid)
 {
@@ -285,6 +287,7 @@ static int __devinit snd_dt019x_pnp_probe(struct pnp_card_link *card,
 		if (res < 0)
 			return res;
 		dev++;
+		dt019x_devices++;
 		return 0;
 	}
 	return -ENODEV;
@@ -336,10 +339,13 @@ static struct pnp_card_driver dt019x_pnpc_driver = {
 
 static int __init alsa_card_dt019x_init(void)
 {
-	int cards = 0;
+	int err;
+
+	err = pnp_register_card_driver(&dt019x_pnpc_driver);
+	if (err)
+		return err;
 
-	cards = pnp_register_card_driver(&dt019x_pnpc_driver);
-	if (cards <= 0) {
+	if (!dt019x_devices) {
 		pnp_unregister_card_driver(&dt019x_pnpc_driver);
 #ifdef MODULE
 		snd_printk(KERN_ERR "no DT-019X / ALS-007 based soundcards found\n");
-- 
cgit v0.10.2


From fea9739f2a1aed1cfb9b7af7cce66b28b68d4394 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:13 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: es18xx

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 721955d..9fbc185 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -2204,7 +2204,7 @@ static int __devinit snd_audiodrive_probe(struct snd_card *card, int dev)
 	return snd_card_register(card);
 }
 
-static int __init snd_es18xx_nonpnp_probe1(int dev, struct platform_device *devptr)
+static int __devinit snd_es18xx_nonpnp_probe1(int dev, struct platform_device *devptr)
 {
 	struct snd_card *card;
 	int err;
@@ -2221,7 +2221,7 @@ static int __init snd_es18xx_nonpnp_probe1(int dev, struct platform_device *devp
 	return 0;
 }
 
-static int __init snd_es18xx_nonpnp_probe(struct platform_device *pdev)
+static int __devinit snd_es18xx_nonpnp_probe(struct platform_device *pdev)
 {
 	int dev = pdev->id;
 	int err;
@@ -2297,6 +2297,8 @@ static struct platform_driver snd_es18xx_nonpnp_driver = {
 
 
 #ifdef CONFIG_PNP
+static unsigned int __devinitdata es18xx_pnp_devices;
+
 static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
 					       const struct pnp_card_device_id *pid)
 {
@@ -2327,6 +2329,7 @@ static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
 
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	es18xx_pnp_devices++;
 	return 0;
 }
 
@@ -2397,10 +2400,10 @@ static int __init alsa_card_es18xx_init(void)
 	}
 
 #ifdef CONFIG_PNP
-	i = pnp_register_card_driver(&es18xx_pnpc_driver);
-	if (i >= 0) {
+	err = pnp_register_card_driver(&es18xx_pnpc_driver);
+	if (!err) {
 		pnp_registered = 1;
-		cards += i;
+		cards += es18xx_pnp_devices;
 	}
 #endif
 
-- 
cgit v0.10.2


From 6a3a3a0260cf5a4e5942db8d56cae4db4dc139a6 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:14 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: es968

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/sb/es968.c b/sound/isa/sb/es968.c
index 9da80bf..d4d65b8 100644
--- a/sound/isa/sb/es968.c
+++ b/sound/isa/sb/es968.c
@@ -124,7 +124,7 @@ static int __devinit snd_card_es968_pnp(int dev, struct snd_card_es968 *acard,
 	return 0;
 }
 
-static int __init snd_card_es968_probe(int dev,
+static int __devinit snd_card_es968_probe(int dev,
 					struct pnp_card_link *pcard,
 					const struct pnp_card_device_id *pid)
 {
@@ -182,6 +182,8 @@ static int __init snd_card_es968_probe(int dev,
 	return 0;
 }
 
+static unsigned int __devinitdata es968_devices;
+
 static int __devinit snd_es968_pnp_detect(struct pnp_card_link *card,
                                           const struct pnp_card_device_id *id)
 {
@@ -195,6 +197,7 @@ static int __devinit snd_es968_pnp_detect(struct pnp_card_link *card,
 		if (res < 0)
 			return res;
 		dev++;
+		es968_devices++;
 		return 0;
 	}
 	return -ENODEV;
@@ -246,8 +249,11 @@ static struct pnp_card_driver es968_pnpc_driver = {
 
 static int __init alsa_card_es968_init(void)
 {
-	int cards = pnp_register_card_driver(&es968_pnpc_driver);
-	if (cards <= 0) {
+	int err = pnp_register_card_driver(&es968_pnpc_driver);
+	if (err)
+		return err;
+
+	if (!es968_devices) {
 		pnp_unregister_card_driver(&es968_pnpc_driver);
 #ifdef MODULE
 		snd_printk(KERN_ERR "no ES968 based soundcards found\n");
-- 
cgit v0.10.2


From ebdb71ea7b384ff3f32dd038dba0aad58580e832 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:14 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: interwave

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c
index 2cacd0f..de71b7a 100644
--- a/sound/isa/gus/interwave.c
+++ b/sound/isa/gus/interwave.c
@@ -791,7 +791,7 @@ static int __devinit snd_interwave_probe(struct snd_card *card, int dev)
 	return 0;
 }
 
-static int __init snd_interwave_nonpnp_probe1(int dev, struct platform_device *devptr)
+static int __devinit snd_interwave_nonpnp_probe1(int dev, struct platform_device *devptr)
 {
 	struct snd_card *card;
 	int err;
@@ -809,7 +809,7 @@ static int __init snd_interwave_nonpnp_probe1(int dev, struct platform_device *d
 	return 0;
 }
 
-static int __init snd_interwave_nonpnp_probe(struct platform_device *pdev)
+static int __devinit snd_interwave_nonpnp_probe(struct platform_device *pdev)
 {
 	int dev = pdev->id;
 	int err;
@@ -867,6 +867,7 @@ static struct platform_driver snd_interwave_driver = {
 };
 
 #ifdef CONFIG_PNP
+static unsigned int __devinitdata interwave_pnp_devices;
 
 static int __devinit snd_interwave_pnp_detect(struct pnp_card_link *pcard,
 					      const struct pnp_card_device_id *pid)
@@ -897,6 +898,7 @@ static int __devinit snd_interwave_pnp_detect(struct pnp_card_link *pcard,
 	}
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	interwave_pnp_devices++;
 	return 0;
 }
 
@@ -954,10 +956,10 @@ static int __init alsa_card_interwave_init(void)
 	}
 
 	/* ISA PnP cards */
-	i = pnp_register_card_driver(&interwave_pnpc_driver);
-	if (i >= 0) {
+	err = pnp_register_card_driver(&interwave_pnpc_driver);
+	if (!err) {
 		pnp_registered = 1;
-		cards += i;
+		cards += interwave_pnp_devices;;
 	}
 
 	if (!cards) {
-- 
cgit v0.10.2


From 312fef308cb15e793d8902f1dbd318d01268d36e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:15 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: sb16

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c
index 5737ab7..21ea659 100644
--- a/sound/isa/sb/sb16.c
+++ b/sound/isa/sb/sb16.c
@@ -369,7 +369,7 @@ static struct snd_card *snd_sb16_card_new(int dev)
 	return card;
 }
 
-static int __init snd_sb16_probe(struct snd_card *card, int dev)
+static int __devinit snd_sb16_probe(struct snd_card *card, int dev)
 {
 	int xirq, xdma8, xdma16;
 	struct snd_sb *chip;
@@ -518,7 +518,7 @@ static int snd_sb16_resume(struct snd_card *card)
 }
 #endif
 
-static int __init snd_sb16_nonpnp_probe1(int dev, struct platform_device *devptr)
+static int __devinit snd_sb16_nonpnp_probe1(int dev, struct platform_device *devptr)
 {
 	struct snd_card_sb16 *acard;
 	struct snd_card *card;
@@ -548,7 +548,7 @@ static int __init snd_sb16_nonpnp_probe1(int dev, struct platform_device *devptr
 }
 
 
-static int __init snd_sb16_nonpnp_probe(struct platform_device *pdev)
+static int __devinit snd_sb16_nonpnp_probe(struct platform_device *pdev)
 {
 	int dev = pdev->id;
 	int err;
@@ -629,6 +629,7 @@ static struct platform_driver snd_sb16_nonpnp_driver = {
 
 
 #ifdef CONFIG_PNP
+static unsigned int __devinitdata sb16_pnp_devices;
 
 static int __devinit snd_sb16_pnp_detect(struct pnp_card_link *pcard,
 					 const struct pnp_card_device_id *pid)
@@ -651,6 +652,7 @@ static int __devinit snd_sb16_pnp_detect(struct pnp_card_link *pcard,
 		}
 		pnp_set_card_drvdata(pcard, card);
 		dev++;
+		sb16_pnp_devices++;
 		return 0;
 	}
 
@@ -727,10 +729,10 @@ static int __init alsa_card_sb16_init(void)
 	}
 #ifdef CONFIG_PNP
 	/* PnP cards at last */
-	i = pnp_register_card_driver(&sb16_pnpc_driver);
-	if (i >= 0) {
+	err = pnp_register_card_driver(&sb16_pnpc_driver);
+	if (!err) {
 		pnp_registered = 1;
-		cards += i;
+		cards += sb16_pnp_devices;
 	}
 #endif
 
-- 
cgit v0.10.2


From be54414d597b168d8c4eb2dafb8933ea15a7c148 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:16 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: sb_card

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/sb_card.c b/sound/oss/sb_card.c
index 680b82e..d38e88a 100644
--- a/sound/oss/sb_card.c
+++ b/sound/oss/sb_card.c
@@ -52,6 +52,7 @@ static int __initdata sm_games 	= 0; /* Logitech soundman games? */
 static struct sb_card_config *legacy = NULL;
 
 #ifdef CONFIG_PNP
+static int pnp_registered;
 static int __initdata pnp       = 1;
 /*
 static int __initdata uart401	= 0;
@@ -133,7 +134,7 @@ static void sb_unload(struct sb_card_config *scc)
 }
 
 /* Register legacy card with OSS subsystem */
-static int sb_init_legacy(void)
+static int __init sb_init_legacy(void)
 {
 	struct sb_module_options sbmo = {0};
 
@@ -234,6 +235,8 @@ static void sb_dev2cfg(struct pnp_dev *dev, struct sb_card_config *scc)
 	}
 }
 
+static unsigned int sb_pnp_devices;
+
 /* Probe callback function for the PnP API */
 static int sb_pnp_probe(struct pnp_card_link *card, const struct pnp_card_device_id *card_id)
 {
@@ -264,6 +267,7 @@ static int sb_pnp_probe(struct pnp_card_link *card, const struct pnp_card_device
 	       scc->conf.dma, scc->conf.dma2);
 
 	pnp_set_card_drvdata(card, scc);
+	sb_pnp_devices++;
 
 	return sb_register_oss(scc, &sbmo);
 }
@@ -289,6 +293,14 @@ static struct pnp_card_driver sb_pnp_driver = {
 MODULE_DEVICE_TABLE(pnp_card, sb_pnp_card_table);
 #endif /* CONFIG_PNP */
 
+static void __init_or_module sb_unregister_all(void)
+{
+#ifdef CONFIG_PNP
+	if (pnp_registered)
+		pnp_unregister_card_driver(&sb_pnp_driver);
+#endif
+}
+
 static int __init sb_init(void)
 {
 	int lres = 0;
@@ -307,17 +319,18 @@ static int __init sb_init(void)
 
 #ifdef CONFIG_PNP
 	if(pnp) {
-		pres = pnp_register_card_driver(&sb_pnp_driver);
+		int err = pnp_register_card_driver(&sb_pnp_driver);
+		if (!err)
+			pnp_registered = 1;
+		pres = sb_pnp_devices;
 	}
 #endif
 	printk(KERN_INFO "sb: Init: Done\n");
 
 	/* If either PnP or Legacy registered a card then return
 	 * success */
-	if (pres <= 0 && lres <= 0) {
-#ifdef CONFIG_PNP
-		pnp_unregister_card_driver(&sb_pnp_driver);
-#endif
+	if (pres == 0 && lres <= 0) {
+		sb_unregister_all();
 		return -ENODEV;
 	}
 	return 0;
@@ -333,9 +346,7 @@ static void __exit sb_exit(void)
 		sb_unload(legacy);
 	}
 
-#ifdef CONFIG_PNP
-	pnp_unregister_card_driver(&sb_pnp_driver);
-#endif
+	sb_unregister_all();
 
 	if (smw_free) {
 		vfree(smw_free);
-- 
cgit v0.10.2


From 38125956441c5cab28333414cee308d162507477 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:17 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: sscape

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c
index 29bba8c..48e5552 100644
--- a/sound/isa/sscape.c
+++ b/sound/isa/sscape.c
@@ -1255,7 +1255,7 @@ static int __devinit create_sscape(int dev, struct snd_card **rcardp)
 }
 
 
-static int __init snd_sscape_probe(struct platform_device *pdev)
+static int __devinit snd_sscape_probe(struct platform_device *pdev)
 {
 	int dev = pdev->id;
 	struct snd_card *card;
@@ -1469,7 +1469,7 @@ static int __init sscape_init(void)
 	if (ret < 0)
 		return ret;
 #ifdef CONFIG_PNP
-	if (pnp_register_card_driver(&sscape_pnpc_driver) >= 0)
+	if (pnp_register_card_driver(&sscape_pnpc_driver) == 0)
 		pnp_registered = 1;
 #endif
 	return 0;
-- 
cgit v0.10.2


From c94ded6e602594c8e66fd5b1ba832d5327f5c103 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Mar 2006 01:17:18 -0800
Subject: [PATCH] PNP: adjust pnp_register_card_driver() signature: wavefront

Remove the assumption that pnp_register_card_driver() returns the
number of devices claimed.  And fix some __init/__devinit issues.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c
index c0115bf..2f13cd5 100644
--- a/sound/isa/wavefront/wavefront.c
+++ b/sound/isa/wavefront/wavefront.c
@@ -589,7 +589,7 @@ snd_wavefront_probe (struct snd_card *card, int dev)
 	return snd_card_register(card);
 }	
 
-static int __init snd_wavefront_nonpnp_probe(struct platform_device *pdev)
+static int __devinit snd_wavefront_nonpnp_probe(struct platform_device *pdev)
 {
 	int dev = pdev->id;
 	struct snd_card *card;
@@ -637,6 +637,7 @@ static struct platform_driver snd_wavefront_driver = {
 
 
 #ifdef CONFIG_PNP
+static unsigned int __devinitdata wavefront_pnp_devices;
 
 static int __devinit snd_wavefront_pnp_detect(struct pnp_card_link *pcard,
                                               const struct pnp_card_device_id *pid)
@@ -670,6 +671,7 @@ static int __devinit snd_wavefront_pnp_detect(struct pnp_card_link *pcard,
 
 	pnp_set_card_drvdata(pcard, card);
 	dev++;
+	wavefront_pnp_devices++;
 	return 0;
 }
 
@@ -729,10 +731,10 @@ static int __init alsa_card_wavefront_init(void)
 	}
 
 #ifdef CONFIG_PNP
-	i = pnp_register_card_driver(&wavefront_pnpc_driver);
-	if (i >= 0) {
+	err = pnp_register_card_driver(&wavefront_pnpc_driver);
+	if (!err) {
 		pnp_registered = 1;
-		cards += i;
+		cards += wavefront_pnp_devices;
 	}
 #endif
 
-- 
cgit v0.10.2


From 2115aea8185c8987b7688111953322742bd8795c Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 27 Mar 2006 01:17:19 -0800
Subject: [PATCH] vgacon: fix EGA cursor resize function

This corrects cursor resize on ega boards: registers are write-only, so we
shouldn't even try to read them.  And on ega, 31/30 produces a flat cursor.
Using 31/31 is better: except with 32 pixels high fonts, it shouldn't show
up.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 5a86978..61894d5 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -433,17 +433,22 @@ static void vgacon_set_cursor_size(int xpos, int from, int to)
 	cursor_size_lastto = to;
 
 	spin_lock_irqsave(&vga_lock, flags);
-	outb_p(0x0a, vga_video_port_reg);	/* Cursor start */
-	curs = inb_p(vga_video_port_val);
-	outb_p(0x0b, vga_video_port_reg);	/* Cursor end */
-	cure = inb_p(vga_video_port_val);
+	if (vga_video_type >= VIDEO_TYPE_VGAC) {
+		outb_p(VGA_CRTC_CURSOR_START, vga_video_port_reg);
+		curs = inb_p(vga_video_port_val);
+		outb_p(VGA_CRTC_CURSOR_END, vga_video_port_reg);
+		cure = inb_p(vga_video_port_val);
+	} else {
+		curs = 0;
+		cure = 0;
+	}
 
 	curs = (curs & 0xc0) | from;
 	cure = (cure & 0xe0) | to;
 
-	outb_p(0x0a, vga_video_port_reg);	/* Cursor start */
+	outb_p(VGA_CRTC_CURSOR_START, vga_video_port_reg);
 	outb_p(curs, vga_video_port_val);
-	outb_p(0x0b, vga_video_port_reg);	/* Cursor end */
+	outb_p(VGA_CRTC_CURSOR_END, vga_video_port_reg);
 	outb_p(cure, vga_video_port_val);
 	spin_unlock_irqrestore(&vga_lock, flags);
 }
@@ -455,7 +460,10 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 	switch (mode) {
 	case CM_ERASE:
 		write_vga(14, (c->vc_pos - vga_vram_base) / 2);
-		vgacon_set_cursor_size(c->vc_x, 31, 30);
+	        if (vga_video_type >= VIDEO_TYPE_VGAC)
+			vgacon_set_cursor_size(c->vc_x, 31, 30);
+		else
+			vgacon_set_cursor_size(c->vc_x, 31, 31);
 		break;
 
 	case CM_MOVE:
@@ -493,7 +501,10 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 						10 ? 1 : 2));
 			break;
 		case CUR_NONE:
-			vgacon_set_cursor_size(c->vc_x, 31, 30);
+			if (vga_video_type >= VIDEO_TYPE_VGAC)
+				vgacon_set_cursor_size(c->vc_x, 31, 30);
+			else
+				vgacon_set_cursor_size(c->vc_x, 31, 31);
 			break;
 		default:
 			vgacon_set_cursor_size(c->vc_x, 1,
-- 
cgit v0.10.2


From 15bdab959c9bb909c0317480dd9b35748a8f7887 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:20 -0800
Subject: [PATCH] vgacon: Add support for soft scrollback

The scrollback buffer of the VGA console is located in VGA RAM.  This RAM
is fixed in size and is very small.  To make the scrollback buffer larger,
it must be placed instead in System RAM.

This patch adds this feature.  The feature and the size of the buffer are
made as a kernel config option.  Besides consuming kernel memory, this
feature will slow down the console by approximately 20%.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Jindrich Makovicka <makovick@kmlinux.fjfi.cvut.cz>
Cc: Martin Mares <mj@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 6ee4498..4444bef 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -26,6 +26,30 @@ config VGA_CONSOLE
 #	   fi
 #	fi
 
+config VGACON_SOFT_SCROLLBACK
+       bool "Enable Scrollback Buffer in System RAM"
+       depends on VGA_CONSOLE
+       default n
+       help
+         The scrollback buffer of the standard VGA console is located in
+	 the VGA RAM.  The size of this RAM is fixed and is quite small.
+	 If you require a larger scrollback buffer, this can be placed in
+	 System RAM which is dynamically allocated during intialization.
+	 Placing the scrollback buffer in System RAM will slightly slow
+	 down the console.
+
+	 If you want this feature, say 'Y' here and enter the amount of
+	 RAM to allocate for this buffer.  If unsure, say 'N'.
+
+config VGACON_SOFT_SCROLLBACK_SIZE
+       int "Scrollback Buffer Size (in KB)"
+       depends on VGACON_SOFT_SCROLLBACK
+       default "64"
+       help
+         Enter the amount of System RAM to allocate for the scrollback
+	 buffer.  Each 64KB will give you approximately 16 80x25
+	 screenfuls of scrollback buffer
+
 config VIDEO_SELECT
 	bool "Video mode selection support"
 	depends on  X86 && VGA_CONSOLE
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 61894d5..d5a04b6 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -93,7 +93,6 @@ static u8 vgacon_build_attr(struct vc_data *c, u8 color, u8 intensity,
 static void vgacon_invert_region(struct vc_data *c, u16 * p, int count);
 static unsigned long vgacon_uni_pagedir[2];
 
-
 /* Description of the hardware situation */
 static unsigned long	vga_vram_base;		/* Base of video memory */
 static unsigned long	vga_vram_end;		/* End of video memory */
@@ -161,6 +160,201 @@ static inline void write_vga(unsigned char reg, unsigned int val)
 	spin_unlock_irqrestore(&vga_lock, flags);
 }
 
+static inline void vga_set_mem_top(struct vc_data *c)
+{
+	write_vga(12, (c->vc_visible_origin - vga_vram_base) / 2);
+}
+
+#ifdef CONFIG_VGACON_SOFT_SCROLLBACK
+#include <linux/bootmem.h>
+/* software scrollback */
+static void *vgacon_scrollback;
+static int vgacon_scrollback_tail;
+static int vgacon_scrollback_size;
+static int vgacon_scrollback_rows;
+static int vgacon_scrollback_cnt;
+static int vgacon_scrollback_cur;
+static int vgacon_scrollback_save;
+static int vgacon_scrollback_restore;
+
+static void vgacon_scrollback_init(int pitch)
+{
+	int rows = CONFIG_VGACON_SOFT_SCROLLBACK_SIZE * 1024/pitch;
+
+	if (vgacon_scrollback) {
+		vgacon_scrollback_cnt  = 0;
+		vgacon_scrollback_tail = 0;
+		vgacon_scrollback_cur  = 0;
+		vgacon_scrollback_rows = rows - 1;
+		vgacon_scrollback_size = rows * pitch;
+	}
+}
+
+static void __init vgacon_scrollback_startup(void)
+{
+	vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE
+					  * 1024);
+	vgacon_scrollback_init(vga_video_num_columns * 2);
+}
+
+static void vgacon_scrollback_update(struct vc_data *c, int t, int count)
+{
+	void *p;
+
+	if (!vgacon_scrollback_size || c->vc_num != fg_console)
+		return;
+
+	p = (void *) (c->vc_origin + t * c->vc_size_row);
+
+	while (count--) {
+		scr_memcpyw(vgacon_scrollback + vgacon_scrollback_tail,
+			    p, c->vc_size_row);
+		vgacon_scrollback_cnt++;
+		p += c->vc_size_row;
+		vgacon_scrollback_tail += c->vc_size_row;
+
+		if (vgacon_scrollback_tail >= vgacon_scrollback_size)
+			vgacon_scrollback_tail = 0;
+
+		if (vgacon_scrollback_cnt > vgacon_scrollback_rows)
+			vgacon_scrollback_cnt = vgacon_scrollback_rows;
+
+		vgacon_scrollback_cur = vgacon_scrollback_cnt;
+	}
+}
+
+static void vgacon_restore_screen(struct vc_data *c)
+{
+	vgacon_scrollback_save = 0;
+
+	if (!vga_is_gfx && !vgacon_scrollback_restore) {
+		scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf,
+			    c->vc_screenbuf_size > vga_vram_size ?
+			    vga_vram_size : c->vc_screenbuf_size);
+		vgacon_scrollback_restore = 1;
+		vgacon_scrollback_cur = vgacon_scrollback_cnt;
+	}
+}
+
+static int vgacon_scrolldelta(struct vc_data *c, int lines)
+{
+	int start, end, count, soff, diff;
+	void *d, *s;
+
+	if (!lines) {
+		c->vc_visible_origin = c->vc_origin;
+		vga_set_mem_top(c);
+		return 1;
+	}
+
+	if (!vgacon_scrollback)
+		return 1;
+
+	if (!vgacon_scrollback_save) {
+		vgacon_cursor(c, CM_ERASE);
+		vgacon_save_screen(c);
+		vgacon_scrollback_save = 1;
+	}
+
+	vgacon_scrollback_restore = 0;
+	start = vgacon_scrollback_cur + lines;
+	end = start + abs(lines);
+
+	if (start < 0)
+		start = 0;
+
+	if (start > vgacon_scrollback_cnt)
+		start = vgacon_scrollback_cnt;
+
+	if (end < 0)
+		end = 0;
+
+	if (end > vgacon_scrollback_cnt)
+		end = vgacon_scrollback_cnt;
+
+	vgacon_scrollback_cur = start;
+	count = end - start;
+	soff = vgacon_scrollback_tail - ((vgacon_scrollback_cnt - end) *
+					 c->vc_size_row);
+	soff -= count * c->vc_size_row;
+
+	if (soff < 0)
+		soff += vgacon_scrollback_size;
+
+	count = vgacon_scrollback_cnt - start;
+
+	if (count > c->vc_rows)
+		count = c->vc_rows;
+
+	diff = c->vc_rows - count;
+
+	d = (void *) c->vc_origin;
+	s = (void *) c->vc_screenbuf;
+
+	while (count--) {
+		scr_memcpyw(d, vgacon_scrollback + soff, c->vc_size_row);
+		d += c->vc_size_row;
+		soff += c->vc_size_row;
+
+		if (soff >= vgacon_scrollback_size)
+			soff = 0;
+	}
+
+	if (diff == c->vc_rows) {
+		vgacon_cursor(c, CM_MOVE);
+	} else {
+		while (diff--) {
+			scr_memcpyw(d, s, c->vc_size_row);
+			d += c->vc_size_row;
+			s += c->vc_size_row;
+		}
+	}
+
+	return 1;
+}
+#else
+#define vgacon_scrollback_startup(...) do { } while (0)
+#define vgacon_scrollback_init(...)    do { } while (0)
+#define vgacon_scrollback_update(...)  do { } while (0)
+
+static void vgacon_restore_screen(struct vc_data *c)
+{
+	if (c->vc_origin != c->vc_visible_origin)
+		vgacon_scrolldelta(c, 0);
+}
+
+static int vgacon_scrolldelta(struct vc_data *c, int lines)
+{
+	if (!lines)		/* Turn scrollback off */
+		c->vc_visible_origin = c->vc_origin;
+	else {
+		int margin = c->vc_size_row * 4;
+		int ul, we, p, st;
+
+		if (vga_rolled_over >
+		    (c->vc_scr_end - vga_vram_base) + margin) {
+			ul = c->vc_scr_end - vga_vram_base;
+			we = vga_rolled_over + c->vc_size_row;
+		} else {
+			ul = 0;
+			we = vga_vram_size;
+		}
+		p = (c->vc_visible_origin - vga_vram_base - ul + we) % we +
+		    lines * c->vc_size_row;
+		st = (c->vc_origin - vga_vram_base - ul + we) % we;
+		if (st < 2 * margin)
+			margin = 0;
+		if (p < margin)
+			p = 0;
+		if (p > st - margin)
+			p = st;
+		c->vc_visible_origin = vga_vram_base + (p + ul) % we;
+	}
+	vga_set_mem_top(c);
+	return 1;
+}
+#endif /* CONFIG_VGACON_SOFT_SCROLLBACK */
+
 static const char __init *vgacon_startup(void)
 {
 	const char *display_desc = NULL;
@@ -330,7 +524,7 @@ static const char __init *vgacon_startup(void)
 
 	vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH;
 	vgacon_yres = vga_scan_lines;
-
+	vgacon_scrollback_startup();
 	return display_desc;
 }
 
@@ -357,11 +551,6 @@ static void vgacon_init(struct vc_data *c, int init)
 		con_set_default_unimap(c);
 }
 
-static inline void vga_set_mem_top(struct vc_data *c)
-{
-	write_vga(12, (c->vc_visible_origin - vga_vram_base) / 2);
-}
-
 static void vgacon_deinit(struct vc_data *c)
 {
 	/* When closing the last console, reset video origin */
@@ -455,8 +644,8 @@ static void vgacon_set_cursor_size(int xpos, int from, int to)
 
 static void vgacon_cursor(struct vc_data *c, int mode)
 {
-	if (c->vc_origin != c->vc_visible_origin)
-		vgacon_scrolldelta(c, 0);
+	vgacon_restore_screen(c);
+
 	switch (mode) {
 	case CM_ERASE:
 		write_vga(14, (c->vc_pos - vga_vram_base) / 2);
@@ -606,6 +795,7 @@ static int vgacon_switch(struct vc_data *c)
 			vgacon_doresize(c, c->vc_cols, c->vc_rows);
 	}
 
+	vgacon_scrollback_init(c->vc_size_row);
 	return 0;		/* Redrawing not needed */
 }
 
@@ -1073,37 +1263,6 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 	return 0;
 }
 
-static int vgacon_scrolldelta(struct vc_data *c, int lines)
-{
-	if (!lines)		/* Turn scrollback off */
-		c->vc_visible_origin = c->vc_origin;
-	else {
-		int margin = c->vc_size_row * 4;
-		int ul, we, p, st;
-
-		if (vga_rolled_over >
-		    (c->vc_scr_end - vga_vram_base) + margin) {
-			ul = c->vc_scr_end - vga_vram_base;
-			we = vga_rolled_over + c->vc_size_row;
-		} else {
-			ul = 0;
-			we = vga_vram_size;
-		}
-		p = (c->vc_visible_origin - vga_vram_base - ul + we) % we +
-		    lines * c->vc_size_row;
-		st = (c->vc_origin - vga_vram_base - ul + we) % we;
-		if (st < 2 * margin)
-			margin = 0;
-		if (p < margin)
-			p = 0;
-		if (p > st - margin)
-			p = st;
-		c->vc_visible_origin = vga_vram_base + (p + ul) % we;
-	}
-	vga_set_mem_top(c);
-	return 1;
-}
-
 static int vgacon_set_origin(struct vc_data *c)
 {
 	if (vga_is_gfx ||	/* We don't play origin tricks in graphic modes */
@@ -1146,15 +1305,14 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir,
 	if (t || b != c->vc_rows || vga_is_gfx)
 		return 0;
 
-	if (c->vc_origin != c->vc_visible_origin)
-		vgacon_scrolldelta(c, 0);
-
 	if (!vga_hardscroll_enabled || lines >= c->vc_rows / 2)
 		return 0;
 
+	vgacon_restore_screen(c);
 	oldo = c->vc_origin;
 	delta = lines * c->vc_size_row;
 	if (dir == SM_UP) {
+		vgacon_scrollback_update(c, t, lines);
 		if (c->vc_scr_end + delta >= vga_vram_end) {
 			scr_memcpyw((u16 *) vga_vram_base,
 				    (u16 *) (oldo + delta),
-- 
cgit v0.10.2


From 7a07cd786dbd0111b9dd977e114438220cb4eee5 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:22 -0800
Subject: [PATCH] nvidiafb: add suspend and resume hooks

Add suspend and resume hooks to make software suspend more reliable.  Resuming
from standby should generally work.  Resuming from mem and from disk requires
that the GPU is disabled.  Adding these to the suspend script...

fbset -accel false -a
/* suspend here */
fbset -accel true -a

...  should generally work.  In addition, resuming from mem requires that the
video card has to be POSTed by the BIOS or some other utility.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/nvidia/nv_accel.c b/drivers/video/nvidia/nv_accel.c
index f377a29..4aefb8f 100644
--- a/drivers/video/nvidia/nv_accel.c
+++ b/drivers/video/nvidia/nv_accel.c
@@ -300,6 +300,9 @@ int nvidiafb_sync(struct fb_info *info)
 {
 	struct nvidia_par *par = info->par;
 
+	if (info->state != FBINFO_STATE_RUNNING)
+		return 0;
+
 	if (!par->lockup)
 		NVFlush(par);
 
@@ -313,6 +316,9 @@ void nvidiafb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nvidia_par *par = info->par;
 
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+
 	if (par->lockup)
 		return cfb_copyarea(info, region);
 
@@ -329,6 +335,9 @@ void nvidiafb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	struct nvidia_par *par = info->par;
 	u32 color;
 
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+
 	if (par->lockup)
 		return cfb_fillrect(info, rect);
 
@@ -412,6 +421,9 @@ void nvidiafb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nvidia_par *par = info->par;
 
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+
 	if (image->depth == 1 && !par->lockup)
 		nvidiafb_mono_color_expand(info, image);
 	else
diff --git a/drivers/video/nvidia/nv_type.h b/drivers/video/nvidia/nv_type.h
index e4a5b1d..acdc266 100644
--- a/drivers/video/nvidia/nv_type.h
+++ b/drivers/video/nvidia/nv_type.h
@@ -129,6 +129,7 @@ struct nvidia_par {
 	int fpHeight;
 	int PanelTweak;
 	int paneltweak;
+	int pm_state;
 	u32 crtcSync_read;
 	u32 fpSyncs;
 	u32 dmaPut;
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index a7c4e5e..c758b38 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -21,6 +21,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/console.h>
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
 #endif
@@ -615,6 +616,30 @@ static int nvidia_panel_tweak(struct nvidia_par *par,
    return tweak;
 }
 
+static void nvidia_vga_protect(struct nvidia_par *par, int on)
+{
+	unsigned char tmp;
+
+	if (on) {
+		/*
+		 * Turn off screen and disable sequencer.
+		 */
+		tmp = NVReadSeq(par, 0x01);
+
+		NVWriteSeq(par, 0x00, 0x01);		/* Synchronous Reset */
+		NVWriteSeq(par, 0x01, tmp | 0x20);	/* disable the display */
+	} else {
+		/*
+		 * Reenable sequencer, then turn on screen.
+		 */
+
+		tmp = NVReadSeq(par, 0x01);
+
+		NVWriteSeq(par, 0x01, tmp & ~0x20);	/* reenable display */
+		NVWriteSeq(par, 0x00, 0x03);		/* End Reset */
+	}
+}
+
 static void nvidia_save_vga(struct nvidia_par *par,
 			    struct _riva_hw_state *state)
 {
@@ -643,9 +668,9 @@ static void nvidia_save_vga(struct nvidia_par *par,
 
 #undef DUMP_REG
 
-static void nvidia_write_regs(struct nvidia_par *par)
+static void nvidia_write_regs(struct nvidia_par *par,
+			      struct _riva_hw_state *state)
 {
-	struct _riva_hw_state *state = &par->ModeReg;
 	int i;
 
 	NVTRACE_ENTER();
@@ -694,32 +719,6 @@ static void nvidia_write_regs(struct nvidia_par *par)
 	NVTRACE_LEAVE();
 }
 
-static void nvidia_vga_protect(struct nvidia_par *par, int on)
-{
-	unsigned char tmp;
-
-	if (on) {
-		/*
-		 * Turn off screen and disable sequencer.
-		 */
-		tmp = NVReadSeq(par, 0x01);
-
-		NVWriteSeq(par, 0x00, 0x01);		/* Synchronous Reset */
-		NVWriteSeq(par, 0x01, tmp | 0x20);	/* disable the display */
-	} else {
-		/*
-		 * Reenable sequencer, then turn on screen.
-		 */
-
-		tmp = NVReadSeq(par, 0x01);
-
-		NVWriteSeq(par, 0x01, tmp & ~0x20);	/* reenable display */
-		NVWriteSeq(par, 0x00, 0x03);		/* End Reset */
-	}
-}
-
-
-
 static int nvidia_calc_regs(struct fb_info *info)
 {
 	struct nvidia_par *par = info->par;
@@ -1068,7 +1067,8 @@ static int nvidiafb_set_par(struct fb_info *info)
 
 	nvidia_vga_protect(par, 1);
 
-	nvidia_write_regs(par);
+	nvidia_write_regs(par, &par->ModeReg);
+	NVSetStartAddress(par, 0);
 
 #if defined (__BIG_ENDIAN)
 	/* turn on LFB swapping */
@@ -1377,6 +1377,57 @@ static struct fb_ops nvidia_fb_ops = {
 	.fb_sync        = nvidiafb_sync,
 };
 
+#ifdef CONFIG_PM
+static int nvidiafb_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	struct fb_info *info = pci_get_drvdata(dev);
+	struct nvidia_par *par = info->par;
+
+	acquire_console_sem();
+	par->pm_state = state.event;
+
+	if (state.event == PM_EVENT_FREEZE) {
+		dev->dev.power.power_state = state;
+	} else {
+		fb_set_suspend(info, 1);
+		nvidiafb_blank(FB_BLANK_POWERDOWN, info);
+		nvidia_write_regs(par, &par->SavedReg);
+		pci_save_state(dev);
+		pci_disable_device(dev);
+		pci_set_power_state(dev, pci_choose_state(dev, state));
+	}
+
+	release_console_sem();
+	return 0;
+}
+
+static int nvidiafb_resume(struct pci_dev *dev)
+{
+	struct fb_info *info = pci_get_drvdata(dev);
+	struct nvidia_par *par = info->par;
+
+	acquire_console_sem();
+	pci_set_power_state(dev, PCI_D0);
+
+	if (par->pm_state != PM_EVENT_FREEZE) {
+		pci_restore_state(dev);
+		pci_enable_device(dev);
+		pci_set_master(dev);
+	}
+
+	par->pm_state = PM_EVENT_ON;
+	nvidiafb_set_par(info);
+	fb_set_suspend (info, 0);
+	nvidiafb_blank(FB_BLANK_UNBLANK, info);
+
+	release_console_sem();
+	return 0;
+}
+#else
+#define nvidiafb_suspend NULL
+#define nvidiafb_resume NULL
+#endif
+
 static int __devinit nvidia_set_fbinfo(struct fb_info *info)
 {
 	struct fb_monspecs *specs = &info->monspecs;
@@ -1798,8 +1849,10 @@ static int __devinit nvidiafb_setup(char *options)
 static struct pci_driver nvidiafb_driver = {
 	.name = "nvidiafb",
 	.id_table = nvidiafb_pci_tbl,
-	.probe = nvidiafb_probe,
-	.remove = __exit_p(nvidiafb_remove),
+	.probe    = nvidiafb_probe,
+	.suspend  = nvidiafb_suspend,
+	.resume   = nvidiafb_resume,
+	.remove   = __exit_p(nvidiafb_remove),
 };
 
 /* ------------------------------------------------------------------------- *
-- 
cgit v0.10.2


From fc4effc7a98d0d320e478d1d42bc4a8a64380150 Mon Sep 17 00:00:00 2001
From: David Vrabel <dvrabel@arcom.com>
Date: Mon, 27 Mar 2006 01:17:23 -0800
Subject: [PATCH] fbdev: framebuffer driver for Geode GX

A framebuffer driver for the display controller in AMD Geode GX processors
(Geode GX533, Geode GX500 etc.).  Tested at 640x480, 800x600, 1024x768 and
1280x1024 at 8, 16, and 24 bpp with both CRT and TFT.  No accelerated features
currently implemented and compression remains disabled.

This driver requires that the BIOS (or the SoftVG/Firmbase code in the BIOS)
has created an appropriate virtual PCI header.

Signed-off-by: David Vrabel <dvrabel@arcom.com>
Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/geode/Kconfig b/drivers/video/geode/Kconfig
index 42fb9a8..4e173ef 100644
--- a/drivers/video/geode/Kconfig
+++ b/drivers/video/geode/Kconfig
@@ -8,9 +8,24 @@ config FB_GEODE
 	  Say 'Y' here to allow you to select framebuffer drivers for
 	  the AMD Geode family of processors.
 
+config FB_GEODE_GX
+	tristate "AMD Geode GX framebuffer support (EXPERIMENTAL)"
+	depends on FB && FB_GEODE && EXPERIMENTAL
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Framebuffer driver for the display controller integrated into the
+	  AMD Geode GX processors.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called gxfb.
+
+	  If unsure, say N.
+
 config FB_GEODE_GX1
 	tristate "AMD Geode GX1 framebuffer support (EXPERIMENTAL)"
-	depends on FB_GEODE && EXPERIMENTAL
+	depends on FB && FB_GEODE && EXPERIMENTAL
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/geode/Makefile b/drivers/video/geode/Makefile
index 13ad501e..f896565 100644
--- a/drivers/video/geode/Makefile
+++ b/drivers/video/geode/Makefile
@@ -1,5 +1,7 @@
 # Makefile for the Geode family framebuffer drivers
 
 obj-$(CONFIG_FB_GEODE_GX1) += gx1fb.o
+obj-$(CONFIG_FB_GEODE_GX)  += gxfb.o
 
-gx1fb-objs   		:= gx1fb_core.o display_gx1.o video_cs5530.o
+gx1fb-objs := gx1fb_core.o display_gx1.o video_cs5530.o
+gxfb-objs  := gxfb_core.o display_gx.o video_gx.o
diff --git a/drivers/video/geode/display_gx.c b/drivers/video/geode/display_gx.c
new file mode 100644
index 0000000..825c340
--- /dev/null
+++ b/drivers/video/geode/display_gx.c
@@ -0,0 +1,156 @@
+/*
+ * Geode GX display controller.
+ *
+ *   Copyright (C) 2005 Arcom Control Systems Ltd.
+ *
+ *   Portions from AMD's original 2.4 driver:
+ *     Copyright (C) 2004 Advanced Micro Devices, Inc.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License as published by * the
+ *   Free Software Foundation; either version 2 of the License, or * (at your
+ *   option) any later version.
+ */
+#include <linux/spinlock.h>
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/div64.h>
+#include <asm/delay.h>
+
+#include "geodefb.h"
+#include "display_gx.h"
+
+int gx_frame_buffer_size(void)
+{
+	/* Assuming 16 MiB. */
+	return 16*1024*1024;
+}
+
+int gx_line_delta(int xres, int bpp)
+{
+	/* Must be a multiple of 8 bytes. */
+	return (xres * (bpp >> 3) + 7) & ~0x7;
+}
+
+static void gx_set_mode(struct fb_info *info)
+{
+	struct geodefb_par *par = info->par;
+	u32 gcfg, dcfg;
+	int hactive, hblankstart, hsyncstart, hsyncend, hblankend, htotal;
+	int vactive, vblankstart, vsyncstart, vsyncend, vblankend, vtotal;
+
+	/* Unlock the display controller registers. */
+	readl(par->dc_regs + DC_UNLOCK);
+	writel(DC_UNLOCK_CODE, par->dc_regs + DC_UNLOCK);
+
+	gcfg = readl(par->dc_regs + DC_GENERAL_CFG);
+	dcfg = readl(par->dc_regs + DC_DISPLAY_CFG);
+
+	/* Disable the timing generator. */
+	dcfg &= ~(DC_DCFG_TGEN);
+	writel(dcfg, par->dc_regs + DC_DISPLAY_CFG);
+
+	/* Wait for pending memory requests before disabling the FIFO load. */
+	udelay(100);
+
+	/* Disable FIFO load and compression. */
+	gcfg &= ~(DC_GCFG_DFLE | DC_GCFG_CMPE | DC_GCFG_DECE);
+	writel(gcfg, par->dc_regs + DC_GENERAL_CFG);
+
+	/* Setup DCLK and its divisor. */
+	par->vid_ops->set_dclk(info);
+
+	/*
+	 * Setup new mode.
+	 */
+
+	/* Clear all unused feature bits. */
+	gcfg &= DC_GCFG_YUVM | DC_GCFG_VDSE;
+	dcfg = 0;
+
+	/* Set FIFO priority (default 6/5) and enable. */
+	/* FIXME: increase fifo priority for 1280x1024 and higher modes? */
+	gcfg |= (6 << DC_GCFG_DFHPEL_POS) | (5 << DC_GCFG_DFHPSL_POS) | DC_GCFG_DFLE;
+
+	/* Framebuffer start offset. */
+	writel(0, par->dc_regs + DC_FB_ST_OFFSET);
+
+	/* Line delta and line buffer length. */
+	writel(info->fix.line_length >> 3, par->dc_regs + DC_GFX_PITCH);
+	writel(((info->var.xres * info->var.bits_per_pixel/8) >> 3) + 2,
+	       par->dc_regs + DC_LINE_SIZE);
+
+	/* Enable graphics and video data and unmask address lines. */
+	dcfg |= DC_DCFG_GDEN | DC_DCFG_VDEN | DC_DCFG_A20M | DC_DCFG_A18M;
+
+	/* Set pixel format. */
+	switch (info->var.bits_per_pixel) {
+	case 8:
+		dcfg |= DC_DCFG_DISP_MODE_8BPP;
+		break;
+	case 16:
+		dcfg |= DC_DCFG_DISP_MODE_16BPP;
+		dcfg |= DC_DCFG_16BPP_MODE_565;
+		break;
+	case 32:
+		dcfg |= DC_DCFG_DISP_MODE_24BPP;
+		dcfg |= DC_DCFG_PALB;
+		break;
+	}
+
+	/* Enable timing generator. */
+	dcfg |= DC_DCFG_TGEN;
+
+	/* Horizontal and vertical timings. */
+	hactive = info->var.xres;
+	hblankstart = hactive;
+	hsyncstart = hblankstart + info->var.right_margin;
+	hsyncend =  hsyncstart + info->var.hsync_len;
+	hblankend = hsyncend + info->var.left_margin;
+	htotal = hblankend;
+
+	vactive = info->var.yres;
+	vblankstart = vactive;
+	vsyncstart = vblankstart + info->var.lower_margin;
+	vsyncend =  vsyncstart + info->var.vsync_len;
+	vblankend = vsyncend + info->var.upper_margin;
+	vtotal = vblankend;
+
+	writel((hactive - 1)     | ((htotal - 1) << 16),    par->dc_regs + DC_H_ACTIVE_TIMING);
+	writel((hblankstart - 1) | ((hblankend - 1) << 16), par->dc_regs + DC_H_BLANK_TIMING);
+	writel((hsyncstart - 1)  | ((hsyncend - 1) << 16),  par->dc_regs + DC_H_SYNC_TIMING);
+
+	writel((vactive - 1)     | ((vtotal - 1) << 16),    par->dc_regs + DC_V_ACTIVE_TIMING);
+	writel((vblankstart - 1) | ((vblankend - 1) << 16), par->dc_regs + DC_V_BLANK_TIMING);
+	writel((vsyncstart - 1)  | ((vsyncend - 1) << 16),  par->dc_regs + DC_V_SYNC_TIMING);
+
+	/* Write final register values. */
+	writel(dcfg, par->dc_regs + DC_DISPLAY_CFG);
+	writel(gcfg, par->dc_regs + DC_GENERAL_CFG);
+
+	par->vid_ops->configure_display(info);
+
+	/* Relock display controller registers */
+	writel(0, par->dc_regs + DC_UNLOCK);
+}
+
+static void gx_set_hw_palette_reg(struct fb_info *info, unsigned regno,
+				   unsigned red, unsigned green, unsigned blue)
+{
+	struct geodefb_par *par = info->par;
+	int val;
+
+	/* Hardware palette is in RGB 8-8-8 format. */
+	val  = (red   << 8) & 0xff0000;
+	val |= (green)      & 0x00ff00;
+	val |= (blue  >> 8) & 0x0000ff;
+
+	writel(regno, par->dc_regs + DC_PAL_ADDRESS);
+	writel(val, par->dc_regs + DC_PAL_DATA);
+}
+
+struct geode_dc_ops gx_dc_ops = {
+	.set_mode	 = gx_set_mode,
+	.set_palette_reg = gx_set_hw_palette_reg,
+};
diff --git a/drivers/video/geode/display_gx.h b/drivers/video/geode/display_gx.h
new file mode 100644
index 0000000..86c6233
--- /dev/null
+++ b/drivers/video/geode/display_gx.h
@@ -0,0 +1,96 @@
+/*
+ * Geode GX display controller
+ *
+ * Copyright (C) 2006 Arcom Control Systems Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __DISPLAY_GX_H__
+#define __DISPLAY_GX_H__
+
+int gx_frame_buffer_size(void);
+int gx_line_delta(int xres, int bpp);
+
+extern struct geode_dc_ops gx_dc_ops;
+
+/* Display controller registers */
+
+#define DC_UNLOCK 0x00
+#  define DC_UNLOCK_CODE 0x00004758
+
+#define DC_GENERAL_CFG 0x04
+#  define DC_GCFG_DFLE	      0x00000001
+#  define DC_GCFG_CURE	      0x00000002
+#  define DC_GCFG_ICNE	      0x00000004
+#  define DC_GCFG_VIDE	      0x00000008
+#  define DC_GCFG_CMPE	      0x00000020
+#  define DC_GCFG_DECE	      0x00000040
+#  define DC_GCFG_VGAE	      0x00000080
+#  define DC_GCFG_DFHPSL_MASK 0x00000F00
+#  define DC_GCFG_DFHPSL_POS	       8
+#  define DC_GCFG_DFHPEL_MASK 0x0000F000
+#  define DC_GCFG_DFHPEL_POS	      12
+#  define DC_GCFG_STFM	      0x00010000
+#  define DC_GCFG_FDTY	      0x00020000
+#  define DC_GCFG_VGAFT	      0x00040000
+#  define DC_GCFG_VDSE	      0x00080000
+#  define DC_GCFG_YUVM	      0x00100000
+#  define DC_GCFG_VFSL	      0x00800000
+#  define DC_GCFG_SIGE	      0x01000000
+#  define DC_GCFG_SGRE	      0x02000000
+#  define DC_GCFG_SGFR	      0x04000000
+#  define DC_GCFG_CRC_MODE    0x08000000
+#  define DC_GCFG_DIAG	      0x10000000
+#  define DC_GCFG_CFRW	      0x20000000
+
+#define DC_DISPLAY_CFG 0x08
+#  define DC_DCFG_TGEN            0x00000001
+#  define DC_DCFG_GDEN            0x00000008
+#  define DC_DCFG_VDEN            0x00000010
+#  define DC_DCFG_TRUP            0x00000040
+#  define DC_DCFG_DISP_MODE_MASK  0x00000300
+#  define DC_DCFG_DISP_MODE_8BPP  0x00000000
+#  define DC_DCFG_DISP_MODE_16BPP 0x00000100
+#  define DC_DCFG_DISP_MODE_24BPP 0x00000200
+#  define DC_DCFG_16BPP_MODE_MASK 0x00000c00
+#  define DC_DCFG_16BPP_MODE_565  0x00000000
+#  define DC_DCFG_16BPP_MODE_555  0x00000100
+#  define DC_DCFG_16BPP_MODE_444  0x00000200
+#  define DC_DCFG_DCEN            0x00080000
+#  define DC_DCFG_PALB            0x02000000
+#  define DC_DCFG_FRLK            0x04000000
+#  define DC_DCFG_VISL            0x08000000
+#  define DC_DCFG_FRSL            0x20000000
+#  define DC_DCFG_A18M            0x40000000
+#  define DC_DCFG_A20M            0x80000000
+
+#define DC_FB_ST_OFFSET 0x10
+
+#define DC_LINE_SIZE 0x30
+#  define DC_LINE_SIZE_FB_LINE_SIZE_MASK  0x000007ff
+#  define DC_LINE_SIZE_FB_LINE_SIZE_POS            0
+#  define DC_LINE_SIZE_CB_LINE_SIZE_MASK  0x007f0000
+#  define DC_LINE_SIZE_CB_LINE_SIZE_POS           16
+#  define DC_LINE_SIZE_VID_LINE_SIZE_MASK 0xff000000
+#  define DC_LINE_SIZE_VID_LINE_SIZE_POS          24
+
+#define DC_GFX_PITCH 0x34
+#  define DC_GFX_PITCH_FB_PITCH_MASK 0x0000ffff
+#  define DC_GFX_PITCH_FB_PITCH_POS           0
+#  define DC_GFX_PITCH_CB_PITCH_MASK 0xffff0000
+#  define DC_GFX_PITCH_CB_PITCH_POS          16
+
+#define DC_H_ACTIVE_TIMING 0x40
+#define DC_H_BLANK_TIMING  0x44
+#define DC_H_SYNC_TIMING   0x48
+#define DC_V_ACTIVE_TIMING 0x50
+#define DC_V_BLANK_TIMING  0x54
+#define DC_V_SYNC_TIMING   0x58
+
+#define DC_PAL_ADDRESS 0x70
+#define DC_PAL_DATA    0x74
+
+#endif /* !__DISPLAY_GX1_H__ */
diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c
new file mode 100644
index 0000000..89c34b1
--- /dev/null
+++ b/drivers/video/geode/gxfb_core.c
@@ -0,0 +1,423 @@
+/*
+ * Geode GX framebuffer driver.
+ *
+ *   Copyright (C) 2006 Arcom Control Systems Ltd.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License as published by the
+ *   Free Software Foundation; either version 2 of the License, or (at your
+ *   option) any later version.
+ *
+ *
+ * This driver assumes that the BIOS has created a virtual PCI device header
+ * for the video device. The PCI header is assumed to contain the following
+ * BARs:
+ *
+ *    BAR0 - framebuffer memory
+ *    BAR1 - graphics processor registers
+ *    BAR2 - display controller registers
+ *    BAR3 - video processor and flat panel control registers.
+ *
+ * 16 MiB of framebuffer memory is assumed to be available.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include "geodefb.h"
+#include "display_gx.h"
+#include "video_gx.h"
+
+static char mode_option[32] = "640x480-16@60";
+
+/* Modes relevant to the GX (taken from modedb.c) */
+static const struct fb_videomode __initdata gx_modedb[] = {
+	/* 640x480-60 VESA */
+	{ NULL, 60, 640, 480, 39682,  48, 16, 33, 10, 96, 2,
+	  0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 640x480-75 VESA */
+	{ NULL, 75, 640, 480, 31746, 120, 16, 16, 01, 64, 3,
+	  0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 640x480-85 VESA */
+	{ NULL, 85, 640, 480, 27777, 80, 56, 25, 01, 56, 3,
+	  0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 800x600-60 VESA */
+	{ NULL, 60, 800, 600, 25000, 88, 40, 23, 01, 128, 4,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 800x600-75 VESA */
+	{ NULL, 75, 800, 600, 20202, 160, 16, 21, 01, 80, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 800x600-85 VESA */
+	{ NULL, 85, 800, 600, 17761, 152, 32, 27, 01, 64, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1024x768-60 VESA */
+	{ NULL, 60, 1024, 768, 15384, 160, 24, 29, 3, 136, 6,
+	  0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1024x768-75 VESA */
+	{ NULL, 75, 1024, 768, 12690, 176, 16, 28, 1, 96, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1024x768-85 VESA */
+	{ NULL, 85, 1024, 768, 10582, 208, 48, 36, 1, 96, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1280x960-60 VESA */
+	{ NULL, 60, 1280, 960, 9259, 312, 96, 36, 1, 112, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1280x960-85 VESA */
+	{ NULL, 85, 1280, 960, 6734, 224, 64, 47, 1, 160, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1280x1024-60 VESA */
+	{ NULL, 60, 1280, 1024, 9259, 248, 48, 38, 1, 112, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1280x1024-75 VESA */
+	{ NULL, 75, 1280, 1024, 7407, 248, 16, 38, 1, 144, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1280x1024-85 VESA */
+	{ NULL, 85, 1280, 1024, 6349, 224, 64, 44, 1, 160, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1600x1200-60 VESA */
+	{ NULL, 60, 1600, 1200, 6172, 304, 64, 46, 1, 192, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1600x1200-75 VESA */
+	{ NULL, 75, 1600, 1200, 4938, 304, 64, 46, 1, 192, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+	/* 1600x1200-85 VESA */
+	{ NULL, 85, 1600, 1200, 4357, 304, 64, 46, 1, 192, 3,
+	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
+};
+
+static int gxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	if (var->xres > 1600 || var->yres > 1200)
+		return -EINVAL;
+	if ((var->xres > 1280 || var->yres > 1024) && var->bits_per_pixel > 16)
+		return -EINVAL;
+
+	if (var->bits_per_pixel == 32) {
+		var->red.offset   = 16; var->red.length   = 8;
+		var->green.offset =  8; var->green.length = 8;
+		var->blue.offset  =  0; var->blue.length  = 8;
+	} else if (var->bits_per_pixel == 16) {
+		var->red.offset   = 11; var->red.length   = 5;
+		var->green.offset =  5; var->green.length = 6;
+		var->blue.offset  =  0; var->blue.length  = 5;
+	} else if (var->bits_per_pixel == 8) {
+		var->red.offset   = 0; var->red.length   = 8;
+		var->green.offset = 0; var->green.length = 8;
+		var->blue.offset  = 0; var->blue.length  = 8;
+	} else
+		return -EINVAL;
+	var->transp.offset = 0; var->transp.length = 0;
+
+	/* Enough video memory? */
+	if (gx_line_delta(var->xres, var->bits_per_pixel) * var->yres > info->fix.smem_len)
+		return -EINVAL;
+
+	/* FIXME: Check timing parameters here? */
+
+	return 0;
+}
+
+static int gxfb_set_par(struct fb_info *info)
+{
+	struct geodefb_par *par = info->par;
+
+	if (info->var.bits_per_pixel > 8) {
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+		fb_dealloc_cmap(&info->cmap);
+	} else {
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+		fb_alloc_cmap(&info->cmap, 1<<info->var.bits_per_pixel, 0);
+	}
+
+	info->fix.line_length = gx_line_delta(info->var.xres, info->var.bits_per_pixel);
+
+	par->dc_ops->set_mode(info);
+
+	return 0;
+}
+
+static inline u_int chan_to_field(u_int chan, struct fb_bitfield *bf)
+{
+	chan &= 0xffff;
+	chan >>= 16 - bf->length;
+	return chan << bf->offset;
+}
+
+static int gxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			   unsigned blue, unsigned transp,
+			   struct fb_info *info)
+{
+	struct geodefb_par *par = info->par;
+
+	if (info->var.grayscale) {
+		/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+		red = green = blue = (red * 77 + green * 151 + blue * 28) >> 8;
+	}
+
+	/* Truecolor has hardware independent palette */
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR) {
+		u32 *pal = info->pseudo_palette;
+		u32 v;
+
+		if (regno >= 16)
+			return -EINVAL;
+
+		v  = chan_to_field(red, &info->var.red);
+		v |= chan_to_field(green, &info->var.green);
+		v |= chan_to_field(blue, &info->var.blue);
+
+		pal[regno] = v;
+	} else {
+		if (regno >= 256)
+			return -EINVAL;
+
+		par->dc_ops->set_palette_reg(info, regno, red, green, blue);
+	}
+
+	return 0;
+}
+
+static int gxfb_blank(int blank_mode, struct fb_info *info)
+{
+	struct geodefb_par *par = info->par;
+
+	return par->vid_ops->blank_display(info, blank_mode);
+}
+
+static int __init gxfb_map_video_memory(struct fb_info *info, struct pci_dev *dev)
+{
+	struct geodefb_par *par = info->par;
+	int fb_len;
+	int ret;
+
+	ret = pci_enable_device(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = pci_request_region(dev, 3, "gxfb (video processor)");
+	if (ret < 0)
+		return ret;
+	par->vid_regs = ioremap(pci_resource_start(dev, 3),
+				pci_resource_len(dev, 3));
+	if (!par->vid_regs)
+		return -ENOMEM;
+
+	ret = pci_request_region(dev, 2, "gxfb (display controller)");
+	if (ret < 0)
+		return ret;
+	par->dc_regs = ioremap(pci_resource_start(dev, 2), pci_resource_len(dev, 2));
+	if (!par->dc_regs)
+		return -ENOMEM;
+
+	ret = pci_request_region(dev, 0, "gxfb (framebuffer)");
+	if (ret < 0)
+		return ret;
+	if ((fb_len = gx_frame_buffer_size()) < 0)
+		return -ENOMEM;
+	info->fix.smem_start = pci_resource_start(dev, 0);
+	info->fix.smem_len = fb_len;
+	info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
+	if (!info->screen_base)
+		return -ENOMEM;
+
+	dev_info(&dev->dev, "%d Kibyte of video memory at 0x%lx\n",
+		 info->fix.smem_len / 1024, info->fix.smem_start);
+
+	return 0;
+}
+
+static struct fb_ops gxfb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= gxfb_check_var,
+	.fb_set_par	= gxfb_set_par,
+	.fb_setcolreg	= gxfb_setcolreg,
+	.fb_blank       = gxfb_blank,
+	/* No HW acceleration for now. */
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+};
+
+static struct fb_info * __init gxfb_init_fbinfo(struct device *dev)
+{
+	struct geodefb_par *par;
+	struct fb_info *info;
+
+	/* Alloc enough space for the pseudo palette. */
+	info = framebuffer_alloc(sizeof(struct geodefb_par) + sizeof(u32) * 16, dev);
+	if (!info)
+		return NULL;
+
+	par = info->par;
+
+	strcpy(info->fix.id, "Geode GX");
+
+	info->fix.type		= FB_TYPE_PACKED_PIXELS;
+	info->fix.type_aux	= 0;
+	info->fix.xpanstep	= 0;
+	info->fix.ypanstep	= 0;
+	info->fix.ywrapstep	= 0;
+	info->fix.accel		= FB_ACCEL_NONE;
+
+	info->var.nonstd	= 0;
+	info->var.activate	= FB_ACTIVATE_NOW;
+	info->var.height	= -1;
+	info->var.width	= -1;
+	info->var.accel_flags = 0;
+	info->var.vmode	= FB_VMODE_NONINTERLACED;
+
+	info->fbops		= &gxfb_ops;
+	info->flags		= FBINFO_DEFAULT;
+	info->node		= -1;
+
+	info->pseudo_palette	= (void *)par + sizeof(struct geodefb_par);
+
+	info->var.grayscale	= 0;
+
+	return info;
+}
+
+static int __init gxfb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct geodefb_par *par;
+	struct fb_info *info;
+	int ret;
+
+	info = gxfb_init_fbinfo(&pdev->dev);
+	if (!info)
+		return -ENOMEM;
+	par = info->par;
+
+	/* GX display controller and GX video device. */
+	par->dc_ops  = &gx_dc_ops;
+	par->vid_ops = &gx_vid_ops;
+
+	if ((ret = gxfb_map_video_memory(info, pdev)) < 0) {
+		dev_err(&pdev->dev, "failed to map frame buffer or controller registers\n");
+		goto err;
+	}
+
+	ret = fb_find_mode(&info->var, info, mode_option,
+			   gx_modedb, ARRAY_SIZE(gx_modedb), NULL, 16);
+	if (ret == 0 || ret == 4) {
+		dev_err(&pdev->dev, "could not find valid video mode\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+        /* Clear the frame buffer of garbage. */
+        memset_io(info->screen_base, 0, info->fix.smem_len);
+
+	gxfb_check_var(&info->var, info);
+	gxfb_set_par(info);
+
+	if (register_framebuffer(info) < 0) {
+		ret = -EINVAL;
+		goto err;
+	}
+	pci_set_drvdata(pdev, info);
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, info->fix.id);
+	return 0;
+
+  err:
+	if (info->screen_base) {
+		iounmap(info->screen_base);
+		pci_release_region(pdev, 0);
+	}
+	if (par->vid_regs) {
+		iounmap(par->vid_regs);
+		pci_release_region(pdev, 3);
+	}
+	if (par->dc_regs) {
+		iounmap(par->dc_regs);
+		pci_release_region(pdev, 2);
+	}
+
+	pci_disable_device(pdev);
+
+	if (info)
+		framebuffer_release(info);
+	return ret;
+}
+
+static void gxfb_remove(struct pci_dev *pdev)
+{
+	struct fb_info *info = pci_get_drvdata(pdev);
+	struct geodefb_par *par = info->par;
+
+	unregister_framebuffer(info);
+
+	iounmap((void __iomem *)info->screen_base);
+	pci_release_region(pdev, 0);
+
+	iounmap(par->vid_regs);
+	pci_release_region(pdev, 3);
+
+	iounmap(par->dc_regs);
+	pci_release_region(pdev, 2);
+
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+
+	framebuffer_release(info);
+}
+
+static struct pci_device_id gxfb_id_table[] = {
+	{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_VIDEO,
+	  PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16,
+	  0xff0000, 0 },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, gxfb_id_table);
+
+static struct pci_driver gxfb_driver = {
+	.name		= "gxfb",
+	.id_table	= gxfb_id_table,
+	.probe		= gxfb_probe,
+	.remove		= gxfb_remove,
+};
+
+static int __init gxfb_init(void)
+{
+#ifndef MODULE
+	if (fb_get_options("gxfb", NULL))
+		return -ENODEV;
+#endif
+	return pci_register_driver(&gxfb_driver);
+}
+
+static void __exit gxfb_cleanup(void)
+{
+	pci_unregister_driver(&gxfb_driver);
+}
+
+module_init(gxfb_init);
+module_exit(gxfb_cleanup);
+
+module_param_string(mode, mode_option, sizeof(mode_option), 0444);
+MODULE_PARM_DESC(mode, "video mode (<x>x<y>[-<bpp>][@<refr>])");
+
+MODULE_DESCRIPTION("Framebuffer driver for the AMD Geode GX");
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c
new file mode 100644
index 0000000..2b2a788
--- /dev/null
+++ b/drivers/video/geode/video_gx.c
@@ -0,0 +1,262 @@
+/*
+ * Geode GX video processor device.
+ *
+ *   Copyright (C) 2006 Arcom Control Systems Ltd.
+ *
+ *   Portions from AMD's original 2.4 driver:
+ *     Copyright (C) 2004 Advanced Micro Devices, Inc.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License as published by the
+ *   Free Software Foundation; either version 2 of the License, or (at your
+ *   option) any later version.
+ */
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/msr.h>
+
+#include "geodefb.h"
+#include "video_gx.h"
+
+
+/*
+ * Tables of register settings for various DOTCLKs.
+ */
+struct gx_pll_entry {
+	long pixclock; /* ps */
+	u32 sys_rstpll_bits;
+	u32 dotpll_value;
+};
+
+#define POSTDIV3 ((u32)MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3)
+#define PREMULT2 ((u32)MSR_GLCP_SYS_RSTPLL_DOTPREMULT2)
+#define PREDIV2  ((u32)MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3)
+
+static const struct gx_pll_entry gx_pll_table_48MHz[] = {
+	{ 40123, POSTDIV3,	    0x00000BF2 },	/*  24.9230 */
+	{ 39721, 0,		    0x00000037 },	/*  25.1750 */
+	{ 35308, POSTDIV3|PREMULT2, 0x00000B1A },	/*  28.3220 */
+	{ 31746, POSTDIV3,	    0x000002D2 },	/*  31.5000 */
+	{ 27777, POSTDIV3|PREMULT2, 0x00000FE2 },	/*  36.0000 */
+	{ 26666, POSTDIV3,	    0x0000057A },	/*  37.5000 */
+	{ 25000, POSTDIV3,	    0x0000030A },	/*  40.0000 */
+	{ 22271, 0,		    0x00000063 },	/*  44.9000 */
+	{ 20202, 0,		    0x0000054B },	/*  49.5000 */
+	{ 20000, 0,		    0x0000026E },	/*  50.0000 */
+	{ 19860, PREMULT2,	    0x00000037 },	/*  50.3500 */
+	{ 18518, POSTDIV3|PREMULT2, 0x00000B0D },	/*  54.0000 */
+	{ 17777, 0,		    0x00000577 },	/*  56.2500 */
+	{ 17733, 0,		    0x000007F7 },	/*  56.3916 */
+	{ 17653, 0,		    0x0000057B },	/*  56.6444 */
+	{ 16949, PREMULT2,	    0x00000707 },	/*  59.0000 */
+	{ 15873, POSTDIV3|PREMULT2, 0x00000B39 },	/*  63.0000 */
+	{ 15384, POSTDIV3|PREMULT2, 0x00000B45 },	/*  65.0000 */
+	{ 14814, POSTDIV3|PREMULT2, 0x00000FC1 },	/*  67.5000 */
+	{ 14124, POSTDIV3,	    0x00000561 },	/*  70.8000 */
+	{ 13888, POSTDIV3,	    0x000007E1 },	/*  72.0000 */
+	{ 13426, PREMULT2,	    0x00000F4A },	/*  74.4810 */
+	{ 13333, 0,		    0x00000052 },	/*  75.0000 */
+	{ 12698, 0,		    0x00000056 },	/*  78.7500 */
+	{ 12500, POSTDIV3|PREMULT2, 0x00000709 },	/*  80.0000 */
+	{ 11135, PREMULT2,	    0x00000262 },	/*  89.8000 */
+	{ 10582, 0,		    0x000002D2 },	/*  94.5000 */
+	{ 10101, PREMULT2,	    0x00000B4A },	/*  99.0000 */
+	{ 10000, PREMULT2,	    0x00000036 },	/* 100.0000 */
+	{  9259, 0,		    0x000007E2 },	/* 108.0000 */
+	{  8888, 0,		    0x000007F6 },	/* 112.5000 */
+	{  7692, POSTDIV3|PREMULT2, 0x00000FB0 },	/* 130.0000 */
+	{  7407, POSTDIV3|PREMULT2, 0x00000B50 },	/* 135.0000 */
+	{  6349, 0,		    0x00000055 },	/* 157.5000 */
+	{  6172, 0,		    0x000009C1 },	/* 162.0000 */
+	{  5787, PREMULT2,	    0x0000002D },	/* 172.798  */
+	{  5698, 0,		    0x000002C1 },	/* 175.5000 */
+	{  5291, 0,		    0x000002D1 },	/* 189.0000 */
+	{  4938, 0,		    0x00000551 },	/* 202.5000 */
+	{  4357, 0,		    0x0000057D },	/* 229.5000 */
+};
+
+static const struct gx_pll_entry gx_pll_table_14MHz[] = {
+	{ 39721, 0, 0x00000037 },	/*  25.1750 */
+	{ 35308, 0, 0x00000B7B },	/*  28.3220 */
+	{ 31746, 0, 0x000004D3 },	/*  31.5000 */
+	{ 27777, 0, 0x00000BE3 },	/*  36.0000 */
+	{ 26666, 0, 0x0000074F },	/*  37.5000 */
+	{ 25000, 0, 0x0000050B },	/*  40.0000 */
+	{ 22271, 0, 0x00000063 },	/*  44.9000 */
+	{ 20202, 0, 0x0000054B },	/*  49.5000 */
+	{ 20000, 0, 0x0000026E },	/*  50.0000 */
+	{ 19860, 0, 0x000007C3 },	/*  50.3500 */
+	{ 18518, 0, 0x000007E3 },	/*  54.0000 */
+	{ 17777, 0, 0x00000577 },	/*  56.2500 */
+	{ 17733, 0, 0x000002FB },	/*  56.3916 */
+	{ 17653, 0, 0x0000057B },	/*  56.6444 */
+	{ 16949, 0, 0x0000058B },	/*  59.0000 */
+	{ 15873, 0, 0x0000095E },	/*  63.0000 */
+	{ 15384, 0, 0x0000096A },	/*  65.0000 */
+	{ 14814, 0, 0x00000BC2 },	/*  67.5000 */
+	{ 14124, 0, 0x0000098A },	/*  70.8000 */
+	{ 13888, 0, 0x00000BE2 },	/*  72.0000 */
+	{ 13333, 0, 0x00000052 },	/*  75.0000 */
+	{ 12698, 0, 0x00000056 },	/*  78.7500 */
+	{ 12500, 0, 0x0000050A },	/*  80.0000 */
+	{ 11135, 0, 0x0000078E },	/*  89.8000 */
+	{ 10582, 0, 0x000002D2 },	/*  94.5000 */
+	{ 10101, 0, 0x000011F6 },	/*  99.0000 */
+	{ 10000, 0, 0x0000054E },	/* 100.0000 */
+	{  9259, 0, 0x000007E2 },	/* 108.0000 */
+	{  8888, 0, 0x000002FA },	/* 112.5000 */
+	{  7692, 0, 0x00000BB1 },	/* 130.0000 */
+	{  7407, 0, 0x00000975 },	/* 135.0000 */
+	{  6349, 0, 0x00000055 },	/* 157.5000 */
+	{  6172, 0, 0x000009C1 },	/* 162.0000 */
+	{  5698, 0, 0x000002C1 },	/* 175.5000 */
+	{  5291, 0, 0x00000539 },	/* 189.0000 */
+	{  4938, 0, 0x00000551 },	/* 202.5000 */
+	{  4357, 0, 0x0000057D },	/* 229.5000 */
+};
+
+static void gx_set_dclk_frequency(struct fb_info *info)
+{
+	const struct gx_pll_entry *pll_table;
+	int pll_table_len;
+	int i, best_i;
+	long min, diff;
+	u64 dotpll, sys_rstpll;
+	int timeout = 1000;
+
+	/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
+	if (cpu_data->x86_mask == 1) {
+		pll_table = gx_pll_table_14MHz;
+		pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
+	} else {
+		pll_table = gx_pll_table_48MHz;
+		pll_table_len = ARRAY_SIZE(gx_pll_table_48MHz);
+	}
+
+	/* Search the table for the closest pixclock. */
+	best_i = 0;
+	min = abs(pll_table[0].pixclock - info->var.pixclock);
+	for (i = 1; i < pll_table_len; i++) {
+		diff = abs(pll_table[i].pixclock - info->var.pixclock);
+		if (diff < min) {
+			min = diff;
+			best_i = i;
+		}
+	}
+
+	rdmsrl(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
+	rdmsrl(MSR_GLCP_DOTPLL, dotpll);
+
+	/* Program new M, N and P. */
+	dotpll &= 0x00000000ffffffffull;
+	dotpll |= (u64)pll_table[best_i].dotpll_value << 32;
+	dotpll |= MSR_GLCP_DOTPLL_DOTRESET;
+	dotpll &= ~MSR_GLCP_DOTPLL_BYPASS;
+
+	wrmsrl(MSR_GLCP_DOTPLL, dotpll);
+
+	/* Program dividers. */
+	sys_rstpll &= ~( MSR_GLCP_SYS_RSTPLL_DOTPREDIV2
+			 | MSR_GLCP_SYS_RSTPLL_DOTPREMULT2
+			 | MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3 );
+	sys_rstpll |= pll_table[best_i].sys_rstpll_bits;
+
+	wrmsrl(MSR_GLCP_SYS_RSTPLL, sys_rstpll);
+
+	/* Clear reset bit to start PLL. */
+	dotpll &= ~(MSR_GLCP_DOTPLL_DOTRESET);
+	wrmsrl(MSR_GLCP_DOTPLL, dotpll);
+
+	/* Wait for LOCK bit. */
+	do {
+		rdmsrl(MSR_GLCP_DOTPLL, dotpll);
+	} while (timeout-- && !(dotpll & MSR_GLCP_DOTPLL_LOCK));
+}
+
+static void gx_configure_display(struct fb_info *info)
+{
+	struct geodefb_par *par = info->par;
+	u32 dcfg, fp_pm;
+
+	dcfg = readl(par->vid_regs + GX_DCFG);
+
+	/* Clear bits from existing mode. */
+	dcfg &= ~(GX_DCFG_CRT_SYNC_SKW_MASK
+		  | GX_DCFG_CRT_HSYNC_POL   | GX_DCFG_CRT_VSYNC_POL
+		  | GX_DCFG_VSYNC_EN        | GX_DCFG_HSYNC_EN);
+
+	/* Set default sync skew.  */
+	dcfg |= GX_DCFG_CRT_SYNC_SKW_DFLT;
+
+	/* Enable hsync and vsync. */
+	dcfg |= GX_DCFG_HSYNC_EN | GX_DCFG_VSYNC_EN;
+
+	/* Sync polarities. */
+	if (info->var.sync & FB_SYNC_HOR_HIGH_ACT)
+		dcfg |= GX_DCFG_CRT_HSYNC_POL;
+	if (info->var.sync & FB_SYNC_VERT_HIGH_ACT)
+		dcfg |= GX_DCFG_CRT_VSYNC_POL;
+
+	writel(dcfg, par->vid_regs + GX_DCFG);
+
+	/* Power on flat panel. */
+	fp_pm = readl(par->vid_regs + GX_FP_PM);
+	fp_pm |= GX_FP_PM_P;
+	writel(fp_pm, par->vid_regs + GX_FP_PM);
+}
+
+static int gx_blank_display(struct fb_info *info, int blank_mode)
+{
+	struct geodefb_par *par = info->par;
+	u32 dcfg, fp_pm;
+	int blank, hsync, vsync;
+
+	/* CRT power saving modes. */
+	switch (blank_mode) {
+	case FB_BLANK_UNBLANK:
+		blank = 0; hsync = 1; vsync = 1;
+		break;
+	case FB_BLANK_NORMAL:
+		blank = 1; hsync = 1; vsync = 1;
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+		blank = 1; hsync = 1; vsync = 0;
+		break;
+	case FB_BLANK_HSYNC_SUSPEND:
+		blank = 1; hsync = 0; vsync = 1;
+		break;
+	case FB_BLANK_POWERDOWN:
+		blank = 1; hsync = 0; vsync = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	dcfg = readl(par->vid_regs + GX_DCFG);
+	dcfg &= ~(GX_DCFG_DAC_BL_EN
+		  | GX_DCFG_HSYNC_EN | GX_DCFG_VSYNC_EN);
+	if (!blank)
+		dcfg |= GX_DCFG_DAC_BL_EN;
+	if (hsync)
+		dcfg |= GX_DCFG_HSYNC_EN;
+	if (vsync)
+		dcfg |= GX_DCFG_VSYNC_EN;
+	writel(dcfg, par->vid_regs + GX_DCFG);
+
+	/* Power on/off flat panel. */
+	fp_pm = readl(par->vid_regs + GX_FP_PM);
+	if (blank_mode == FB_BLANK_POWERDOWN)
+		fp_pm &= ~GX_FP_PM_P;
+	else
+		fp_pm |= GX_FP_PM_P;
+	writel(fp_pm, par->vid_regs + GX_FP_PM);
+
+	return 0;
+}
+
+struct geode_vid_ops gx_vid_ops = {
+	.set_dclk	   = gx_set_dclk_frequency,
+	.configure_display = gx_configure_display,
+	.blank_display	   = gx_blank_display,
+};
diff --git a/drivers/video/geode/video_gx.h b/drivers/video/geode/video_gx.h
new file mode 100644
index 0000000..2d9211f
--- /dev/null
+++ b/drivers/video/geode/video_gx.h
@@ -0,0 +1,47 @@
+/*
+ * Geode GX video device
+ *
+ * Copyright (C) 2006 Arcom Control Systems Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __VIDEO_GX_H__
+#define __VIDEO_GX_H__
+
+extern struct geode_vid_ops gx_vid_ops;
+
+/* Geode GX video processor registers */
+
+#define GX_DCFG		0x0008
+#  define GX_DCFG_CRT_EN		0x00000001
+#  define GX_DCFG_HSYNC_EN		0x00000002
+#  define GX_DCFG_VSYNC_EN		0x00000004
+#  define GX_DCFG_DAC_BL_EN		0x00000008
+#  define GX_DCFG_CRT_HSYNC_POL		0x00000100
+#  define GX_DCFG_CRT_VSYNC_POL		0x00000200
+#  define GX_DCFG_CRT_SYNC_SKW_MASK	0x0001C000
+#  define GX_DCFG_CRT_SYNC_SKW_DFLT	0x00010000
+#  define GX_DCFG_VG_CK			0x00100000
+#  define GX_DCFG_GV_GAM		0x00200000
+#  define GX_DCFG_DAC_VREF		0x04000000
+
+/* Geode GX flat panel display control registers */
+#define GX_FP_PM 0x410
+#  define GX_FP_PM_P 0x01000000
+
+/* Geode GX clock control MSRs */
+
+#define MSR_GLCP_SYS_RSTPLL	0x4c000014
+#  define MSR_GLCP_SYS_RSTPLL_DOTPREDIV2	(0x0000000000000002ull)
+#  define MSR_GLCP_SYS_RSTPLL_DOTPREMULT2	(0x0000000000000004ull)
+#  define MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3	(0x0000000000000008ull)
+
+#define MSR_GLCP_DOTPLL		0x4c000015
+#  define MSR_GLCP_DOTPLL_DOTRESET		(0x0000000000000001ull)
+#  define MSR_GLCP_DOTPLL_BYPASS		(0x0000000000008000ull)
+#  define MSR_GLCP_DOTPLL_LOCK			(0x0000000002000000ull)
+
+#endif /* !__VIDEO_GX_H__ */
-- 
cgit v0.10.2


From 756e21a022aba5214bafcf803f114aed8a783b99 Mon Sep 17 00:00:00 2001
From: Arthur Othieno <apgo@patchbomb.org>
Date: Mon, 27 Mar 2006 01:17:24 -0800
Subject: [PATCH] matroxfb: simply return what i2c_add_driver() does

insmod will tell us when the module failed to load.  We do no further
processing on the return from i2c_add_driver(), so just return what
i2c_add_driver() did, instead of storing it.

Add __init/__exit annotations while we're at it.

Signed-off-by: Arthur Othieno <apgo@patchbomb.org>
Acked-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <petr@vandrovec.name>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index 6019710..531a0c3 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -1297,20 +1297,13 @@ static struct i2c_driver maven_driver={
 	.detach_client	= maven_detach_client,
 };
 
-/* ************************** */
-
-static int matroxfb_maven_init(void) {
-	int err;
-
-	err = i2c_add_driver(&maven_driver);
-	if (err) {
-		printk(KERN_ERR "maven: Maven driver failed to register (%d).\n", err);
-		return err;
-	}
-	return 0;
+static int __init matroxfb_maven_init(void)
+{
+	return i2c_add_driver(&maven_driver);
 }
 
-static void matroxfb_maven_exit(void) {
+static void __exit matroxfb_maven_exit(void)
+{
 	i2c_del_driver(&maven_driver);
 }
 
-- 
cgit v0.10.2


From d03c21ec0be7787ff6b75dcf56c0e96209ccbfbd Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 27 Mar 2006 01:17:26 -0800
Subject: [PATCH] matrox maven: memory allocation and other cleanups

A few cleanups which were done to almost all i2c drivers some times
ago, but matroxfb_maven was forgotten:

* Don't allocate two different structures at once.
* Use kzalloc instead of kmalloc+memset.
* Use strlcpy instead of strcpy.
* Drop duplicate error message on client deregistration failure.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index 531a0c3..d8b3429 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -129,7 +129,7 @@ static int get_ctrl_id(__u32 v4l2_id) {
 
 struct maven_data {
 	struct matrox_fb_info*		primary_head;
-	struct i2c_client*		client;
+	struct i2c_client		client;
 	int				version;
 };
 
@@ -970,7 +970,7 @@ static inline int maven_compute_timming(struct maven_data* md,
 
 static int maven_program_timming(struct maven_data* md,
 		const struct mavenregs* m) {
-	struct i2c_client* c = md->client;
+	struct i2c_client* c = &md->client;
 
 	if (m->mode == MATROXFB_OUTPUT_MODE_MONITOR) {
 		LR(0x80);
@@ -1007,7 +1007,7 @@ static int maven_program_timming(struct maven_data* md,
 }
 
 static inline int maven_resync(struct maven_data* md) {
-	struct i2c_client* c = md->client;
+	struct i2c_client* c = &md->client;
 	maven_set_reg(c, 0x95, 0x20);	/* start whole thing */
 	return 0;
 }
@@ -1065,48 +1065,48 @@ static int maven_set_control (struct maven_data* md,
 		  maven_compute_bwlevel(md, &blacklevel, &whitelevel);
 		  blacklevel = (blacklevel >> 2) | ((blacklevel & 3) << 8);
 		  whitelevel = (whitelevel >> 2) | ((whitelevel & 3) << 8);
-		  maven_set_reg_pair(md->client, 0x0e, blacklevel);
-		  maven_set_reg_pair(md->client, 0x1e, whitelevel);
+		  maven_set_reg_pair(&md->client, 0x0e, blacklevel);
+		  maven_set_reg_pair(&md->client, 0x1e, whitelevel);
 		}
 		break;
 		case V4L2_CID_SATURATION:
 		{
-		  maven_set_reg(md->client, 0x20, p->value);
-		  maven_set_reg(md->client, 0x22, p->value);
+		  maven_set_reg(&md->client, 0x20, p->value);
+		  maven_set_reg(&md->client, 0x22, p->value);
 		}
 		break;
 		case V4L2_CID_HUE:
 		{
-		  maven_set_reg(md->client, 0x25, p->value);
+		  maven_set_reg(&md->client, 0x25, p->value);
 		}
 		break;
 		case V4L2_CID_GAMMA:
 		{
 		  const struct maven_gamma* g;
 		  g = maven_compute_gamma(md);
-		  maven_set_reg(md->client, 0x83, g->reg83);
-		  maven_set_reg(md->client, 0x84, g->reg84);
-		  maven_set_reg(md->client, 0x85, g->reg85);
-		  maven_set_reg(md->client, 0x86, g->reg86);
-		  maven_set_reg(md->client, 0x87, g->reg87);
-		  maven_set_reg(md->client, 0x88, g->reg88);
-		  maven_set_reg(md->client, 0x89, g->reg89);
-		  maven_set_reg(md->client, 0x8a, g->reg8a);
-		  maven_set_reg(md->client, 0x8b, g->reg8b);
+		  maven_set_reg(&md->client, 0x83, g->reg83);
+		  maven_set_reg(&md->client, 0x84, g->reg84);
+		  maven_set_reg(&md->client, 0x85, g->reg85);
+		  maven_set_reg(&md->client, 0x86, g->reg86);
+		  maven_set_reg(&md->client, 0x87, g->reg87);
+		  maven_set_reg(&md->client, 0x88, g->reg88);
+		  maven_set_reg(&md->client, 0x89, g->reg89);
+		  maven_set_reg(&md->client, 0x8a, g->reg8a);
+		  maven_set_reg(&md->client, 0x8b, g->reg8b);
 		}
 		break;
 		case MATROXFB_CID_TESTOUT:
 		{
 			unsigned char val 
-			  = maven_get_reg (md->client,0x8d);
+			  = maven_get_reg(&md->client,0x8d);
 			if (p->value) val |= 0x10;
 			else          val &= ~0x10;
-			maven_set_reg (md->client, 0x8d, val);
+			maven_set_reg(&md->client, 0x8d, val);
 		}
 		break;
 		case MATROXFB_CID_DEFLICKER:
 		{
-		  maven_set_reg(md->client, 0x93, maven_compute_deflicker(md));
+		  maven_set_reg(&md->client, 0x93, maven_compute_deflicker(md));
 		}
 		break;
 	}
@@ -1185,7 +1185,6 @@ static int maven_init_client(struct i2c_client* clnt) {
 	MINFO_FROM(container_of(clnt->adapter, struct i2c_bit_adapter, adapter)->minfo);
 
 	md->primary_head = MINFO;
-	md->client = clnt;
 	down_write(&ACCESS_FBINFO(altout.lock));
 	ACCESS_FBINFO(outputs[1]).output = &maven_altout;
 	ACCESS_FBINFO(outputs[1]).src = ACCESS_FBINFO(outputs[1]).default_src;
@@ -1243,19 +1242,17 @@ static int maven_detect_client(struct i2c_adapter* adapter, int address, int kin
 					      I2C_FUNC_SMBUS_BYTE_DATA |
 					      I2C_FUNC_PROTOCOL_MANGLING))
 		goto ERROR0;
-	if (!(new_client = (struct i2c_client*)kmalloc(sizeof(*new_client) + sizeof(*data),
-			GFP_KERNEL))) {
+	if (!(data = kzalloc(sizeof(*data), GFP_KERNEL))) {
 		err = -ENOMEM;
 		goto ERROR0;
 	}
-	memset(new_client, 0, sizeof(*new_client) + sizeof(*data));
-	data = (struct maven_data*)(new_client + 1);
+	new_client = &data->client;
 	i2c_set_clientdata(new_client, data);
 	new_client->addr = address;
 	new_client->adapter = adapter;
 	new_client->driver = &maven_driver;
 	new_client->flags = 0;
-	strcpy(new_client->name, "maven client");
+	strlcpy(new_client->name, "maven", I2C_NAME_SIZE);
 	if ((err = i2c_attach_client(new_client)))
 		goto ERROR3;
 	err = maven_init_client(new_client);
@@ -1279,12 +1276,10 @@ static int maven_attach_adapter(struct i2c_adapter* adapter) {
 static int maven_detach_client(struct i2c_client* client) {
 	int err;
 
-	if ((err = i2c_detach_client(client))) {
-		printk(KERN_ERR "maven: Cannot deregister client\n");
+	if ((err = i2c_detach_client(client)))
 		return err;
-	}
 	maven_shutdown_client(client);
-	kfree(client);
+	kfree(i2c_get_clientdata(client));
 	return 0;
 }
 
-- 
cgit v0.10.2


From f95ec3c6df271ae4e6290cd6b95c18a009c76dc9 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 27 Mar 2006 01:17:27 -0800
Subject: [PATCH] au1200fb: Alchemy Au1200 framebuffer driver

Add support for Alchemy Au1200 framebuffer driver

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index fdebd60..3a89061 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1202,6 +1202,17 @@ config FB_AU1100
 	bool "Au1100 LCD Driver"
 	depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y
 
+config FB_AU1200
+	bool "Au1200 LCD Driver"
+	depends on FB && MIPS && SOC_AU1200
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  This is the framebuffer driver for the AMD Au1200 SOC.  It can drive
+	  various panels and CRTs by passing in kernel cmd line option
+	  au1200fb:panel=<name>.
+
 source "drivers/video/geode/Kconfig"
 
 config FB_FFB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index aa434e7..cb90218 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_FB_ASILIANT)	  += asiliantfb.o
 obj-$(CONFIG_FB_PXA)		  += pxafb.o
 obj-$(CONFIG_FB_W100)		  += w100fb.o
 obj-$(CONFIG_FB_AU1100)		  += au1100fb.o
+obj-$(CONFIG_FB_AU1200)		  += au1200fb.o
 obj-$(CONFIG_FB_PMAG_AA)	  += pmag-aa-fb.o
 obj-$(CONFIG_FB_PMAG_BA)	  += pmag-ba-fb.o
 obj-$(CONFIG_FB_PMAGB_B)	  += pmagb-b-fb.o
diff --git a/drivers/video/au1200fb.c b/drivers/video/au1200fb.c
new file mode 100644
index 0000000..b367de3
--- /dev/null
+++ b/drivers/video/au1200fb.c
@@ -0,0 +1,3844 @@
+/*
+ * BRIEF MODULE DESCRIPTION
+ *	Au1200 LCD Driver.
+ *
+ * Copyright 2004-2005 AMD
+ * Author: AMD
+ *
+ * Based on:
+ * linux/drivers/video/skeletonfb.c -- Skeleton for a frame buffer device
+ *  Created 28 Dec 1997 by Geert Uytterhoeven
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED	  ``AS	IS'' AND   ANY	EXPRESS OR IMPLIED
+ *  WARRANTIES,	  INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO	EVENT  SHALL   THE AUTHOR  BE	 LIABLE FOR ANY	  DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED	  TO, PROCUREMENT OF  SUBSTITUTE GOODS	OR SERVICES; LOSS OF
+ *  USE, DATA,	OR PROFITS; OR	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN	 CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ctype.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/mach-au1x00/au1000.h>
+#include "au1200fb.h"
+
+#ifdef CONFIG_PM
+#include <asm/mach-au1x00/au1xxx_pm.h>
+#endif
+
+#ifndef CONFIG_FB_AU1200_DEVS
+#define CONFIG_FB_AU1200_DEVS 4
+#endif
+
+#define DRIVER_NAME "au1200fb"
+#define DRIVER_DESC "LCD controller driver for AU1200 processors"
+
+#define DEBUG 1
+
+#define print_err(f, arg...) printk(KERN_ERR DRIVER_NAME ": " f "\n", ## arg)
+#define print_warn(f, arg...) printk(KERN_WARNING DRIVER_NAME ": " f "\n", ## arg)
+#define print_info(f, arg...) printk(KERN_INFO DRIVER_NAME ": " f "\n", ## arg)
+
+#if DEBUG
+#define print_dbg(f, arg...) printk(KERN_DEBUG __FILE__ ": " f "\n", ## arg)
+#else
+#define print_dbg(f, arg...) do {} while (0)
+#endif
+
+
+#define AU1200_LCD_FB_IOCTL 0x46FF
+
+#define AU1200_LCD_SET_SCREEN 1
+#define AU1200_LCD_GET_SCREEN 2
+#define AU1200_LCD_SET_WINDOW 3
+#define AU1200_LCD_GET_WINDOW 4
+#define AU1200_LCD_SET_PANEL  5
+#define AU1200_LCD_GET_PANEL  6
+
+#define SCREEN_SIZE		    (1<< 1)
+#define SCREEN_BACKCOLOR    (1<< 2)
+#define SCREEN_BRIGHTNESS   (1<< 3)
+#define SCREEN_COLORKEY     (1<< 4)
+#define SCREEN_MASK         (1<< 5)
+
+struct au1200_lcd_global_regs_t {
+	unsigned int flags;
+	unsigned int xsize;
+	unsigned int ysize;
+	unsigned int backcolor;
+	unsigned int brightness;
+	unsigned int colorkey;
+	unsigned int mask;
+	unsigned int panel_choice;
+	char panel_desc[80];
+
+};
+
+#define WIN_POSITION            (1<< 0)
+#define WIN_ALPHA_COLOR         (1<< 1)
+#define WIN_ALPHA_MODE          (1<< 2)
+#define WIN_PRIORITY            (1<< 3)
+#define WIN_CHANNEL             (1<< 4)
+#define WIN_BUFFER_FORMAT       (1<< 5)
+#define WIN_COLOR_ORDER         (1<< 6)
+#define WIN_PIXEL_ORDER         (1<< 7)
+#define WIN_SIZE                (1<< 8)
+#define WIN_COLORKEY_MODE       (1<< 9)
+#define WIN_DOUBLE_BUFFER_MODE  (1<< 10)
+#define WIN_RAM_ARRAY_MODE      (1<< 11)
+#define WIN_BUFFER_SCALE        (1<< 12)
+#define WIN_ENABLE	            (1<< 13)
+
+struct au1200_lcd_window_regs_t {
+	unsigned int flags;
+	unsigned int xpos;
+	unsigned int ypos;
+	unsigned int alpha_color;
+	unsigned int alpha_mode;
+	unsigned int priority;
+	unsigned int channel;
+	unsigned int buffer_format;
+	unsigned int color_order;
+	unsigned int pixel_order;
+	unsigned int xsize;
+	unsigned int ysize;
+	unsigned int colorkey_mode;
+	unsigned int double_buffer_mode;
+	unsigned int ram_array_mode;
+	unsigned int xscale;
+	unsigned int yscale;
+	unsigned int enable;
+};
+
+
+struct au1200_lcd_iodata_t {
+	unsigned int subcmd;
+	struct au1200_lcd_global_regs_t global;
+	struct au1200_lcd_window_regs_t window;
+};
+
+#if defined(__BIG_ENDIAN)
+#define LCD_CONTROL_DEFAULT_PO LCD_CONTROL_PO_11
+#else
+#define LCD_CONTROL_DEFAULT_PO LCD_CONTROL_PO_00
+#endif
+#define LCD_CONTROL_DEFAULT_SBPPF LCD_CONTROL_SBPPF_565
+
+/* Private, per-framebuffer management information (independent of the panel itself) */
+struct au1200fb_device {
+	struct fb_info fb_info;			/* FB driver info record */
+
+	int					plane;
+	unsigned char* 		fb_mem;		/* FrameBuffer memory map */
+	unsigned int		fb_len;
+	dma_addr_t    		fb_phys;
+};
+
+static struct au1200fb_device _au1200fb_devices[CONFIG_FB_AU1200_DEVS];
+/********************************************************************/
+
+/* LCD controller restrictions */
+#define AU1200_LCD_MAX_XRES	1280
+#define AU1200_LCD_MAX_YRES	1024
+#define AU1200_LCD_MAX_BPP	32
+#define AU1200_LCD_MAX_CLK	96000000 /* fixme: this needs to go away ? */
+#define AU1200_LCD_NBR_PALETTE_ENTRIES 256
+
+/* Default number of visible screen buffer to allocate */
+#define AU1200FB_NBR_VIDEO_BUFFERS 1
+
+/********************************************************************/
+
+static struct au1200_lcd *lcd = (struct au1200_lcd *) AU1200_LCD_ADDR;
+static int window_index = 2; /* default is zero */
+static int panel_index = 2; /* default is zero */
+static struct window_settings *win;
+static struct panel_settings *panel;
+static int noblanking = 1;
+static int nohwcursor = 0;
+
+struct window_settings {
+	unsigned char name[64];
+	uint32 mode_backcolor;
+	uint32 mode_colorkey;
+	uint32 mode_colorkeymsk;
+	struct {
+		int xres;
+		int yres;
+		int xpos;
+		int ypos;
+		uint32 mode_winctrl1; /* winctrl1[FRM,CCO,PO,PIPE] */
+		uint32 mode_winenable;
+	} w[4];
+};
+
+#if defined(__BIG_ENDIAN)
+#define LCD_WINCTRL1_PO_16BPP LCD_WINCTRL1_PO_00
+#else
+#define LCD_WINCTRL1_PO_16BPP LCD_WINCTRL1_PO_01
+#endif
+
+extern int board_au1200fb_panel_init (void);
+extern int board_au1200fb_panel_shutdown (void);
+
+#ifdef CONFIG_PM
+int au1200fb_pm_callback(au1xxx_power_dev_t *dev,
+		au1xxx_request_t request, void *data);
+au1xxx_power_dev_t *LCD_pm_dev;
+#endif
+
+/*
+ * Default window configurations
+ */
+static struct window_settings windows[] = {
+	{ /* Index 0 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 100, 100, 100, 100,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ LCD_WINENABLE_WEN1,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+
+	{ /* Index 1 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 320, 240, 5, 5,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_24BPP |
+				LCD_WINCTRL1_PO_00,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565
+				| LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 100, 100, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0/*LCD_WINENABLE_WEN2*/,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 200, 25, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+	{ /* Index 2 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_32BPP |
+				LCD_WINCTRL1_PO_00|LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0/*LCD_WINENABLE_WEN2*/,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+	/* Need VGA 640 @ 24bpp, @ 32bpp */
+	/* Need VGA 800 @ 24bpp, @ 32bpp */
+	/* Need VGA 1024 @ 24bpp, @ 32bpp */
+};
+
+/*
+ * Controller configurations for various panels.
+ */
+
+struct panel_settings
+{
+	const char name[25];		/* Full name <vendor>_<model> */
+
+	struct 	fb_monspecs monspecs; 	/* FB monitor specs */
+
+	/* panel timings */
+	uint32 mode_screen;
+	uint32 mode_horztiming;
+	uint32 mode_verttiming;
+	uint32 mode_clkcontrol;
+	uint32 mode_pwmdiv;
+	uint32 mode_pwmhi;
+	uint32 mode_outmask;
+	uint32 mode_fifoctrl;
+	uint32 mode_toyclksrc;
+	uint32 mode_backlight;
+	uint32 mode_auxpll;
+	int (*device_init)(void);
+	int (*device_shutdown)(void);
+#define Xres min_xres
+#define Yres min_yres
+	u32	min_xres;		/* Minimum horizontal resolution */
+	u32	max_xres;		/* Maximum horizontal resolution */
+	u32 	min_yres;		/* Minimum vertical resolution */
+	u32 	max_yres;		/* Maximum vertical resolution */
+};
+
+/********************************************************************/
+/* fixme: Maybe a modedb for the CRT ? otherwise panels should be as-is */
+
+/* List of panels known to work with the AU1200 LCD controller.
+ * To add a new panel, enter the same specifications as the
+ * Generic_TFT one, and MAKE SURE that it doesn't conflicts
+ * with the controller restrictions. Restrictions are:
+ *
+ * STN color panels: max_bpp <= 12
+ * STN mono panels: max_bpp <= 4
+ * TFT panels: max_bpp <= 16
+ * max_xres <= 800
+ * max_yres <= 600
+ */
+static struct panel_settings known_lcd_panels[] =
+{
+	[0] = { /* QVGA 320x240 H:33.3kHz V:110Hz */
+		.name = "QVGA_320x240",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(320) |
+			LCD_SCREEN_SY_N(240),
+		.mode_horztiming	= 0x00c4623b,
+		.mode_verttiming	= 0x00502814,
+		.mode_clkcontrol	= 0x00020002, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		320, 320,
+		240, 240,
+	},
+
+	[1] = { /* VGA 640x480 H:30.3kHz V:58Hz */
+		.name = "VGA_640x480",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x13f9df80,
+		.mode_horztiming	= 0x003c5859,
+		.mode_verttiming	= 0x00741201,
+		.mode_clkcontrol	= 0x00020001, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		640, 480,
+		640, 480,
+	},
+
+	[2] = { /* SVGA 800x600 H:46.1kHz V:69Hz */
+		.name = "SVGA_800x600",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x18fa5780,
+		.mode_horztiming	= 0x00dc7e77,
+		.mode_verttiming	= 0x00584805,
+		.mode_clkcontrol	= 0x00020000, /* /2=48Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		800, 800,
+		600, 600,
+	},
+
+	[3] = { /* XVGA 1024x768 H:56.2kHz V:70Hz */
+		.name = "XVGA_1024x768",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x1ffaff80,
+		.mode_horztiming	= 0x007d0e57,
+		.mode_verttiming	= 0x00740a01,
+		.mode_clkcontrol	= 0x000A0000, /* /1 */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 6, /* 72MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		1024, 1024,
+		768, 768,
+	},
+
+	[4] = { /* XVGA XVGA 1280x1024 H:68.5kHz V:65Hz */
+		.name = "XVGA_1280x1024",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x27fbff80,
+		.mode_horztiming	= 0x00cdb2c7,
+		.mode_verttiming	= 0x00600002,
+		.mode_clkcontrol	= 0x000A0000, /* /1 */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 10, /* 120MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		1280, 1280,
+		1024, 1024,
+	},
+
+	[5] = { /* Samsung 1024x768 TFT */
+		.name = "Samsung_1024x768_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x1ffaff80,
+		.mode_horztiming	= 0x018cc677,
+		.mode_verttiming	= 0x00241217,
+		.mode_clkcontrol	= 0x00000000, /* SCB 0x1 /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f, /* SCB 0x0 */
+		.mode_pwmhi		= 0x03400000, /* SCB 0x0 */
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		1024, 1024,
+		768, 768,
+	},
+
+	[6] = { /* Toshiba 640x480 TFT */
+		.name = "Toshiba_640x480_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(640) |
+			LCD_SCREEN_SY_N(480),
+		.mode_horztiming	= LCD_HORZTIMING_HPW_N(96) |
+			LCD_HORZTIMING_HND1_N(13) | LCD_HORZTIMING_HND2_N(51),
+		.mode_verttiming	= LCD_VERTTIMING_VPW_N(2) |
+			LCD_VERTTIMING_VND1_N(11) | LCD_VERTTIMING_VND2_N(32),
+		.mode_clkcontrol	= 0x00000000, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		640, 480,
+		640, 480,
+	},
+
+	[7] = { /* Sharp 320x240 TFT */
+		.name = "Sharp_320x240_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 12500,
+			.hfmax = 20000,
+			.vfmin = 38,
+			.vfmax = 81,
+			.dclkmin = 4500000,
+			.dclkmax = 6800000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(320) |
+			LCD_SCREEN_SY_N(240),
+		.mode_horztiming	= LCD_HORZTIMING_HPW_N(60) |
+			LCD_HORZTIMING_HND1_N(13) | LCD_HORZTIMING_HND2_N(2),
+		.mode_verttiming	= LCD_VERTTIMING_VPW_N(2) |
+			LCD_VERTTIMING_VND1_N(2) | LCD_VERTTIMING_VND2_N(5),
+		.mode_clkcontrol	= LCD_CLKCONTROL_PCD_N(7), /*16=6Mhz*/
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		320, 320,
+		240, 240,
+	},
+
+	[8] = { /* Toppoly TD070WGCB2 7" 856x480 TFT */
+		.name = "Toppoly_TD070WGCB2",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(856) |
+			LCD_SCREEN_SY_N(480),
+		.mode_horztiming	= LCD_HORZTIMING_HND2_N(43) |
+			LCD_HORZTIMING_HND1_N(43) | LCD_HORZTIMING_HPW_N(114),
+		.mode_verttiming	= LCD_VERTTIMING_VND2_N(20) |
+			LCD_VERTTIMING_VND1_N(21) | LCD_VERTTIMING_VPW_N(4),
+		.mode_clkcontrol	= 0x00020001, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		856, 856,
+		480, 480,
+	},
+};
+
+#define NUM_PANELS (ARRAY_SIZE(known_lcd_panels))
+
+/********************************************************************/
+
+#ifdef CONFIG_PM
+static int set_brightness(unsigned int brightness)
+{
+	unsigned int hi1, divider;
+
+	/* limit brightness pwm duty to >= 30/1600 */
+	if (brightness < 30) {
+		brightness = 30;
+	}
+	divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+	hi1 = (lcd->pwmhi >> 16) + 1;
+	hi1 = (((brightness & 0xFF) + 1) * divider >> 8);
+	lcd->pwmhi &= 0xFFFF;
+	lcd->pwmhi |= (hi1 << 16);
+
+	return brightness;
+}
+#endif /* CONFIG_PM */
+
+static int winbpp (unsigned int winctrl1)
+{
+	int bits = 0;
+
+	/* how many bits are needed for each pixel format */
+	switch (winctrl1 & LCD_WINCTRL1_FRM) {
+	case LCD_WINCTRL1_FRM_1BPP:
+		bits = 1;
+		break;
+	case LCD_WINCTRL1_FRM_2BPP:
+		bits = 2;
+		break;
+	case LCD_WINCTRL1_FRM_4BPP:
+		bits = 4;
+		break;
+	case LCD_WINCTRL1_FRM_8BPP:
+		bits = 8;
+		break;
+	case LCD_WINCTRL1_FRM_12BPP:
+	case LCD_WINCTRL1_FRM_16BPP655:
+	case LCD_WINCTRL1_FRM_16BPP565:
+	case LCD_WINCTRL1_FRM_16BPP556:
+	case LCD_WINCTRL1_FRM_16BPPI1555:
+	case LCD_WINCTRL1_FRM_16BPPI5551:
+	case LCD_WINCTRL1_FRM_16BPPA1555:
+	case LCD_WINCTRL1_FRM_16BPPA5551:
+		bits = 16;
+		break;
+	case LCD_WINCTRL1_FRM_24BPP:
+	case LCD_WINCTRL1_FRM_32BPP:
+		bits = 32;
+		break;
+	}
+
+	return bits;
+}
+
+static int fbinfo2index (struct fb_info *fb_info)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_FB_AU1200_DEVS; ++i) {
+		if (fb_info == (struct fb_info *)(&_au1200fb_devices[i].fb_info))
+			return i;
+	}
+	printk("au1200fb: ERROR: fbinfo2index failed!\n");
+	return -1;
+}
+
+static int au1200_setlocation (struct au1200fb_device *fbdev, int plane,
+	int xpos, int ypos)
+{
+	uint32 winctrl0, winctrl1, winenable, fb_offset = 0;
+	int xsz, ysz;
+
+	/* FIX!!! NOT CHECKING FOR COMPLETE OFFSCREEN YET */
+
+	winctrl0 = lcd->window[plane].winctrl0;
+	winctrl1 = lcd->window[plane].winctrl1;
+	winctrl0 &= (LCD_WINCTRL0_A | LCD_WINCTRL0_AEN);
+	winctrl1 &= ~(LCD_WINCTRL1_SZX | LCD_WINCTRL1_SZY);
+
+	/* Check for off-screen adjustments */
+	xsz = win->w[plane].xres;
+	ysz = win->w[plane].yres;
+	if ((xpos + win->w[plane].xres) > panel->Xres) {
+		/* Off-screen to the right */
+		xsz = panel->Xres - xpos; /* off by 1 ??? */
+		/*printk("off screen right\n");*/
+	}
+
+	if ((ypos + win->w[plane].yres) > panel->Yres) {
+		/* Off-screen to the bottom */
+		ysz = panel->Yres - ypos; /* off by 1 ??? */
+		/*printk("off screen bottom\n");*/
+	}
+
+	if (xpos < 0) {
+		/* Off-screen to the left */
+		xsz = win->w[plane].xres + xpos;
+		fb_offset += (((0 - xpos) * winbpp(lcd->window[plane].winctrl1))/8);
+		xpos = 0;
+		/*printk("off screen left\n");*/
+	}
+
+	if (ypos < 0) {
+		/* Off-screen to the top */
+		ysz = win->w[plane].yres + ypos;
+		/* fixme: fb_offset += ((0-ypos)*fb_pars[plane].line_length); */
+		ypos = 0;
+		/*printk("off screen top\n");*/
+	}
+
+	/* record settings */
+	win->w[plane].xpos = xpos;
+	win->w[plane].ypos = ypos;
+
+	xsz -= 1;
+	ysz -= 1;
+	winctrl0 |= (xpos << 21);
+	winctrl0 |= (ypos << 10);
+	winctrl1 |= (xsz << 11);
+	winctrl1 |= (ysz << 0);
+
+	/* Disable the window while making changes, then restore WINEN */
+	winenable = lcd->winenable & (1 << plane);
+	au_sync();
+	lcd->winenable &= ~(1 << plane);
+	lcd->window[plane].winctrl0 = winctrl0;
+	lcd->window[plane].winctrl1 = winctrl1;
+	lcd->window[plane].winbuf0 =
+	lcd->window[plane].winbuf1 = fbdev->fb_phys;
+	lcd->window[plane].winbufctrl = 0; /* select winbuf0 */
+	lcd->winenable |= winenable;
+	au_sync();
+
+	return 0;
+}
+
+static void au1200_setpanel (struct panel_settings *newpanel)
+{
+	/*
+	 * Perform global setup/init of LCD controller
+	 */
+	uint32 winenable;
+
+	/* Make sure all windows disabled */
+	winenable = lcd->winenable;
+	lcd->winenable = 0;
+	au_sync();
+	/*
+	 * Ensure everything is disabled before reconfiguring
+	 */
+	if (lcd->screen & LCD_SCREEN_SEN) {
+		/* Wait for vertical sync period */
+		lcd->intstatus = LCD_INT_SS;
+		while ((lcd->intstatus & LCD_INT_SS) == 0) {
+			au_sync();
+		}
+
+		lcd->screen &= ~LCD_SCREEN_SEN;	/*disable the controller*/
+
+		do {
+			lcd->intstatus = lcd->intstatus; /*clear interrupts*/
+			au_sync();
+		/*wait for controller to shut down*/
+		} while ((lcd->intstatus & LCD_INT_SD) == 0);
+
+		/* Call shutdown of current panel (if up) */
+		/* this must occur last, because if an external clock is driving
+		    the controller, the clock cannot be turned off before first
+			shutting down the controller.
+		 */
+		if (panel->device_shutdown != NULL)
+			panel->device_shutdown();
+	}
+
+	/* Newpanel == NULL indicates a shutdown operation only */
+	if (newpanel == NULL)
+		return;
+
+	panel = newpanel;
+
+	printk("Panel(%s), %dx%d\n", panel->name, panel->Xres, panel->Yres);
+
+	/*
+	 * Setup clocking if internal LCD clock source (assumes sys_auxpll valid)
+	 */
+	if (!(panel->mode_clkcontrol & LCD_CLKCONTROL_EXT))
+	{
+		uint32 sys_clksrc;
+		au_writel(panel->mode_auxpll, SYS_AUXPLL);
+		sys_clksrc = au_readl(SYS_CLKSRC) & ~0x0000001f;
+		sys_clksrc |= panel->mode_toyclksrc;
+		au_writel(sys_clksrc, SYS_CLKSRC);
+	}
+
+	/*
+	 * Configure panel timings
+	 */
+	lcd->screen = panel->mode_screen;
+	lcd->horztiming = panel->mode_horztiming;
+	lcd->verttiming = panel->mode_verttiming;
+	lcd->clkcontrol = panel->mode_clkcontrol;
+	lcd->pwmdiv = panel->mode_pwmdiv;
+	lcd->pwmhi = panel->mode_pwmhi;
+	lcd->outmask = panel->mode_outmask;
+	lcd->fifoctrl = panel->mode_fifoctrl;
+	au_sync();
+
+	/* fixme: Check window settings to make sure still valid
+	 * for new geometry */
+#if 0
+	au1200_setlocation(fbdev, 0, win->w[0].xpos, win->w[0].ypos);
+	au1200_setlocation(fbdev, 1, win->w[1].xpos, win->w[1].ypos);
+	au1200_setlocation(fbdev, 2, win->w[2].xpos, win->w[2].ypos);
+	au1200_setlocation(fbdev, 3, win->w[3].xpos, win->w[3].ypos);
+#endif
+	lcd->winenable = winenable;
+
+	/*
+	 * Re-enable screen now that it is configured
+	 */
+	lcd->screen |= LCD_SCREEN_SEN;
+	au_sync();
+
+	/* Call init of panel */
+	if (panel->device_init != NULL) panel->device_init();
+
+	/* FIX!!!! not appropriate on panel change!!! Global setup/init */
+	lcd->intenable = 0;
+	lcd->intstatus = ~0;
+	lcd->backcolor = win->mode_backcolor;
+
+	/* Setup Color Key - FIX!!! */
+	lcd->colorkey = win->mode_colorkey;
+	lcd->colorkeymsk = win->mode_colorkeymsk;
+
+	/* Setup HWCursor - FIX!!! Need to support this eventually */
+	lcd->hwc.cursorctrl = 0;
+	lcd->hwc.cursorpos = 0;
+	lcd->hwc.cursorcolor0 = 0;
+	lcd->hwc.cursorcolor1 = 0;
+	lcd->hwc.cursorcolor2 = 0;
+	lcd->hwc.cursorcolor3 = 0;
+
+
+#if 0
+#define D(X) printk("%25s: %08X\n", #X, X)
+	D(lcd->screen);
+	D(lcd->horztiming);
+	D(lcd->verttiming);
+	D(lcd->clkcontrol);
+	D(lcd->pwmdiv);
+	D(lcd->pwmhi);
+	D(lcd->outmask);
+	D(lcd->fifoctrl);
+	D(lcd->window[0].winctrl0);
+	D(lcd->window[0].winctrl1);
+	D(lcd->window[0].winctrl2);
+	D(lcd->window[0].winbuf0);
+	D(lcd->window[0].winbuf1);
+	D(lcd->window[0].winbufctrl);
+	D(lcd->window[1].winctrl0);
+	D(lcd->window[1].winctrl1);
+	D(lcd->window[1].winctrl2);
+	D(lcd->window[1].winbuf0);
+	D(lcd->window[1].winbuf1);
+	D(lcd->window[1].winbufctrl);
+	D(lcd->window[2].winctrl0);
+	D(lcd->window[2].winctrl1);
+	D(lcd->window[2].winctrl2);
+	D(lcd->window[2].winbuf0);
+	D(lcd->window[2].winbuf1);
+	D(lcd->window[2].winbufctrl);
+	D(lcd->window[3].winctrl0);
+	D(lcd->window[3].winctrl1);
+	D(lcd->window[3].winctrl2);
+	D(lcd->window[3].winbuf0);
+	D(lcd->window[3].winbuf1);
+	D(lcd->window[3].winbufctrl);
+	D(lcd->winenable);
+	D(lcd->intenable);
+	D(lcd->intstatus);
+	D(lcd->backcolor);
+	D(lcd->winenable);
+	D(lcd->colorkey);
+    D(lcd->colorkeymsk);
+	D(lcd->hwc.cursorctrl);
+	D(lcd->hwc.cursorpos);
+	D(lcd->hwc.cursorcolor0);
+	D(lcd->hwc.cursorcolor1);
+	D(lcd->hwc.cursorcolor2);
+	D(lcd->hwc.cursorcolor3);
+#endif
+}
+
+static void au1200_setmode(struct au1200fb_device *fbdev)
+{
+	int plane = fbdev->plane;
+	/* Window/plane setup */
+	lcd->window[plane].winctrl1 = ( 0
+		| LCD_WINCTRL1_PRI_N(plane)
+		| win->w[plane].mode_winctrl1 /* FRM,CCO,PO,PIPE */
+		) ;
+
+	au1200_setlocation(fbdev, plane, win->w[plane].xpos, win->w[plane].ypos);
+
+	lcd->window[plane].winctrl2 = ( 0
+		| LCD_WINCTRL2_CKMODE_00
+		| LCD_WINCTRL2_DBM
+		| LCD_WINCTRL2_BX_N( fbdev->fb_info.fix.line_length)
+		| LCD_WINCTRL2_SCX_1
+		| LCD_WINCTRL2_SCY_1
+		) ;
+	lcd->winenable |= win->w[plane].mode_winenable;
+	au_sync();
+}
+
+
+/* Inline helpers */
+
+/*#define panel_is_dual(panel)  ((panel->mode_screen & LCD_SCREEN_PT) == LCD_SCREEN_PT_010)*/
+/*#define panel_is_active(panel)((panel->mode_screen & LCD_SCREEN_PT) == LCD_SCREEN_PT_010)*/
+
+#define panel_is_color(panel) ((panel->mode_screen & LCD_SCREEN_PT) <= LCD_SCREEN_PT_CDSTN)
+
+/* Bitfields format supported by the controller. */
+static struct fb_bitfield rgb_bitfields[][4] = {
+  	/*     Red, 	   Green, 	 Blue, 	     Transp   */
+	[LCD_WINCTRL1_FRM_16BPP655 >> 25] =
+		{ { 10, 6, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPP565 >> 25] =
+		{ { 11, 5, 0 }, { 5, 6, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPP556 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 0, 6, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPI1555 >> 25] =
+		{ { 10, 5, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPI5551 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 1, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPA1555 >> 25] =
+		{ { 10, 5, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 15, 1, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPA5551 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 1, 5, 0 }, { 0, 1, 0 } },
+
+	[LCD_WINCTRL1_FRM_24BPP >> 25] =
+		{ { 16, 8, 0 }, { 8, 8, 0 }, { 0, 8, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_32BPP >> 25] =
+		{ { 16, 8, 0 }, { 8, 8, 0 }, { 0, 8, 0 }, { 24, 0, 0 } },
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Helpers */
+
+static void au1200fb_update_fbinfo(struct fb_info *fbi)
+{
+	/* FIX!!!! This also needs to take the window pixel format into account!!! */
+
+	/* Update var-dependent FB info */
+	if (panel_is_color(panel)) {
+		if (fbi->var.bits_per_pixel <= 8) {
+			/* palettized */
+			fbi->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+			fbi->fix.line_length = fbi->var.xres_virtual /
+				(8/fbi->var.bits_per_pixel);
+		} else {
+			/* non-palettized */
+			fbi->fix.visual = FB_VISUAL_TRUECOLOR;
+			fbi->fix.line_length = fbi->var.xres_virtual * (fbi->var.bits_per_pixel / 8);
+		}
+	} else {
+		/* mono FIX!!! mono 8 and 4 bits */
+		fbi->fix.visual = FB_VISUAL_MONO10;
+		fbi->fix.line_length = fbi->var.xres_virtual / 8;
+	}
+
+	fbi->screen_size = fbi->fix.line_length * fbi->var.yres_virtual;
+	print_dbg("line length: %d\n", fbi->fix.line_length);
+	print_dbg("bits_per_pixel: %d\n", fbi->var.bits_per_pixel);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 framebuffer driver */
+
+/* fb_check_var
+ * Validate var settings with hardware restrictions and modify it if necessary
+ */
+static int au1200fb_fb_check_var(struct fb_var_screeninfo *var,
+	struct fb_info *fbi)
+{
+	struct au1200fb_device *fbdev = (struct au1200fb_device *)fbi;
+	u32 pixclock;
+	int screen_size, plane;
+
+	plane = fbdev->plane;
+
+	/* Make sure that the mode respect all LCD controller and
+	 * panel restrictions. */
+	var->xres = win->w[plane].xres;
+	var->yres = win->w[plane].yres;
+
+	/* No need for virtual resolution support */
+	var->xres_virtual = var->xres;
+	var->yres_virtual = var->yres;
+
+	var->bits_per_pixel = winbpp(win->w[plane].mode_winctrl1);
+
+	screen_size = var->xres_virtual * var->yres_virtual;
+	if (var->bits_per_pixel > 8) screen_size *= (var->bits_per_pixel / 8);
+	else screen_size /= (8/var->bits_per_pixel);
+
+	if (fbdev->fb_len < screen_size)
+		return -EINVAL; /* Virtual screen is to big, abort */
+
+	/* FIX!!!! what are the implicaitons of ignoring this for windows ??? */
+	/* The max LCD clock is fixed to 48MHz (value of AUX_CLK). The pixel
+	 * clock can only be obtain by dividing this value by an even integer.
+	 * Fallback to a slower pixel clock if necessary. */
+	pixclock = max((u32)(PICOS2KHZ(var->pixclock) * 1000), fbi->monspecs.dclkmin);
+	pixclock = min(pixclock, min(fbi->monspecs.dclkmax, (u32)AU1200_LCD_MAX_CLK/2));
+
+	if (AU1200_LCD_MAX_CLK % pixclock) {
+		int diff = AU1200_LCD_MAX_CLK % pixclock;
+		pixclock -= diff;
+	}
+
+	var->pixclock = KHZ2PICOS(pixclock/1000);
+#if 0
+	if (!panel_is_active(panel)) {
+		int pcd = AU1200_LCD_MAX_CLK / (pixclock * 2) - 1;
+
+		if (!panel_is_color(panel)
+			&& (panel->control_base & LCD_CONTROL_MPI) && (pcd < 3)) {
+			/* STN 8bit mono panel support is up to 6MHz pixclock */
+			var->pixclock = KHZ2PICOS(6000);
+		} else if (!pcd) {
+			/* Other STN panel support is up to 12MHz  */
+			var->pixclock = KHZ2PICOS(12000);
+		}
+	}
+#endif
+	/* Set bitfield accordingly */
+	switch (var->bits_per_pixel) {
+		case 16:
+		{
+			/* 16bpp True color.
+			 * These must be set to MATCH WINCTRL[FORM] */
+			int idx;
+			idx = (win->w[0].mode_winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+			var->red    = rgb_bitfields[idx][0];
+			var->green  = rgb_bitfields[idx][1];
+			var->blue   = rgb_bitfields[idx][2];
+			var->transp = rgb_bitfields[idx][3];
+			break;
+		}
+
+		case 32:
+		{
+			/* 32bpp True color.
+			 * These must be set to MATCH WINCTRL[FORM] */
+			int idx;
+			idx = (win->w[0].mode_winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+			var->red    = rgb_bitfields[idx][0];
+			var->green  = rgb_bitfields[idx][1];
+			var->blue   = rgb_bitfields[idx][2];
+			var->transp = rgb_bitfields[idx][3];
+			break;
+		}
+		default:
+			print_dbg("Unsupported depth %dbpp", var->bits_per_pixel);
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* fb_set_par
+ * Set hardware with var settings. This will enable the controller with a
+ * specific mode, normally validated with the fb_check_var method
+ */
+static int au1200fb_fb_set_par(struct fb_info *fbi)
+{
+	struct au1200fb_device *fbdev = (struct au1200fb_device *)fbi;
+
+	au1200fb_update_fbinfo(fbi);
+	au1200_setmode(fbdev);
+
+	return 0;
+}
+
+/* fb_setcolreg
+ * Set color in LCD palette.
+ */
+static int au1200fb_fb_setcolreg(unsigned regno, unsigned red, unsigned green,
+	unsigned blue, unsigned transp, struct fb_info *fbi)
+{
+	volatile u32 *palette = lcd->palette;
+	u32 value;
+
+	if (regno > (AU1200_LCD_NBR_PALETTE_ENTRIES - 1))
+		return -EINVAL;
+
+	if (fbi->var.grayscale) {
+		/* Convert color to grayscale */
+		red = green = blue =
+			(19595 * red + 38470 * green + 7471 * blue) >> 16;
+	}
+
+	if (fbi->fix.visual == FB_VISUAL_TRUECOLOR) {
+		/* Place color in the pseudopalette */
+		if (regno > 16)
+			return -EINVAL;
+
+		palette = (u32*) fbi->pseudo_palette;
+
+		red   >>= (16 - fbi->var.red.length);
+		green >>= (16 - fbi->var.green.length);
+		blue  >>= (16 - fbi->var.blue.length);
+
+		value = (red   << fbi->var.red.offset) 	|
+			(green << fbi->var.green.offset)|
+			(blue  << fbi->var.blue.offset);
+		value &= 0xFFFF;
+
+	} else if (1 /*FIX!!! panel_is_active(fbdev->panel)*/) {
+		/* COLOR TFT PALLETTIZED (use RGB 565) */
+		value = (red & 0xF800)|((green >> 5) &
+				0x07E0)|((blue >> 11) & 0x001F);
+		value &= 0xFFFF;
+
+	} else if (0 /*panel_is_color(fbdev->panel)*/) {
+		/* COLOR STN MODE */
+		value = 0x1234;
+		value &= 0xFFF;
+	} else {
+		/* MONOCHROME MODE */
+		value = (green >> 12) & 0x000F;
+		value &= 0xF;
+	}
+
+	palette[regno] = value;
+
+	return 0;
+}
+
+/* fb_blank
+ * Blank the screen. Depending on the mode, the screen will be
+ * activated with the backlight color, or desactivated
+ */
+static int au1200fb_fb_blank(int blank_mode, struct fb_info *fbi)
+{
+	/* Short-circuit screen blanking */
+	if (noblanking)
+		return 0;
+
+	switch (blank_mode) {
+
+	case FB_BLANK_UNBLANK:
+	case FB_BLANK_NORMAL:
+		/* printk("turn on panel\n"); */
+		au1200_setpanel(panel);
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+	case FB_BLANK_HSYNC_SUSPEND:
+	case FB_BLANK_POWERDOWN:
+		/* printk("turn off panel\n"); */
+		au1200_setpanel(NULL);
+		break;
+	default:
+		break;
+
+	}
+
+	/* FB_BLANK_NORMAL is a soft blank */
+	return (blank_mode == FB_BLANK_NORMAL) ? -EINVAL : 0;
+}
+
+/* fb_mmap
+ * Map video memory in user space. We don't use the generic fb_mmap
+ * method mainly to allow the use of the TLB streaming flag (CCA=6)
+ */
+static int au1200fb_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+
+{
+	unsigned int len;
+	unsigned long start=0, off;
+	struct au1200fb_device *fbdev = (struct au1200fb_device *) info;
+
+#ifdef CONFIG_PM
+	au1xxx_pm_access(LCD_pm_dev);
+#endif
+
+	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) {
+		return -EINVAL;
+	}
+
+	start = fbdev->fb_phys & PAGE_MASK;
+	len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len);
+
+	off = vma->vm_pgoff << PAGE_SHIFT;
+
+	if ((vma->vm_end - vma->vm_start + off) > len) {
+		return -EINVAL;
+	}
+
+	off += start;
+	vma->vm_pgoff = off >> PAGE_SHIFT;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	pgprot_val(vma->vm_page_prot) |= _CACHE_MASK; /* CCA=7 */
+
+	vma->vm_flags |= VM_IO;
+
+	return io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+
+	return 0;
+}
+
+static void set_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata)
+{
+
+	unsigned int hi1, divider;
+
+	/* SCREEN_SIZE: user cannot reset size, must switch panel choice */
+
+	if (pdata->flags & SCREEN_BACKCOLOR)
+		lcd->backcolor = pdata->backcolor;
+
+	if (pdata->flags & SCREEN_BRIGHTNESS) {
+
+		// limit brightness pwm duty to >= 30/1600
+		if (pdata->brightness < 30) {
+			pdata->brightness = 30;
+		}
+		divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+		hi1 = (lcd->pwmhi >> 16) + 1;
+		hi1 = (((pdata->brightness & 0xFF)+1) * divider >> 8);
+		lcd->pwmhi &= 0xFFFF;
+		lcd->pwmhi |= (hi1 << 16);
+	}
+
+	if (pdata->flags & SCREEN_COLORKEY)
+		lcd->colorkey = pdata->colorkey;
+
+	if (pdata->flags & SCREEN_MASK)
+		lcd->colorkeymsk = pdata->mask;
+	au_sync();
+}
+
+static void get_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata)
+{
+	unsigned int hi1, divider;
+
+	pdata->xsize = ((lcd->screen & LCD_SCREEN_SX) >> 19) + 1;
+	pdata->ysize = ((lcd->screen & LCD_SCREEN_SY) >> 8) + 1;
+
+	pdata->backcolor = lcd->backcolor;
+	pdata->colorkey = lcd->colorkey;
+	pdata->mask = lcd->colorkeymsk;
+
+	// brightness
+	hi1 = (lcd->pwmhi >> 16) + 1;
+	divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+	pdata->brightness = ((hi1 << 8) / divider) - 1;
+	au_sync();
+}
+
+static void set_window(unsigned int plane,
+	struct au1200_lcd_window_regs_t *pdata)
+{
+	unsigned int val, bpp;
+
+	/* Window control register 0 */
+	if (pdata->flags & WIN_POSITION) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_OX |
+				LCD_WINCTRL0_OY);
+		val |= ((pdata->xpos << 21) & LCD_WINCTRL0_OX);
+		val |= ((pdata->ypos << 10) & LCD_WINCTRL0_OY);
+		lcd->window[plane].winctrl0 = val;
+	}
+	if (pdata->flags & WIN_ALPHA_COLOR) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_A);
+		val |= ((pdata->alpha_color << 2) & LCD_WINCTRL0_A);
+		lcd->window[plane].winctrl0 = val;
+	}
+	if (pdata->flags & WIN_ALPHA_MODE) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_AEN);
+		val |= ((pdata->alpha_mode << 1) & LCD_WINCTRL0_AEN);
+		lcd->window[plane].winctrl0 = val;
+	}
+
+	/* Window control register 1 */
+	if (pdata->flags & WIN_PRIORITY) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PRI);
+		val |= ((pdata->priority << 30) & LCD_WINCTRL1_PRI);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_CHANNEL) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PIPE);
+		val |= ((pdata->channel << 29) & LCD_WINCTRL1_PIPE);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_BUFFER_FORMAT) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_FRM);
+		val |= ((pdata->buffer_format << 25) & LCD_WINCTRL1_FRM);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_COLOR_ORDER) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_CCO);
+		val |= ((pdata->color_order << 24) & LCD_WINCTRL1_CCO);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_PIXEL_ORDER) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PO);
+		val |= ((pdata->pixel_order << 22) & LCD_WINCTRL1_PO);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_SIZE) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_SZX |
+				LCD_WINCTRL1_SZY);
+		val |= (((pdata->xsize << 11) - 1) & LCD_WINCTRL1_SZX);
+		val |= (((pdata->ysize) - 1) & LCD_WINCTRL1_SZY);
+		lcd->window[plane].winctrl1 = val;
+		/* program buffer line width */
+		bpp = winbpp(val) / 8;
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_BX);
+		val |= (((pdata->xsize * bpp) << 8) & LCD_WINCTRL2_BX);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	/* Window control register 2 */
+	if (pdata->flags & WIN_COLORKEY_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_CKMODE);
+		val |= ((pdata->colorkey_mode << 24) & LCD_WINCTRL2_CKMODE);
+		lcd->window[plane].winctrl2 = val;
+	}
+	if (pdata->flags & WIN_DOUBLE_BUFFER_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_DBM);
+		val |= ((pdata->double_buffer_mode << 23) & LCD_WINCTRL2_DBM);
+		lcd->window[plane].winctrl2 = val;
+	}
+	if (pdata->flags & WIN_RAM_ARRAY_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_RAM);
+		val |= ((pdata->ram_array_mode << 21) & LCD_WINCTRL2_RAM);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	/* Buffer line width programmed with WIN_SIZE */
+
+	if (pdata->flags & WIN_BUFFER_SCALE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_SCX |
+				LCD_WINCTRL2_SCY);
+		val |= ((pdata->xsize << 11) & LCD_WINCTRL2_SCX);
+		val |= ((pdata->ysize) & LCD_WINCTRL2_SCY);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	if (pdata->flags & WIN_ENABLE) {
+		val = lcd->winenable;
+		val &= ~(1<<plane);
+		val |= (pdata->enable & 1) << plane;
+		lcd->winenable = val;
+	}
+	au_sync();
+}
+
+static void get_window(unsigned int plane,
+	struct au1200_lcd_window_regs_t *pdata)
+{
+	/* Window control register 0 */
+	pdata->xpos = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_OX) >> 21;
+	pdata->ypos = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_OY) >> 10;
+	pdata->alpha_color = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_A) >> 2;
+	pdata->alpha_mode = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_AEN) >> 1;
+
+	/* Window control register 1 */
+	pdata->priority = (lcd->window[plane].winctrl1& LCD_WINCTRL1_PRI) >> 30;
+	pdata->channel = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_PIPE) >> 29;
+	pdata->buffer_format = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+	pdata->color_order = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_CCO) >> 24;
+	pdata->pixel_order = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_PO) >> 22;
+	pdata->xsize = ((lcd->window[plane].winctrl1 & LCD_WINCTRL1_SZX) >> 11) + 1;
+	pdata->ysize = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_SZY) + 1;
+
+	/* Window control register 2 */
+	pdata->colorkey_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_CKMODE) >> 24;
+	pdata->double_buffer_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_DBM) >> 23;
+	pdata->ram_array_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_RAM) >> 21;
+
+	pdata->enable = (lcd->winenable >> plane) & 1;
+	au_sync();
+}
+
+static int au1200fb_ioctl(struct fb_info *info, unsigned int cmd,
+                          unsigned long arg)
+{
+	int plane;
+	int val;
+
+#ifdef CONFIG_PM
+	au1xxx_pm_access(LCD_pm_dev);
+#endif
+
+	plane = fbinfo2index(info);
+	print_dbg("au1200fb: ioctl %d on plane %d\n", cmd, plane);
+
+	if (cmd == AU1200_LCD_FB_IOCTL) {
+		struct au1200_lcd_iodata_t iodata;
+
+		if (copy_from_user(&iodata, (void __user *) arg, sizeof(iodata)))
+			return -EFAULT;
+
+		print_dbg("FB IOCTL called\n");
+
+		switch (iodata.subcmd) {
+		case AU1200_LCD_SET_SCREEN:
+			print_dbg("AU1200_LCD_SET_SCREEN\n");
+			set_global(cmd, &iodata.global);
+			break;
+
+		case AU1200_LCD_GET_SCREEN:
+			print_dbg("AU1200_LCD_GET_SCREEN\n");
+			get_global(cmd, &iodata.global);
+			break;
+
+		case AU1200_LCD_SET_WINDOW:
+			print_dbg("AU1200_LCD_SET_WINDOW\n");
+			set_window(plane, &iodata.window);
+			break;
+
+		case AU1200_LCD_GET_WINDOW:
+			print_dbg("AU1200_LCD_GET_WINDOW\n");
+			get_window(plane, &iodata.window);
+			break;
+
+		case AU1200_LCD_SET_PANEL:
+			print_dbg("AU1200_LCD_SET_PANEL\n");
+			if ((iodata.global.panel_choice >= 0) &&
+					(iodata.global.panel_choice <
+					 NUM_PANELS))
+			{
+				struct panel_settings *newpanel;
+				panel_index = iodata.global.panel_choice;
+				newpanel = &known_lcd_panels[panel_index];
+				au1200_setpanel(newpanel);
+			}
+			break;
+
+		case AU1200_LCD_GET_PANEL:
+			print_dbg("AU1200_LCD_GET_PANEL\n");
+			iodata.global.panel_choice = panel_index;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		val = copy_to_user((void __user *) arg, &iodata, sizeof(iodata));
+		if (val) {
+			print_dbg("error: could not copy %d bytes\n", val);
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+
+static struct fb_ops au1200fb_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= au1200fb_fb_check_var,
+	.fb_set_par	= au1200fb_fb_set_par,
+	.fb_setcolreg	= au1200fb_fb_setcolreg,
+	.fb_blank	= au1200fb_fb_blank,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_sync	= NULL,
+	.fb_ioctl	= au1200fb_ioctl,
+	.fb_mmap	= au1200fb_fb_mmap,
+};
+
+/*-------------------------------------------------------------------------*/
+
+static irqreturn_t au1200fb_handle_irq(int irq, void* dev_id, struct pt_regs *regs)
+{
+	/* Nothing to do for now, just clear any pending interrupt */
+	lcd->intstatus = lcd->intstatus;
+	au_sync();
+
+	return IRQ_HANDLED;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 LCD device probe helpers */
+
+static int au1200fb_init_fbinfo(struct au1200fb_device *fbdev)
+{
+	struct fb_info *fbi = &fbdev->fb_info;
+	int bpp;
+
+	memset(fbi, 0, sizeof(struct fb_info));
+	fbi->fbops = &au1200fb_fb_ops;
+
+	bpp = winbpp(win->w[fbdev->plane].mode_winctrl1);
+
+	/* Copy monitor specs from panel data */
+	/* fixme: we're setting up LCD controller windows, so these dont give a
+	damn as to what the monitor specs are (the panel itself does, but that
+	isnt done here...so maybe need a generic catchall monitor setting??? */
+	memcpy(&fbi->monspecs, &panel->monspecs, sizeof(struct fb_monspecs));
+
+	/* We first try the user mode passed in argument. If that failed,
+	 * or if no one has been specified, we default to the first mode of the
+	 * panel list. Note that after this call, var data will be set */
+	if (!fb_find_mode(&fbi->var,
+			  fbi,
+			  NULL, /* drv_info.opt_mode, */
+			  fbi->monspecs.modedb,
+			  fbi->monspecs.modedb_len,
+			  fbi->monspecs.modedb,
+			  bpp)) {
+
+		print_err("Cannot find valid mode for panel %s", panel->name);
+		return -EFAULT;
+	}
+
+	fbi->pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+	if (!fbi->pseudo_palette) {
+		return -ENOMEM;
+	}
+	memset(fbi->pseudo_palette, 0, sizeof(u32) * 16);
+
+	if (fb_alloc_cmap(&fbi->cmap, AU1200_LCD_NBR_PALETTE_ENTRIES, 0) < 0) {
+		print_err("Fail to allocate colormap (%d entries)",
+			   AU1200_LCD_NBR_PALETTE_ENTRIES);
+		kfree(fbi->pseudo_palette);
+		return -EFAULT;
+	}
+
+	strncpy(fbi->fix.id, "AU1200", sizeof(fbi->fix.id));
+	fbi->fix.smem_start = fbdev->fb_phys;
+	fbi->fix.smem_len = fbdev->fb_len;
+	fbi->fix.type = FB_TYPE_PACKED_PIXELS;
+	fbi->fix.xpanstep = 0;
+	fbi->fix.ypanstep = 0;
+	fbi->fix.mmio_start = 0;
+	fbi->fix.mmio_len = 0;
+	fbi->fix.accel = FB_ACCEL_NONE;
+
+	fbi->screen_base = (char __iomem *) fbdev->fb_mem;
+
+	au1200fb_update_fbinfo(fbi);
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 LCD controller device driver */
+
+static int au1200fb_drv_probe(struct device *dev)
+{
+	struct au1200fb_device *fbdev;
+	unsigned long page;
+	int bpp, plane, ret;
+
+	if (!dev)
+		return -EINVAL;
+
+	for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane) {
+		bpp = winbpp(win->w[plane].mode_winctrl1);
+		if (win->w[plane].xres == 0)
+			win->w[plane].xres = panel->Xres;
+		if (win->w[plane].yres == 0)
+			win->w[plane].yres = panel->Yres;
+
+		fbdev = &_au1200fb_devices[plane];
+		memset(fbdev, 0, sizeof(struct au1200fb_device));
+		fbdev->plane = plane;
+
+		/* Allocate the framebuffer to the maximum screen size */
+		fbdev->fb_len = (win->w[plane].xres * win->w[plane].yres * bpp) / 8;
+
+		fbdev->fb_mem = dma_alloc_noncoherent(dev,
+				PAGE_ALIGN(fbdev->fb_len),
+				&fbdev->fb_phys, GFP_KERNEL);
+		if (!fbdev->fb_mem) {
+			print_err("fail to allocate frambuffer (size: %dK))",
+				  fbdev->fb_len / 1024);
+			return -ENOMEM;
+		}
+
+		/*
+		 * Set page reserved so that mmap will work. This is necessary
+		 * since we'll be remapping normal memory.
+		 */
+		for (page = (unsigned long)fbdev->fb_phys;
+		     page < PAGE_ALIGN((unsigned long)fbdev->fb_phys +
+			     fbdev->fb_len);
+		     page += PAGE_SIZE) {
+			SetPageReserved(pfn_to_page(page >> PAGE_SHIFT)); /* LCD DMA is NOT coherent on Au1200 */
+		}
+		print_dbg("Framebuffer memory map at %p", fbdev->fb_mem);
+		print_dbg("phys=0x%08x, size=%dK", fbdev->fb_phys, fbdev->fb_len / 1024);
+
+		/* Init FB data */
+		if ((ret = au1200fb_init_fbinfo(fbdev)) < 0)
+			goto failed;
+
+		/* Register new framebuffer */
+		if ((ret = register_framebuffer(&fbdev->fb_info)) < 0) {
+			print_err("cannot register new framebuffer");
+			goto failed;
+		}
+
+		au1200fb_fb_set_par(&fbdev->fb_info);
+
+#if !defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_LOGO)
+		if (plane == 0)
+			if (fb_prepare_logo(&fbdev->fb_info, FB_ROTATE_UR)) {
+				/* Start display and show logo on boot */
+				fb_set_cmap(&fbdev->fb_info.cmap,
+						&fbdev->fb_info);
+
+				fb_show_logo(&fbdev->fb_info, FB_ROTATE_UR);
+			}
+#endif
+	}
+
+	/* Now hook interrupt too */
+	if ((ret = request_irq(AU1200_LCD_INT, au1200fb_handle_irq,
+		 	  SA_INTERRUPT | SA_SHIRQ, "lcd", (void *)dev)) < 0) {
+		print_err("fail to request interrupt line %d (err: %d)",
+			  AU1200_LCD_INT, ret);
+		goto failed;
+	}
+
+	return 0;
+
+failed:
+	/* NOTE: This only does the current plane/window that failed; others are still active */
+	if (fbdev->fb_mem)
+		dma_free_noncoherent(dev, PAGE_ALIGN(fbdev->fb_len),
+				fbdev->fb_mem, fbdev->fb_phys);
+	if (fbdev->fb_info.cmap.len != 0)
+		fb_dealloc_cmap(&fbdev->fb_info.cmap);
+	if (fbdev->fb_info.pseudo_palette)
+		kfree(fbdev->fb_info.pseudo_palette);
+	if (plane == 0)
+		free_irq(AU1200_LCD_INT, (void*)dev);
+	return ret;
+}
+
+static int au1200fb_drv_remove(struct device *dev)
+{
+	struct au1200fb_device *fbdev;
+	int plane;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* Turn off the panel */
+	au1200_setpanel(NULL);
+
+	for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane)
+	{
+		fbdev = &_au1200fb_devices[plane];
+
+		/* Clean up all probe data */
+		unregister_framebuffer(&fbdev->fb_info);
+		if (fbdev->fb_mem)
+			dma_free_noncoherent(dev, PAGE_ALIGN(fbdev->fb_len),
+					fbdev->fb_mem, fbdev->fb_phys);
+		if (fbdev->fb_info.cmap.len != 0)
+			fb_dealloc_cmap(&fbdev->fb_info.cmap);
+		if (fbdev->fb_info.pseudo_palette)
+			kfree(fbdev->fb_info.pseudo_palette);
+	}
+
+	free_irq(AU1200_LCD_INT, (void *)dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int au1200fb_drv_suspend(struct device *dev, u32 state, u32 level)
+{
+	/* TODO */
+	return 0;
+}
+
+static int au1200fb_drv_resume(struct device *dev, u32 level)
+{
+	/* TODO */
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+static struct device_driver au1200fb_driver = {
+	.name		= "au1200-lcd",
+	.bus		= &platform_bus_type,
+	.probe		= au1200fb_drv_probe,
+	.remove		= au1200fb_drv_remove,
+#ifdef CONFIG_PM
+	.suspend	= au1200fb_drv_suspend,
+	.resume		= au1200fb_drv_resume,
+#endif
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Kernel driver */
+
+static void au1200fb_setup(void)
+{
+	char* options = NULL;
+	char* this_opt;
+	int num_panels = ARRAY_SIZE(known_lcd_panels);
+	int panel_idx = -1;
+
+	fb_get_options(DRIVER_NAME, &options);
+
+	if (options) {
+		while ((this_opt = strsep(&options,",")) != NULL) {
+			/* Panel option - can be panel name,
+			 * "bs" for board-switch, or number/index */
+			if (!strncmp(this_opt, "panel:", 6)) {
+				int i;
+				long int li;
+				char *endptr;
+				this_opt += 6;
+				/* First check for index, which allows
+				 * to short circuit this mess */
+				li = simple_strtol(this_opt, &endptr, 0);
+				if (*endptr == '\0') {
+					panel_idx = (int)li;
+				}
+				else if (strcmp(this_opt, "bs") == 0) {
+					extern int board_au1200fb_panel(void);
+					panel_idx = board_au1200fb_panel();
+				}
+
+				else
+				for (i = 0; i < num_panels; i++) {
+					if (!strcmp(this_opt, known_lcd_panels[i].name)) {
+						panel_idx = i;
+						break;
+					}
+				}
+
+				if ((panel_idx < 0) || (panel_idx >= num_panels)) {
+						print_warn("Panel %s not supported!", this_opt);
+				}
+				else
+					panel_index = panel_idx;
+			}
+
+			else if (strncmp(this_opt, "nohwcursor", 10) == 0) {
+				nohwcursor = 1;
+			}
+
+			/* Unsupported option */
+			else {
+				print_warn("Unsupported option \"%s\"", this_opt);
+			}
+		}
+	}
+}
+
+#ifdef CONFIG_PM
+static int au1200fb_pm_callback(au1xxx_power_dev_t *dev,
+		au1xxx_request_t request, void *data) {
+	int retval = -1;
+	unsigned int d = 0;
+	unsigned int brightness = 0;
+
+	if (request == AU1XXX_PM_SLEEP) {
+		board_au1200fb_panel_shutdown();
+	}
+	else if (request == AU1XXX_PM_WAKEUP) {
+		if(dev->prev_state == SLEEP_STATE)
+		{
+			int plane;
+			au1200_setpanel(panel);
+			for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane) 	{
+				struct au1200fb_device *fbdev;
+				fbdev = &_au1200fb_devices[plane];
+				au1200fb_fb_set_par(&fbdev->fb_info);
+			}
+		}
+
+		d = *((unsigned int*)data);
+		if(d <=10) brightness = 26;
+		else if(d<=20) brightness = 51;
+		else if(d<=30) brightness = 77;
+		else if(d<=40) brightness = 102;
+		else if(d<=50) brightness = 128;
+		else if(d<=60) brightness = 153;
+		else if(d<=70) brightness = 179;
+		else if(d<=80) brightness = 204;
+		else if(d<=90) brightness = 230;
+		else brightness = 255;
+		set_brightness(brightness);
+	} else if (request == AU1XXX_PM_GETSTATUS) {
+		return dev->cur_state;
+	} else if (request == AU1XXX_PM_ACCESS) {
+		if (dev->cur_state != SLEEP_STATE)
+			return retval;
+		else {
+			au1200_setpanel(panel);
+		}
+	} else if (request == AU1XXX_PM_IDLE) {
+	} else if (request == AU1XXX_PM_CLEANUP) {
+	}
+
+	return retval;
+}
+#endif
+
+static int __init au1200fb_init(void)
+{
+	print_info("" DRIVER_DESC "");
+
+	/* Setup driver with options */
+	au1200fb_setup();
+
+	/* Point to the panel selected */
+	panel = &known_lcd_panels[panel_index];
+	win = &windows[window_index];
+
+	printk(DRIVER_NAME ": Panel %d %s\n", panel_index, panel->name);
+	printk(DRIVER_NAME ": Win %d %s\n", window_index, win->name);
+
+	/* Kickstart the panel, the framebuffers/windows come soon enough */
+	au1200_setpanel(panel);
+
+	#ifdef CONFIG_PM
+	LCD_pm_dev = new_au1xxx_power_device("LCD", &au1200fb_pm_callback, NULL);
+	if ( LCD_pm_dev == NULL)
+		printk(KERN_INFO "Unable to create a power management device entry for the au1200fb.\n");
+	else
+		printk(KERN_INFO "Power management device entry for the au1200fb loaded.\n");
+	#endif
+
+	return driver_register(&au1200fb_driver);
+}
+
+static void __exit au1200fb_cleanup(void)
+{
+	driver_unregister(&au1200fb_driver);
+}
+
+module_init(au1200fb_init);
+module_exit(au1200fb_cleanup);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+/*
+ * BRIEF MODULE DESCRIPTION
+ *	Au1200 LCD Driver.
+ *
+ * Copyright 2004-2005 AMD
+ * Author: AMD
+ *
+ * Based on:
+ * linux/drivers/video/skeletonfb.c -- Skeleton for a frame buffer device
+ *  Created 28 Dec 1997 by Geert Uytterhoeven
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED	  ``AS	IS'' AND   ANY	EXPRESS OR IMPLIED
+ *  WARRANTIES,	  INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO	EVENT  SHALL   THE AUTHOR  BE	 LIABLE FOR ANY	  DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED	  TO, PROCUREMENT OF  SUBSTITUTE GOODS	OR SERVICES; LOSS OF
+ *  USE, DATA,	OR PROFITS; OR	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN	 CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ctype.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/mach-au1x00/au1000.h>
+#include "au1200fb.h"
+
+#ifdef CONFIG_PM
+#include <asm/mach-au1x00/au1xxx_pm.h>
+#endif
+
+#ifndef CONFIG_FB_AU1200_DEVS
+#define CONFIG_FB_AU1200_DEVS 4
+#endif
+
+#define DRIVER_NAME "au1200fb"
+#define DRIVER_DESC "LCD controller driver for AU1200 processors"
+
+#define DEBUG 1
+
+#define print_err(f, arg...) printk(KERN_ERR DRIVER_NAME ": " f "\n", ## arg)
+#define print_warn(f, arg...) printk(KERN_WARNING DRIVER_NAME ": " f "\n", ## arg)
+#define print_info(f, arg...) printk(KERN_INFO DRIVER_NAME ": " f "\n", ## arg)
+
+#if DEBUG
+#define print_dbg(f, arg...) printk(KERN_DEBUG __FILE__ ": " f "\n", ## arg)
+#else
+#define print_dbg(f, arg...) do {} while (0)
+#endif
+
+
+#define AU1200_LCD_FB_IOCTL 0x46FF
+
+#define AU1200_LCD_SET_SCREEN 1
+#define AU1200_LCD_GET_SCREEN 2
+#define AU1200_LCD_SET_WINDOW 3
+#define AU1200_LCD_GET_WINDOW 4
+#define AU1200_LCD_SET_PANEL  5
+#define AU1200_LCD_GET_PANEL  6
+
+#define SCREEN_SIZE		    (1<< 1)
+#define SCREEN_BACKCOLOR    (1<< 2)
+#define SCREEN_BRIGHTNESS   (1<< 3)
+#define SCREEN_COLORKEY     (1<< 4)
+#define SCREEN_MASK         (1<< 5)
+
+struct au1200_lcd_global_regs_t {
+	unsigned int flags;
+	unsigned int xsize;
+	unsigned int ysize;
+	unsigned int backcolor;
+	unsigned int brightness;
+	unsigned int colorkey;
+	unsigned int mask;
+	unsigned int panel_choice;
+	char panel_desc[80];
+
+};
+
+#define WIN_POSITION            (1<< 0)
+#define WIN_ALPHA_COLOR         (1<< 1)
+#define WIN_ALPHA_MODE          (1<< 2)
+#define WIN_PRIORITY            (1<< 3)
+#define WIN_CHANNEL             (1<< 4)
+#define WIN_BUFFER_FORMAT       (1<< 5)
+#define WIN_COLOR_ORDER         (1<< 6)
+#define WIN_PIXEL_ORDER         (1<< 7)
+#define WIN_SIZE                (1<< 8)
+#define WIN_COLORKEY_MODE       (1<< 9)
+#define WIN_DOUBLE_BUFFER_MODE  (1<< 10)
+#define WIN_RAM_ARRAY_MODE      (1<< 11)
+#define WIN_BUFFER_SCALE        (1<< 12)
+#define WIN_ENABLE	            (1<< 13)
+
+struct au1200_lcd_window_regs_t {
+	unsigned int flags;
+	unsigned int xpos;
+	unsigned int ypos;
+	unsigned int alpha_color;
+	unsigned int alpha_mode;
+	unsigned int priority;
+	unsigned int channel;
+	unsigned int buffer_format;
+	unsigned int color_order;
+	unsigned int pixel_order;
+	unsigned int xsize;
+	unsigned int ysize;
+	unsigned int colorkey_mode;
+	unsigned int double_buffer_mode;
+	unsigned int ram_array_mode;
+	unsigned int xscale;
+	unsigned int yscale;
+	unsigned int enable;
+};
+
+
+struct au1200_lcd_iodata_t {
+	unsigned int subcmd;
+	struct au1200_lcd_global_regs_t global;
+	struct au1200_lcd_window_regs_t window;
+};
+
+#if defined(__BIG_ENDIAN)
+#define LCD_CONTROL_DEFAULT_PO LCD_CONTROL_PO_11
+#else
+#define LCD_CONTROL_DEFAULT_PO LCD_CONTROL_PO_00
+#endif
+#define LCD_CONTROL_DEFAULT_SBPPF LCD_CONTROL_SBPPF_565
+
+/* Private, per-framebuffer management information (independent of the panel itself) */
+struct au1200fb_device {
+	struct fb_info fb_info;			/* FB driver info record */
+
+	int					plane;
+	unsigned char* 		fb_mem;		/* FrameBuffer memory map */
+	unsigned int		fb_len;
+	dma_addr_t    		fb_phys;
+};
+
+static struct au1200fb_device _au1200fb_devices[CONFIG_FB_AU1200_DEVS];
+/********************************************************************/
+
+/* LCD controller restrictions */
+#define AU1200_LCD_MAX_XRES	1280
+#define AU1200_LCD_MAX_YRES	1024
+#define AU1200_LCD_MAX_BPP	32
+#define AU1200_LCD_MAX_CLK	96000000 /* fixme: this needs to go away ? */
+#define AU1200_LCD_NBR_PALETTE_ENTRIES 256
+
+/* Default number of visible screen buffer to allocate */
+#define AU1200FB_NBR_VIDEO_BUFFERS 1
+
+/********************************************************************/
+
+static struct au1200_lcd *lcd = (struct au1200_lcd *) AU1200_LCD_ADDR;
+static int window_index = 2; /* default is zero */
+static int panel_index = 2; /* default is zero */
+static struct window_settings *win;
+static struct panel_settings *panel;
+static int noblanking = 1;
+static int nohwcursor = 0;
+
+struct window_settings {
+	unsigned char name[64];
+	uint32 mode_backcolor;
+	uint32 mode_colorkey;
+	uint32 mode_colorkeymsk;
+	struct {
+		int xres;
+		int yres;
+		int xpos;
+		int ypos;
+		uint32 mode_winctrl1; /* winctrl1[FRM,CCO,PO,PIPE] */
+		uint32 mode_winenable;
+	} w[4];
+};
+
+#if defined(__BIG_ENDIAN)
+#define LCD_WINCTRL1_PO_16BPP LCD_WINCTRL1_PO_00
+#else
+#define LCD_WINCTRL1_PO_16BPP LCD_WINCTRL1_PO_01
+#endif
+
+extern int board_au1200fb_panel_init (void);
+extern int board_au1200fb_panel_shutdown (void);
+
+#ifdef CONFIG_PM
+int au1200fb_pm_callback(au1xxx_power_dev_t *dev,
+		au1xxx_request_t request, void *data);
+au1xxx_power_dev_t *LCD_pm_dev;
+#endif
+
+/*
+ * Default window configurations
+ */
+static struct window_settings windows[] = {
+	{ /* Index 0 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 100, 100, 100, 100,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ LCD_WINENABLE_WEN1,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+
+	{ /* Index 1 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 320, 240, 5, 5,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_24BPP |
+				LCD_WINCTRL1_PO_00,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565
+				| LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 100, 100, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0/*LCD_WINENABLE_WEN2*/,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 200, 25, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+	{ /* Index 2 */
+		"0-FS gfx, 1-video, 2-ovly gfx, 3-ovly gfx",
+		/* mode_backcolor	*/ 0x006600ff,
+		/* mode_colorkey,msk*/ 0, 0,
+		{
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ LCD_WINENABLE_WEN0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP,
+			/* mode_winenable*/ 0,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_32BPP |
+				LCD_WINCTRL1_PO_00|LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0/*LCD_WINENABLE_WEN2*/,
+			},
+			{
+			/* xres, yres, xpos, ypos */ 0, 0, 0, 0,
+			/* mode_winctrl1 */ LCD_WINCTRL1_FRM_16BPP565 |
+				LCD_WINCTRL1_PO_16BPP |
+				LCD_WINCTRL1_PIPE,
+			/* mode_winenable*/ 0,
+			},
+		},
+	},
+	/* Need VGA 640 @ 24bpp, @ 32bpp */
+	/* Need VGA 800 @ 24bpp, @ 32bpp */
+	/* Need VGA 1024 @ 24bpp, @ 32bpp */
+};
+
+/*
+ * Controller configurations for various panels.
+ */
+
+struct panel_settings
+{
+	const char name[25];		/* Full name <vendor>_<model> */
+
+	struct 	fb_monspecs monspecs; 	/* FB monitor specs */
+
+	/* panel timings */
+	uint32 mode_screen;
+	uint32 mode_horztiming;
+	uint32 mode_verttiming;
+	uint32 mode_clkcontrol;
+	uint32 mode_pwmdiv;
+	uint32 mode_pwmhi;
+	uint32 mode_outmask;
+	uint32 mode_fifoctrl;
+	uint32 mode_toyclksrc;
+	uint32 mode_backlight;
+	uint32 mode_auxpll;
+	int (*device_init)(void);
+	int (*device_shutdown)(void);
+#define Xres min_xres
+#define Yres min_yres
+	u32	min_xres;		/* Minimum horizontal resolution */
+	u32	max_xres;		/* Maximum horizontal resolution */
+	u32 	min_yres;		/* Minimum vertical resolution */
+	u32 	max_yres;		/* Maximum vertical resolution */
+};
+
+/********************************************************************/
+/* fixme: Maybe a modedb for the CRT ? otherwise panels should be as-is */
+
+/* List of panels known to work with the AU1200 LCD controller.
+ * To add a new panel, enter the same specifications as the
+ * Generic_TFT one, and MAKE SURE that it doesn't conflicts
+ * with the controller restrictions. Restrictions are:
+ *
+ * STN color panels: max_bpp <= 12
+ * STN mono panels: max_bpp <= 4
+ * TFT panels: max_bpp <= 16
+ * max_xres <= 800
+ * max_yres <= 600
+ */
+static struct panel_settings known_lcd_panels[] =
+{
+	[0] = { /* QVGA 320x240 H:33.3kHz V:110Hz */
+		.name = "QVGA_320x240",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(320) |
+			LCD_SCREEN_SY_N(240),
+		.mode_horztiming	= 0x00c4623b,
+		.mode_verttiming	= 0x00502814,
+		.mode_clkcontrol	= 0x00020002, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		320, 320,
+		240, 240,
+	},
+
+	[1] = { /* VGA 640x480 H:30.3kHz V:58Hz */
+		.name = "VGA_640x480",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x13f9df80,
+		.mode_horztiming	= 0x003c5859,
+		.mode_verttiming	= 0x00741201,
+		.mode_clkcontrol	= 0x00020001, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		640, 480,
+		640, 480,
+	},
+
+	[2] = { /* SVGA 800x600 H:46.1kHz V:69Hz */
+		.name = "SVGA_800x600",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x18fa5780,
+		.mode_horztiming	= 0x00dc7e77,
+		.mode_verttiming	= 0x00584805,
+		.mode_clkcontrol	= 0x00020000, /* /2=48Mhz */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		800, 800,
+		600, 600,
+	},
+
+	[3] = { /* XVGA 1024x768 H:56.2kHz V:70Hz */
+		.name = "XVGA_1024x768",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x1ffaff80,
+		.mode_horztiming	= 0x007d0e57,
+		.mode_verttiming	= 0x00740a01,
+		.mode_clkcontrol	= 0x000A0000, /* /1 */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 6, /* 72MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		1024, 1024,
+		768, 768,
+	},
+
+	[4] = { /* XVGA XVGA 1280x1024 H:68.5kHz V:65Hz */
+		.name = "XVGA_1280x1024",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x27fbff80,
+		.mode_horztiming	= 0x00cdb2c7,
+		.mode_verttiming	= 0x00600002,
+		.mode_clkcontrol	= 0x000A0000, /* /1 */
+		.mode_pwmdiv		= 0x00000000,
+		.mode_pwmhi		= 0x00000000,
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 10, /* 120MHz AUXPLL */
+		.device_init		= NULL,
+		.device_shutdown	= NULL,
+		1280, 1280,
+		1024, 1024,
+	},
+
+	[5] = { /* Samsung 1024x768 TFT */
+		.name = "Samsung_1024x768_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= 0x1ffaff80,
+		.mode_horztiming	= 0x018cc677,
+		.mode_verttiming	= 0x00241217,
+		.mode_clkcontrol	= 0x00000000, /* SCB 0x1 /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f, /* SCB 0x0 */
+		.mode_pwmhi		= 0x03400000, /* SCB 0x0 */
+		.mode_outmask	= 0x00FFFFFF,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		1024, 1024,
+		768, 768,
+	},
+
+	[6] = { /* Toshiba 640x480 TFT */
+		.name = "Toshiba_640x480_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(640) |
+			LCD_SCREEN_SY_N(480),
+		.mode_horztiming	= LCD_HORZTIMING_HPW_N(96) |
+			LCD_HORZTIMING_HND1_N(13) | LCD_HORZTIMING_HND2_N(51),
+		.mode_verttiming	= LCD_VERTTIMING_VPW_N(2) |
+			LCD_VERTTIMING_VND1_N(11) | LCD_VERTTIMING_VND2_N(32),
+		.mode_clkcontrol	= 0x00000000, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		640, 480,
+		640, 480,
+	},
+
+	[7] = { /* Sharp 320x240 TFT */
+		.name = "Sharp_320x240_TFT",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 12500,
+			.hfmax = 20000,
+			.vfmin = 38,
+			.vfmax = 81,
+			.dclkmin = 4500000,
+			.dclkmax = 6800000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(320) |
+			LCD_SCREEN_SY_N(240),
+		.mode_horztiming	= LCD_HORZTIMING_HPW_N(60) |
+			LCD_HORZTIMING_HND1_N(13) | LCD_HORZTIMING_HND2_N(2),
+		.mode_verttiming	= LCD_VERTTIMING_VPW_N(2) |
+			LCD_VERTTIMING_VND1_N(2) | LCD_VERTTIMING_VND2_N(5),
+		.mode_clkcontrol	= LCD_CLKCONTROL_PCD_N(7), /*16=6Mhz*/
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		320, 320,
+		240, 240,
+	},
+
+	[8] = { /* Toppoly TD070WGCB2 7" 856x480 TFT */
+		.name = "Toppoly_TD070WGCB2",
+		.monspecs = {
+			.modedb = NULL,
+			.modedb_len = 0,
+			.hfmin = 30000,
+			.hfmax = 70000,
+			.vfmin = 60,
+			.vfmax = 60,
+			.dclkmin = 6000000,
+			.dclkmax = 28000000,
+			.input = FB_DISP_RGB,
+		},
+		.mode_screen		= LCD_SCREEN_SX_N(856) |
+			LCD_SCREEN_SY_N(480),
+		.mode_horztiming	= LCD_HORZTIMING_HND2_N(43) |
+			LCD_HORZTIMING_HND1_N(43) | LCD_HORZTIMING_HPW_N(114),
+		.mode_verttiming	= LCD_VERTTIMING_VND2_N(20) |
+			LCD_VERTTIMING_VND1_N(21) | LCD_VERTTIMING_VPW_N(4),
+		.mode_clkcontrol	= 0x00020001, /* /4=24Mhz */
+		.mode_pwmdiv		= 0x8000063f,
+		.mode_pwmhi		= 0x03400000,
+		.mode_outmask	= 0x00fcfcfc,
+		.mode_fifoctrl	= 0x2f2f2f2f,
+		.mode_toyclksrc	= 0x00000004, /* AUXPLL directly */
+		.mode_backlight	= 0x00000000,
+		.mode_auxpll		= 8, /* 96MHz AUXPLL */
+		.device_init		= board_au1200fb_panel_init,
+		.device_shutdown	= board_au1200fb_panel_shutdown,
+		856, 856,
+		480, 480,
+	},
+};
+
+#define NUM_PANELS (ARRAY_SIZE(known_lcd_panels))
+
+/********************************************************************/
+
+#ifdef CONFIG_PM
+static int set_brightness(unsigned int brightness)
+{
+	unsigned int hi1, divider;
+
+	/* limit brightness pwm duty to >= 30/1600 */
+	if (brightness < 30) {
+		brightness = 30;
+	}
+	divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+	hi1 = (lcd->pwmhi >> 16) + 1;
+	hi1 = (((brightness & 0xFF) + 1) * divider >> 8);
+	lcd->pwmhi &= 0xFFFF;
+	lcd->pwmhi |= (hi1 << 16);
+
+	return brightness;
+}
+#endif /* CONFIG_PM */
+
+static int winbpp (unsigned int winctrl1)
+{
+	int bits = 0;
+
+	/* how many bits are needed for each pixel format */
+	switch (winctrl1 & LCD_WINCTRL1_FRM) {
+	case LCD_WINCTRL1_FRM_1BPP:
+		bits = 1;
+		break;
+	case LCD_WINCTRL1_FRM_2BPP:
+		bits = 2;
+		break;
+	case LCD_WINCTRL1_FRM_4BPP:
+		bits = 4;
+		break;
+	case LCD_WINCTRL1_FRM_8BPP:
+		bits = 8;
+		break;
+	case LCD_WINCTRL1_FRM_12BPP:
+	case LCD_WINCTRL1_FRM_16BPP655:
+	case LCD_WINCTRL1_FRM_16BPP565:
+	case LCD_WINCTRL1_FRM_16BPP556:
+	case LCD_WINCTRL1_FRM_16BPPI1555:
+	case LCD_WINCTRL1_FRM_16BPPI5551:
+	case LCD_WINCTRL1_FRM_16BPPA1555:
+	case LCD_WINCTRL1_FRM_16BPPA5551:
+		bits = 16;
+		break;
+	case LCD_WINCTRL1_FRM_24BPP:
+	case LCD_WINCTRL1_FRM_32BPP:
+		bits = 32;
+		break;
+	}
+
+	return bits;
+}
+
+static int fbinfo2index (struct fb_info *fb_info)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_FB_AU1200_DEVS; ++i) {
+		if (fb_info == (struct fb_info *)(&_au1200fb_devices[i].fb_info))
+			return i;
+	}
+	printk("au1200fb: ERROR: fbinfo2index failed!\n");
+	return -1;
+}
+
+static int au1200_setlocation (struct au1200fb_device *fbdev, int plane,
+	int xpos, int ypos)
+{
+	uint32 winctrl0, winctrl1, winenable, fb_offset = 0;
+	int xsz, ysz;
+
+	/* FIX!!! NOT CHECKING FOR COMPLETE OFFSCREEN YET */
+
+	winctrl0 = lcd->window[plane].winctrl0;
+	winctrl1 = lcd->window[plane].winctrl1;
+	winctrl0 &= (LCD_WINCTRL0_A | LCD_WINCTRL0_AEN);
+	winctrl1 &= ~(LCD_WINCTRL1_SZX | LCD_WINCTRL1_SZY);
+
+	/* Check for off-screen adjustments */
+	xsz = win->w[plane].xres;
+	ysz = win->w[plane].yres;
+	if ((xpos + win->w[plane].xres) > panel->Xres) {
+		/* Off-screen to the right */
+		xsz = panel->Xres - xpos; /* off by 1 ??? */
+		/*printk("off screen right\n");*/
+	}
+
+	if ((ypos + win->w[plane].yres) > panel->Yres) {
+		/* Off-screen to the bottom */
+		ysz = panel->Yres - ypos; /* off by 1 ??? */
+		/*printk("off screen bottom\n");*/
+	}
+
+	if (xpos < 0) {
+		/* Off-screen to the left */
+		xsz = win->w[plane].xres + xpos;
+		fb_offset += (((0 - xpos) * winbpp(lcd->window[plane].winctrl1))/8);
+		xpos = 0;
+		/*printk("off screen left\n");*/
+	}
+
+	if (ypos < 0) {
+		/* Off-screen to the top */
+		ysz = win->w[plane].yres + ypos;
+		/* fixme: fb_offset += ((0-ypos)*fb_pars[plane].line_length); */
+		ypos = 0;
+		/*printk("off screen top\n");*/
+	}
+
+	/* record settings */
+	win->w[plane].xpos = xpos;
+	win->w[plane].ypos = ypos;
+
+	xsz -= 1;
+	ysz -= 1;
+	winctrl0 |= (xpos << 21);
+	winctrl0 |= (ypos << 10);
+	winctrl1 |= (xsz << 11);
+	winctrl1 |= (ysz << 0);
+
+	/* Disable the window while making changes, then restore WINEN */
+	winenable = lcd->winenable & (1 << plane);
+	au_sync();
+	lcd->winenable &= ~(1 << plane);
+	lcd->window[plane].winctrl0 = winctrl0;
+	lcd->window[plane].winctrl1 = winctrl1;
+	lcd->window[plane].winbuf0 =
+	lcd->window[plane].winbuf1 = fbdev->fb_phys;
+	lcd->window[plane].winbufctrl = 0; /* select winbuf0 */
+	lcd->winenable |= winenable;
+	au_sync();
+
+	return 0;
+}
+
+static void au1200_setpanel (struct panel_settings *newpanel)
+{
+	/*
+	 * Perform global setup/init of LCD controller
+	 */
+	uint32 winenable;
+
+	/* Make sure all windows disabled */
+	winenable = lcd->winenable;
+	lcd->winenable = 0;
+	au_sync();
+	/*
+	 * Ensure everything is disabled before reconfiguring
+	 */
+	if (lcd->screen & LCD_SCREEN_SEN) {
+		/* Wait for vertical sync period */
+		lcd->intstatus = LCD_INT_SS;
+		while ((lcd->intstatus & LCD_INT_SS) == 0) {
+			au_sync();
+		}
+
+		lcd->screen &= ~LCD_SCREEN_SEN;	/*disable the controller*/
+
+		do {
+			lcd->intstatus = lcd->intstatus; /*clear interrupts*/
+			au_sync();
+		/*wait for controller to shut down*/
+		} while ((lcd->intstatus & LCD_INT_SD) == 0);
+
+		/* Call shutdown of current panel (if up) */
+		/* this must occur last, because if an external clock is driving
+		    the controller, the clock cannot be turned off before first
+			shutting down the controller.
+		 */
+		if (panel->device_shutdown != NULL)
+			panel->device_shutdown();
+	}
+
+	/* Newpanel == NULL indicates a shutdown operation only */
+	if (newpanel == NULL)
+		return;
+
+	panel = newpanel;
+
+	printk("Panel(%s), %dx%d\n", panel->name, panel->Xres, panel->Yres);
+
+	/*
+	 * Setup clocking if internal LCD clock source (assumes sys_auxpll valid)
+	 */
+	if (!(panel->mode_clkcontrol & LCD_CLKCONTROL_EXT))
+	{
+		uint32 sys_clksrc;
+		au_writel(panel->mode_auxpll, SYS_AUXPLL);
+		sys_clksrc = au_readl(SYS_CLKSRC) & ~0x0000001f;
+		sys_clksrc |= panel->mode_toyclksrc;
+		au_writel(sys_clksrc, SYS_CLKSRC);
+	}
+
+	/*
+	 * Configure panel timings
+	 */
+	lcd->screen = panel->mode_screen;
+	lcd->horztiming = panel->mode_horztiming;
+	lcd->verttiming = panel->mode_verttiming;
+	lcd->clkcontrol = panel->mode_clkcontrol;
+	lcd->pwmdiv = panel->mode_pwmdiv;
+	lcd->pwmhi = panel->mode_pwmhi;
+	lcd->outmask = panel->mode_outmask;
+	lcd->fifoctrl = panel->mode_fifoctrl;
+	au_sync();
+
+	/* fixme: Check window settings to make sure still valid
+	 * for new geometry */
+#if 0
+	au1200_setlocation(fbdev, 0, win->w[0].xpos, win->w[0].ypos);
+	au1200_setlocation(fbdev, 1, win->w[1].xpos, win->w[1].ypos);
+	au1200_setlocation(fbdev, 2, win->w[2].xpos, win->w[2].ypos);
+	au1200_setlocation(fbdev, 3, win->w[3].xpos, win->w[3].ypos);
+#endif
+	lcd->winenable = winenable;
+
+	/*
+	 * Re-enable screen now that it is configured
+	 */
+	lcd->screen |= LCD_SCREEN_SEN;
+	au_sync();
+
+	/* Call init of panel */
+	if (panel->device_init != NULL) panel->device_init();
+
+	/* FIX!!!! not appropriate on panel change!!! Global setup/init */
+	lcd->intenable = 0;
+	lcd->intstatus = ~0;
+	lcd->backcolor = win->mode_backcolor;
+
+	/* Setup Color Key - FIX!!! */
+	lcd->colorkey = win->mode_colorkey;
+	lcd->colorkeymsk = win->mode_colorkeymsk;
+
+	/* Setup HWCursor - FIX!!! Need to support this eventually */
+	lcd->hwc.cursorctrl = 0;
+	lcd->hwc.cursorpos = 0;
+	lcd->hwc.cursorcolor0 = 0;
+	lcd->hwc.cursorcolor1 = 0;
+	lcd->hwc.cursorcolor2 = 0;
+	lcd->hwc.cursorcolor3 = 0;
+
+
+#if 0
+#define D(X) printk("%25s: %08X\n", #X, X)
+	D(lcd->screen);
+	D(lcd->horztiming);
+	D(lcd->verttiming);
+	D(lcd->clkcontrol);
+	D(lcd->pwmdiv);
+	D(lcd->pwmhi);
+	D(lcd->outmask);
+	D(lcd->fifoctrl);
+	D(lcd->window[0].winctrl0);
+	D(lcd->window[0].winctrl1);
+	D(lcd->window[0].winctrl2);
+	D(lcd->window[0].winbuf0);
+	D(lcd->window[0].winbuf1);
+	D(lcd->window[0].winbufctrl);
+	D(lcd->window[1].winctrl0);
+	D(lcd->window[1].winctrl1);
+	D(lcd->window[1].winctrl2);
+	D(lcd->window[1].winbuf0);
+	D(lcd->window[1].winbuf1);
+	D(lcd->window[1].winbufctrl);
+	D(lcd->window[2].winctrl0);
+	D(lcd->window[2].winctrl1);
+	D(lcd->window[2].winctrl2);
+	D(lcd->window[2].winbuf0);
+	D(lcd->window[2].winbuf1);
+	D(lcd->window[2].winbufctrl);
+	D(lcd->window[3].winctrl0);
+	D(lcd->window[3].winctrl1);
+	D(lcd->window[3].winctrl2);
+	D(lcd->window[3].winbuf0);
+	D(lcd->window[3].winbuf1);
+	D(lcd->window[3].winbufctrl);
+	D(lcd->winenable);
+	D(lcd->intenable);
+	D(lcd->intstatus);
+	D(lcd->backcolor);
+	D(lcd->winenable);
+	D(lcd->colorkey);
+    D(lcd->colorkeymsk);
+	D(lcd->hwc.cursorctrl);
+	D(lcd->hwc.cursorpos);
+	D(lcd->hwc.cursorcolor0);
+	D(lcd->hwc.cursorcolor1);
+	D(lcd->hwc.cursorcolor2);
+	D(lcd->hwc.cursorcolor3);
+#endif
+}
+
+static void au1200_setmode(struct au1200fb_device *fbdev)
+{
+	int plane = fbdev->plane;
+	/* Window/plane setup */
+	lcd->window[plane].winctrl1 = ( 0
+		| LCD_WINCTRL1_PRI_N(plane)
+		| win->w[plane].mode_winctrl1 /* FRM,CCO,PO,PIPE */
+		) ;
+
+	au1200_setlocation(fbdev, plane, win->w[plane].xpos, win->w[plane].ypos);
+
+	lcd->window[plane].winctrl2 = ( 0
+		| LCD_WINCTRL2_CKMODE_00
+		| LCD_WINCTRL2_DBM
+		| LCD_WINCTRL2_BX_N( fbdev->fb_info.fix.line_length)
+		| LCD_WINCTRL2_SCX_1
+		| LCD_WINCTRL2_SCY_1
+		) ;
+	lcd->winenable |= win->w[plane].mode_winenable;
+	au_sync();
+}
+
+
+/* Inline helpers */
+
+/*#define panel_is_dual(panel)  ((panel->mode_screen & LCD_SCREEN_PT) == LCD_SCREEN_PT_010)*/
+/*#define panel_is_active(panel)((panel->mode_screen & LCD_SCREEN_PT) == LCD_SCREEN_PT_010)*/
+
+#define panel_is_color(panel) ((panel->mode_screen & LCD_SCREEN_PT) <= LCD_SCREEN_PT_CDSTN)
+
+/* Bitfields format supported by the controller. */
+static struct fb_bitfield rgb_bitfields[][4] = {
+  	/*     Red, 	   Green, 	 Blue, 	     Transp   */
+	[LCD_WINCTRL1_FRM_16BPP655 >> 25] =
+		{ { 10, 6, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPP565 >> 25] =
+		{ { 11, 5, 0 }, { 5, 6, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPP556 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 0, 6, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPI1555 >> 25] =
+		{ { 10, 5, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPI5551 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 1, 5, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPA1555 >> 25] =
+		{ { 10, 5, 0 }, { 5, 5, 0 }, { 0, 5, 0 }, { 15, 1, 0 } },
+
+	[LCD_WINCTRL1_FRM_16BPPA5551 >> 25] =
+		{ { 11, 5, 0 }, { 6, 5, 0 }, { 1, 5, 0 }, { 0, 1, 0 } },
+
+	[LCD_WINCTRL1_FRM_24BPP >> 25] =
+		{ { 16, 8, 0 }, { 8, 8, 0 }, { 0, 8, 0 }, { 0, 0, 0 } },
+
+	[LCD_WINCTRL1_FRM_32BPP >> 25] =
+		{ { 16, 8, 0 }, { 8, 8, 0 }, { 0, 8, 0 }, { 24, 0, 0 } },
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Helpers */
+
+static void au1200fb_update_fbinfo(struct fb_info *fbi)
+{
+	/* FIX!!!! This also needs to take the window pixel format into account!!! */
+
+	/* Update var-dependent FB info */
+	if (panel_is_color(panel)) {
+		if (fbi->var.bits_per_pixel <= 8) {
+			/* palettized */
+			fbi->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+			fbi->fix.line_length = fbi->var.xres_virtual /
+				(8/fbi->var.bits_per_pixel);
+		} else {
+			/* non-palettized */
+			fbi->fix.visual = FB_VISUAL_TRUECOLOR;
+			fbi->fix.line_length = fbi->var.xres_virtual * (fbi->var.bits_per_pixel / 8);
+		}
+	} else {
+		/* mono FIX!!! mono 8 and 4 bits */
+		fbi->fix.visual = FB_VISUAL_MONO10;
+		fbi->fix.line_length = fbi->var.xres_virtual / 8;
+	}
+
+	fbi->screen_size = fbi->fix.line_length * fbi->var.yres_virtual;
+	print_dbg("line length: %d\n", fbi->fix.line_length);
+	print_dbg("bits_per_pixel: %d\n", fbi->var.bits_per_pixel);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 framebuffer driver */
+
+/* fb_check_var
+ * Validate var settings with hardware restrictions and modify it if necessary
+ */
+static int au1200fb_fb_check_var(struct fb_var_screeninfo *var,
+	struct fb_info *fbi)
+{
+	struct au1200fb_device *fbdev = (struct au1200fb_device *)fbi;
+	u32 pixclock;
+	int screen_size, plane;
+
+	plane = fbdev->plane;
+
+	/* Make sure that the mode respect all LCD controller and
+	 * panel restrictions. */
+	var->xres = win->w[plane].xres;
+	var->yres = win->w[plane].yres;
+
+	/* No need for virtual resolution support */
+	var->xres_virtual = var->xres;
+	var->yres_virtual = var->yres;
+
+	var->bits_per_pixel = winbpp(win->w[plane].mode_winctrl1);
+
+	screen_size = var->xres_virtual * var->yres_virtual;
+	if (var->bits_per_pixel > 8) screen_size *= (var->bits_per_pixel / 8);
+	else screen_size /= (8/var->bits_per_pixel);
+
+	if (fbdev->fb_len < screen_size)
+		return -EINVAL; /* Virtual screen is to big, abort */
+
+	/* FIX!!!! what are the implicaitons of ignoring this for windows ??? */
+	/* The max LCD clock is fixed to 48MHz (value of AUX_CLK). The pixel
+	 * clock can only be obtain by dividing this value by an even integer.
+	 * Fallback to a slower pixel clock if necessary. */
+	pixclock = max((u32)(PICOS2KHZ(var->pixclock) * 1000), fbi->monspecs.dclkmin);
+	pixclock = min(pixclock, min(fbi->monspecs.dclkmax, (u32)AU1200_LCD_MAX_CLK/2));
+
+	if (AU1200_LCD_MAX_CLK % pixclock) {
+		int diff = AU1200_LCD_MAX_CLK % pixclock;
+		pixclock -= diff;
+	}
+
+	var->pixclock = KHZ2PICOS(pixclock/1000);
+#if 0
+	if (!panel_is_active(panel)) {
+		int pcd = AU1200_LCD_MAX_CLK / (pixclock * 2) - 1;
+
+		if (!panel_is_color(panel)
+			&& (panel->control_base & LCD_CONTROL_MPI) && (pcd < 3)) {
+			/* STN 8bit mono panel support is up to 6MHz pixclock */
+			var->pixclock = KHZ2PICOS(6000);
+		} else if (!pcd) {
+			/* Other STN panel support is up to 12MHz  */
+			var->pixclock = KHZ2PICOS(12000);
+		}
+	}
+#endif
+	/* Set bitfield accordingly */
+	switch (var->bits_per_pixel) {
+		case 16:
+		{
+			/* 16bpp True color.
+			 * These must be set to MATCH WINCTRL[FORM] */
+			int idx;
+			idx = (win->w[0].mode_winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+			var->red    = rgb_bitfields[idx][0];
+			var->green  = rgb_bitfields[idx][1];
+			var->blue   = rgb_bitfields[idx][2];
+			var->transp = rgb_bitfields[idx][3];
+			break;
+		}
+
+		case 32:
+		{
+			/* 32bpp True color.
+			 * These must be set to MATCH WINCTRL[FORM] */
+			int idx;
+			idx = (win->w[0].mode_winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+			var->red    = rgb_bitfields[idx][0];
+			var->green  = rgb_bitfields[idx][1];
+			var->blue   = rgb_bitfields[idx][2];
+			var->transp = rgb_bitfields[idx][3];
+			break;
+		}
+		default:
+			print_dbg("Unsupported depth %dbpp", var->bits_per_pixel);
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* fb_set_par
+ * Set hardware with var settings. This will enable the controller with a
+ * specific mode, normally validated with the fb_check_var method
+ */
+static int au1200fb_fb_set_par(struct fb_info *fbi)
+{
+	struct au1200fb_device *fbdev = (struct au1200fb_device *)fbi;
+
+	au1200fb_update_fbinfo(fbi);
+	au1200_setmode(fbdev);
+
+	return 0;
+}
+
+/* fb_setcolreg
+ * Set color in LCD palette.
+ */
+static int au1200fb_fb_setcolreg(unsigned regno, unsigned red, unsigned green,
+	unsigned blue, unsigned transp, struct fb_info *fbi)
+{
+	volatile u32 *palette = lcd->palette;
+	u32 value;
+
+	if (regno > (AU1200_LCD_NBR_PALETTE_ENTRIES - 1))
+		return -EINVAL;
+
+	if (fbi->var.grayscale) {
+		/* Convert color to grayscale */
+		red = green = blue =
+			(19595 * red + 38470 * green + 7471 * blue) >> 16;
+	}
+
+	if (fbi->fix.visual == FB_VISUAL_TRUECOLOR) {
+		/* Place color in the pseudopalette */
+		if (regno > 16)
+			return -EINVAL;
+
+		palette = (u32*) fbi->pseudo_palette;
+
+		red   >>= (16 - fbi->var.red.length);
+		green >>= (16 - fbi->var.green.length);
+		blue  >>= (16 - fbi->var.blue.length);
+
+		value = (red   << fbi->var.red.offset) 	|
+			(green << fbi->var.green.offset)|
+			(blue  << fbi->var.blue.offset);
+		value &= 0xFFFF;
+
+	} else if (1 /*FIX!!! panel_is_active(fbdev->panel)*/) {
+		/* COLOR TFT PALLETTIZED (use RGB 565) */
+		value = (red & 0xF800)|((green >> 5) &
+				0x07E0)|((blue >> 11) & 0x001F);
+		value &= 0xFFFF;
+
+	} else if (0 /*panel_is_color(fbdev->panel)*/) {
+		/* COLOR STN MODE */
+		value = 0x1234;
+		value &= 0xFFF;
+	} else {
+		/* MONOCHROME MODE */
+		value = (green >> 12) & 0x000F;
+		value &= 0xF;
+	}
+
+	palette[regno] = value;
+
+	return 0;
+}
+
+/* fb_blank
+ * Blank the screen. Depending on the mode, the screen will be
+ * activated with the backlight color, or desactivated
+ */
+static int au1200fb_fb_blank(int blank_mode, struct fb_info *fbi)
+{
+	/* Short-circuit screen blanking */
+	if (noblanking)
+		return 0;
+
+	switch (blank_mode) {
+
+	case FB_BLANK_UNBLANK:
+	case FB_BLANK_NORMAL:
+		/* printk("turn on panel\n"); */
+		au1200_setpanel(panel);
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+	case FB_BLANK_HSYNC_SUSPEND:
+	case FB_BLANK_POWERDOWN:
+		/* printk("turn off panel\n"); */
+		au1200_setpanel(NULL);
+		break;
+	default:
+		break;
+
+	}
+
+	/* FB_BLANK_NORMAL is a soft blank */
+	return (blank_mode == FB_BLANK_NORMAL) ? -EINVAL : 0;
+}
+
+/* fb_mmap
+ * Map video memory in user space. We don't use the generic fb_mmap
+ * method mainly to allow the use of the TLB streaming flag (CCA=6)
+ */
+static int au1200fb_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+
+{
+	unsigned int len;
+	unsigned long start=0, off;
+	struct au1200fb_device *fbdev = (struct au1200fb_device *) info;
+
+#ifdef CONFIG_PM
+	au1xxx_pm_access(LCD_pm_dev);
+#endif
+
+	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) {
+		return -EINVAL;
+	}
+
+	start = fbdev->fb_phys & PAGE_MASK;
+	len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len);
+
+	off = vma->vm_pgoff << PAGE_SHIFT;
+
+	if ((vma->vm_end - vma->vm_start + off) > len) {
+		return -EINVAL;
+	}
+
+	off += start;
+	vma->vm_pgoff = off >> PAGE_SHIFT;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	pgprot_val(vma->vm_page_prot) |= _CACHE_MASK; /* CCA=7 */
+
+	vma->vm_flags |= VM_IO;
+
+	return io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+
+	return 0;
+}
+
+static void set_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata)
+{
+
+	unsigned int hi1, divider;
+
+	/* SCREEN_SIZE: user cannot reset size, must switch panel choice */
+
+	if (pdata->flags & SCREEN_BACKCOLOR)
+		lcd->backcolor = pdata->backcolor;
+
+	if (pdata->flags & SCREEN_BRIGHTNESS) {
+
+		// limit brightness pwm duty to >= 30/1600
+		if (pdata->brightness < 30) {
+			pdata->brightness = 30;
+		}
+		divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+		hi1 = (lcd->pwmhi >> 16) + 1;
+		hi1 = (((pdata->brightness & 0xFF)+1) * divider >> 8);
+		lcd->pwmhi &= 0xFFFF;
+		lcd->pwmhi |= (hi1 << 16);
+	}
+
+	if (pdata->flags & SCREEN_COLORKEY)
+		lcd->colorkey = pdata->colorkey;
+
+	if (pdata->flags & SCREEN_MASK)
+		lcd->colorkeymsk = pdata->mask;
+	au_sync();
+}
+
+static void get_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata)
+{
+	unsigned int hi1, divider;
+
+	pdata->xsize = ((lcd->screen & LCD_SCREEN_SX) >> 19) + 1;
+	pdata->ysize = ((lcd->screen & LCD_SCREEN_SY) >> 8) + 1;
+
+	pdata->backcolor = lcd->backcolor;
+	pdata->colorkey = lcd->colorkey;
+	pdata->mask = lcd->colorkeymsk;
+
+	// brightness
+	hi1 = (lcd->pwmhi >> 16) + 1;
+	divider = (lcd->pwmdiv & 0x3FFFF) + 1;
+	pdata->brightness = ((hi1 << 8) / divider) - 1;
+	au_sync();
+}
+
+static void set_window(unsigned int plane,
+	struct au1200_lcd_window_regs_t *pdata)
+{
+	unsigned int val, bpp;
+
+	/* Window control register 0 */
+	if (pdata->flags & WIN_POSITION) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_OX |
+				LCD_WINCTRL0_OY);
+		val |= ((pdata->xpos << 21) & LCD_WINCTRL0_OX);
+		val |= ((pdata->ypos << 10) & LCD_WINCTRL0_OY);
+		lcd->window[plane].winctrl0 = val;
+	}
+	if (pdata->flags & WIN_ALPHA_COLOR) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_A);
+		val |= ((pdata->alpha_color << 2) & LCD_WINCTRL0_A);
+		lcd->window[plane].winctrl0 = val;
+	}
+	if (pdata->flags & WIN_ALPHA_MODE) {
+		val = lcd->window[plane].winctrl0 & ~(LCD_WINCTRL0_AEN);
+		val |= ((pdata->alpha_mode << 1) & LCD_WINCTRL0_AEN);
+		lcd->window[plane].winctrl0 = val;
+	}
+
+	/* Window control register 1 */
+	if (pdata->flags & WIN_PRIORITY) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PRI);
+		val |= ((pdata->priority << 30) & LCD_WINCTRL1_PRI);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_CHANNEL) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PIPE);
+		val |= ((pdata->channel << 29) & LCD_WINCTRL1_PIPE);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_BUFFER_FORMAT) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_FRM);
+		val |= ((pdata->buffer_format << 25) & LCD_WINCTRL1_FRM);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_COLOR_ORDER) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_CCO);
+		val |= ((pdata->color_order << 24) & LCD_WINCTRL1_CCO);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_PIXEL_ORDER) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_PO);
+		val |= ((pdata->pixel_order << 22) & LCD_WINCTRL1_PO);
+		lcd->window[plane].winctrl1 = val;
+	}
+	if (pdata->flags & WIN_SIZE) {
+		val = lcd->window[plane].winctrl1 & ~(LCD_WINCTRL1_SZX |
+				LCD_WINCTRL1_SZY);
+		val |= (((pdata->xsize << 11) - 1) & LCD_WINCTRL1_SZX);
+		val |= (((pdata->ysize) - 1) & LCD_WINCTRL1_SZY);
+		lcd->window[plane].winctrl1 = val;
+		/* program buffer line width */
+		bpp = winbpp(val) / 8;
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_BX);
+		val |= (((pdata->xsize * bpp) << 8) & LCD_WINCTRL2_BX);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	/* Window control register 2 */
+	if (pdata->flags & WIN_COLORKEY_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_CKMODE);
+		val |= ((pdata->colorkey_mode << 24) & LCD_WINCTRL2_CKMODE);
+		lcd->window[plane].winctrl2 = val;
+	}
+	if (pdata->flags & WIN_DOUBLE_BUFFER_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_DBM);
+		val |= ((pdata->double_buffer_mode << 23) & LCD_WINCTRL2_DBM);
+		lcd->window[plane].winctrl2 = val;
+	}
+	if (pdata->flags & WIN_RAM_ARRAY_MODE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_RAM);
+		val |= ((pdata->ram_array_mode << 21) & LCD_WINCTRL2_RAM);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	/* Buffer line width programmed with WIN_SIZE */
+
+	if (pdata->flags & WIN_BUFFER_SCALE) {
+		val = lcd->window[plane].winctrl2 & ~(LCD_WINCTRL2_SCX |
+				LCD_WINCTRL2_SCY);
+		val |= ((pdata->xsize << 11) & LCD_WINCTRL2_SCX);
+		val |= ((pdata->ysize) & LCD_WINCTRL2_SCY);
+		lcd->window[plane].winctrl2 = val;
+	}
+
+	if (pdata->flags & WIN_ENABLE) {
+		val = lcd->winenable;
+		val &= ~(1<<plane);
+		val |= (pdata->enable & 1) << plane;
+		lcd->winenable = val;
+	}
+	au_sync();
+}
+
+static void get_window(unsigned int plane,
+	struct au1200_lcd_window_regs_t *pdata)
+{
+	/* Window control register 0 */
+	pdata->xpos = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_OX) >> 21;
+	pdata->ypos = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_OY) >> 10;
+	pdata->alpha_color = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_A) >> 2;
+	pdata->alpha_mode = (lcd->window[plane].winctrl0 & LCD_WINCTRL0_AEN) >> 1;
+
+	/* Window control register 1 */
+	pdata->priority = (lcd->window[plane].winctrl1& LCD_WINCTRL1_PRI) >> 30;
+	pdata->channel = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_PIPE) >> 29;
+	pdata->buffer_format = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_FRM) >> 25;
+	pdata->color_order = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_CCO) >> 24;
+	pdata->pixel_order = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_PO) >> 22;
+	pdata->xsize = ((lcd->window[plane].winctrl1 & LCD_WINCTRL1_SZX) >> 11) + 1;
+	pdata->ysize = (lcd->window[plane].winctrl1 & LCD_WINCTRL1_SZY) + 1;
+
+	/* Window control register 2 */
+	pdata->colorkey_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_CKMODE) >> 24;
+	pdata->double_buffer_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_DBM) >> 23;
+	pdata->ram_array_mode = (lcd->window[plane].winctrl2 & LCD_WINCTRL2_RAM) >> 21;
+
+	pdata->enable = (lcd->winenable >> plane) & 1;
+	au_sync();
+}
+
+static int au1200fb_ioctl(struct fb_info *info, unsigned int cmd,
+                          unsigned long arg)
+{
+	int plane;
+	int val;
+
+#ifdef CONFIG_PM
+	au1xxx_pm_access(LCD_pm_dev);
+#endif
+
+	plane = fbinfo2index(info);
+	print_dbg("au1200fb: ioctl %d on plane %d\n", cmd, plane);
+
+	if (cmd == AU1200_LCD_FB_IOCTL) {
+		struct au1200_lcd_iodata_t iodata;
+
+		if (copy_from_user(&iodata, (void __user *) arg, sizeof(iodata)))
+			return -EFAULT;
+
+		print_dbg("FB IOCTL called\n");
+
+		switch (iodata.subcmd) {
+		case AU1200_LCD_SET_SCREEN:
+			print_dbg("AU1200_LCD_SET_SCREEN\n");
+			set_global(cmd, &iodata.global);
+			break;
+
+		case AU1200_LCD_GET_SCREEN:
+			print_dbg("AU1200_LCD_GET_SCREEN\n");
+			get_global(cmd, &iodata.global);
+			break;
+
+		case AU1200_LCD_SET_WINDOW:
+			print_dbg("AU1200_LCD_SET_WINDOW\n");
+			set_window(plane, &iodata.window);
+			break;
+
+		case AU1200_LCD_GET_WINDOW:
+			print_dbg("AU1200_LCD_GET_WINDOW\n");
+			get_window(plane, &iodata.window);
+			break;
+
+		case AU1200_LCD_SET_PANEL:
+			print_dbg("AU1200_LCD_SET_PANEL\n");
+			if ((iodata.global.panel_choice >= 0) &&
+					(iodata.global.panel_choice <
+					 NUM_PANELS))
+			{
+				struct panel_settings *newpanel;
+				panel_index = iodata.global.panel_choice;
+				newpanel = &known_lcd_panels[panel_index];
+				au1200_setpanel(newpanel);
+			}
+			break;
+
+		case AU1200_LCD_GET_PANEL:
+			print_dbg("AU1200_LCD_GET_PANEL\n");
+			iodata.global.panel_choice = panel_index;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		val = copy_to_user((void __user *) arg, &iodata, sizeof(iodata));
+		if (val) {
+			print_dbg("error: could not copy %d bytes\n", val);
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+
+static struct fb_ops au1200fb_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= au1200fb_fb_check_var,
+	.fb_set_par	= au1200fb_fb_set_par,
+	.fb_setcolreg	= au1200fb_fb_setcolreg,
+	.fb_blank	= au1200fb_fb_blank,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_sync	= NULL,
+	.fb_ioctl	= au1200fb_ioctl,
+	.fb_mmap	= au1200fb_fb_mmap,
+};
+
+/*-------------------------------------------------------------------------*/
+
+static irqreturn_t au1200fb_handle_irq(int irq, void* dev_id, struct pt_regs *regs)
+{
+	/* Nothing to do for now, just clear any pending interrupt */
+	lcd->intstatus = lcd->intstatus;
+	au_sync();
+
+	return IRQ_HANDLED;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 LCD device probe helpers */
+
+static int au1200fb_init_fbinfo(struct au1200fb_device *fbdev)
+{
+	struct fb_info *fbi = &fbdev->fb_info;
+	int bpp;
+
+	memset(fbi, 0, sizeof(struct fb_info));
+	fbi->fbops = &au1200fb_fb_ops;
+
+	bpp = winbpp(win->w[fbdev->plane].mode_winctrl1);
+
+	/* Copy monitor specs from panel data */
+	/* fixme: we're setting up LCD controller windows, so these dont give a
+	damn as to what the monitor specs are (the panel itself does, but that
+	isnt done here...so maybe need a generic catchall monitor setting??? */
+	memcpy(&fbi->monspecs, &panel->monspecs, sizeof(struct fb_monspecs));
+
+	/* We first try the user mode passed in argument. If that failed,
+	 * or if no one has been specified, we default to the first mode of the
+	 * panel list. Note that after this call, var data will be set */
+	if (!fb_find_mode(&fbi->var,
+			  fbi,
+			  NULL, /* drv_info.opt_mode, */
+			  fbi->monspecs.modedb,
+			  fbi->monspecs.modedb_len,
+			  fbi->monspecs.modedb,
+			  bpp)) {
+
+		print_err("Cannot find valid mode for panel %s", panel->name);
+		return -EFAULT;
+	}
+
+	fbi->pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+	if (!fbi->pseudo_palette) {
+		return -ENOMEM;
+	}
+	memset(fbi->pseudo_palette, 0, sizeof(u32) * 16);
+
+	if (fb_alloc_cmap(&fbi->cmap, AU1200_LCD_NBR_PALETTE_ENTRIES, 0) < 0) {
+		print_err("Fail to allocate colormap (%d entries)",
+			   AU1200_LCD_NBR_PALETTE_ENTRIES);
+		kfree(fbi->pseudo_palette);
+		return -EFAULT;
+	}
+
+	strncpy(fbi->fix.id, "AU1200", sizeof(fbi->fix.id));
+	fbi->fix.smem_start = fbdev->fb_phys;
+	fbi->fix.smem_len = fbdev->fb_len;
+	fbi->fix.type = FB_TYPE_PACKED_PIXELS;
+	fbi->fix.xpanstep = 0;
+	fbi->fix.ypanstep = 0;
+	fbi->fix.mmio_start = 0;
+	fbi->fix.mmio_len = 0;
+	fbi->fix.accel = FB_ACCEL_NONE;
+
+	fbi->screen_base = (char __iomem *) fbdev->fb_mem;
+
+	au1200fb_update_fbinfo(fbi);
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* AU1200 LCD controller device driver */
+
+static int au1200fb_drv_probe(struct device *dev)
+{
+	struct au1200fb_device *fbdev;
+	unsigned long page;
+	int bpp, plane, ret;
+
+	if (!dev)
+		return -EINVAL;
+
+	for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane) {
+		bpp = winbpp(win->w[plane].mode_winctrl1);
+		if (win->w[plane].xres == 0)
+			win->w[plane].xres = panel->Xres;
+		if (win->w[plane].yres == 0)
+			win->w[plane].yres = panel->Yres;
+
+		fbdev = &_au1200fb_devices[plane];
+		memset(fbdev, 0, sizeof(struct au1200fb_device));
+		fbdev->plane = plane;
+
+		/* Allocate the framebuffer to the maximum screen size */
+		fbdev->fb_len = (win->w[plane].xres * win->w[plane].yres * bpp) / 8;
+
+		fbdev->fb_mem = dma_alloc_noncoherent(dev,
+				PAGE_ALIGN(fbdev->fb_len),
+				&fbdev->fb_phys, GFP_KERNEL);
+		if (!fbdev->fb_mem) {
+			print_err("fail to allocate frambuffer (size: %dK))",
+				  fbdev->fb_len / 1024);
+			return -ENOMEM;
+		}
+
+		/*
+		 * Set page reserved so that mmap will work. This is necessary
+		 * since we'll be remapping normal memory.
+		 */
+		for (page = (unsigned long)fbdev->fb_phys;
+		     page < PAGE_ALIGN((unsigned long)fbdev->fb_phys +
+			     fbdev->fb_len);
+		     page += PAGE_SIZE) {
+			SetPageReserved(pfn_to_page(page >> PAGE_SHIFT)); /* LCD DMA is NOT coherent on Au1200 */
+		}
+		print_dbg("Framebuffer memory map at %p", fbdev->fb_mem);
+		print_dbg("phys=0x%08x, size=%dK", fbdev->fb_phys, fbdev->fb_len / 1024);
+
+		/* Init FB data */
+		if ((ret = au1200fb_init_fbinfo(fbdev)) < 0)
+			goto failed;
+
+		/* Register new framebuffer */
+		if ((ret = register_framebuffer(&fbdev->fb_info)) < 0) {
+			print_err("cannot register new framebuffer");
+			goto failed;
+		}
+
+		au1200fb_fb_set_par(&fbdev->fb_info);
+
+#if !defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_LOGO)
+		if (plane == 0)
+			if (fb_prepare_logo(&fbdev->fb_info, FB_ROTATE_UR)) {
+				/* Start display and show logo on boot */
+				fb_set_cmap(&fbdev->fb_info.cmap,
+						&fbdev->fb_info);
+
+				fb_show_logo(&fbdev->fb_info, FB_ROTATE_UR);
+			}
+#endif
+	}
+
+	/* Now hook interrupt too */
+	if ((ret = request_irq(AU1200_LCD_INT, au1200fb_handle_irq,
+		 	  SA_INTERRUPT | SA_SHIRQ, "lcd", (void *)dev)) < 0) {
+		print_err("fail to request interrupt line %d (err: %d)",
+			  AU1200_LCD_INT, ret);
+		goto failed;
+	}
+
+	return 0;
+
+failed:
+	/* NOTE: This only does the current plane/window that failed; others are still active */
+	if (fbdev->fb_mem)
+		dma_free_noncoherent(dev, PAGE_ALIGN(fbdev->fb_len),
+				fbdev->fb_mem, fbdev->fb_phys);
+	if (fbdev->fb_info.cmap.len != 0)
+		fb_dealloc_cmap(&fbdev->fb_info.cmap);
+	if (fbdev->fb_info.pseudo_palette)
+		kfree(fbdev->fb_info.pseudo_palette);
+	if (plane == 0)
+		free_irq(AU1200_LCD_INT, (void*)dev);
+	return ret;
+}
+
+static int au1200fb_drv_remove(struct device *dev)
+{
+	struct au1200fb_device *fbdev;
+	int plane;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* Turn off the panel */
+	au1200_setpanel(NULL);
+
+	for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane)
+	{
+		fbdev = &_au1200fb_devices[plane];
+
+		/* Clean up all probe data */
+		unregister_framebuffer(&fbdev->fb_info);
+		if (fbdev->fb_mem)
+			dma_free_noncoherent(dev, PAGE_ALIGN(fbdev->fb_len),
+					fbdev->fb_mem, fbdev->fb_phys);
+		if (fbdev->fb_info.cmap.len != 0)
+			fb_dealloc_cmap(&fbdev->fb_info.cmap);
+		if (fbdev->fb_info.pseudo_palette)
+			kfree(fbdev->fb_info.pseudo_palette);
+	}
+
+	free_irq(AU1200_LCD_INT, (void *)dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int au1200fb_drv_suspend(struct device *dev, u32 state, u32 level)
+{
+	/* TODO */
+	return 0;
+}
+
+static int au1200fb_drv_resume(struct device *dev, u32 level)
+{
+	/* TODO */
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+static struct device_driver au1200fb_driver = {
+	.name		= "au1200-lcd",
+	.bus		= &platform_bus_type,
+	.probe		= au1200fb_drv_probe,
+	.remove		= au1200fb_drv_remove,
+#ifdef CONFIG_PM
+	.suspend	= au1200fb_drv_suspend,
+	.resume		= au1200fb_drv_resume,
+#endif
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Kernel driver */
+
+static void au1200fb_setup(void)
+{
+	char* options = NULL;
+	char* this_opt;
+	int num_panels = ARRAY_SIZE(known_lcd_panels);
+	int panel_idx = -1;
+
+	fb_get_options(DRIVER_NAME, &options);
+
+	if (options) {
+		while ((this_opt = strsep(&options,",")) != NULL) {
+			/* Panel option - can be panel name,
+			 * "bs" for board-switch, or number/index */
+			if (!strncmp(this_opt, "panel:", 6)) {
+				int i;
+				long int li;
+				char *endptr;
+				this_opt += 6;
+				/* First check for index, which allows
+				 * to short circuit this mess */
+				li = simple_strtol(this_opt, &endptr, 0);
+				if (*endptr == '\0') {
+					panel_idx = (int)li;
+				}
+				else if (strcmp(this_opt, "bs") == 0) {
+					extern int board_au1200fb_panel(void);
+					panel_idx = board_au1200fb_panel();
+				}
+
+				else
+				for (i = 0; i < num_panels; i++) {
+					if (!strcmp(this_opt, known_lcd_panels[i].name)) {
+						panel_idx = i;
+						break;
+					}
+				}
+
+				if ((panel_idx < 0) || (panel_idx >= num_panels)) {
+						print_warn("Panel %s not supported!", this_opt);
+				}
+				else
+					panel_index = panel_idx;
+			}
+
+			else if (strncmp(this_opt, "nohwcursor", 10) == 0) {
+				nohwcursor = 1;
+			}
+
+			/* Unsupported option */
+			else {
+				print_warn("Unsupported option \"%s\"", this_opt);
+			}
+		}
+	}
+}
+
+#ifdef CONFIG_PM
+static int au1200fb_pm_callback(au1xxx_power_dev_t *dev,
+		au1xxx_request_t request, void *data) {
+	int retval = -1;
+	unsigned int d = 0;
+	unsigned int brightness = 0;
+
+	if (request == AU1XXX_PM_SLEEP) {
+		board_au1200fb_panel_shutdown();
+	}
+	else if (request == AU1XXX_PM_WAKEUP) {
+		if(dev->prev_state == SLEEP_STATE)
+		{
+			int plane;
+			au1200_setpanel(panel);
+			for (plane = 0; plane < CONFIG_FB_AU1200_DEVS; ++plane) 	{
+				struct au1200fb_device *fbdev;
+				fbdev = &_au1200fb_devices[plane];
+				au1200fb_fb_set_par(&fbdev->fb_info);
+			}
+		}
+
+		d = *((unsigned int*)data);
+		if(d <=10) brightness = 26;
+		else if(d<=20) brightness = 51;
+		else if(d<=30) brightness = 77;
+		else if(d<=40) brightness = 102;
+		else if(d<=50) brightness = 128;
+		else if(d<=60) brightness = 153;
+		else if(d<=70) brightness = 179;
+		else if(d<=80) brightness = 204;
+		else if(d<=90) brightness = 230;
+		else brightness = 255;
+		set_brightness(brightness);
+	} else if (request == AU1XXX_PM_GETSTATUS) {
+		return dev->cur_state;
+	} else if (request == AU1XXX_PM_ACCESS) {
+		if (dev->cur_state != SLEEP_STATE)
+			return retval;
+		else {
+			au1200_setpanel(panel);
+		}
+	} else if (request == AU1XXX_PM_IDLE) {
+	} else if (request == AU1XXX_PM_CLEANUP) {
+	}
+
+	return retval;
+}
+#endif
+
+static int __init au1200fb_init(void)
+{
+	print_info("" DRIVER_DESC "");
+
+	/* Setup driver with options */
+	au1200fb_setup();
+
+	/* Point to the panel selected */
+	panel = &known_lcd_panels[panel_index];
+	win = &windows[window_index];
+
+	printk(DRIVER_NAME ": Panel %d %s\n", panel_index, panel->name);
+	printk(DRIVER_NAME ": Win %d %s\n", window_index, win->name);
+
+	/* Kickstart the panel, the framebuffers/windows come soon enough */
+	au1200_setpanel(panel);
+
+	#ifdef CONFIG_PM
+	LCD_pm_dev = new_au1xxx_power_device("LCD", &au1200fb_pm_callback, NULL);
+	if ( LCD_pm_dev == NULL)
+		printk(KERN_INFO "Unable to create a power management device entry for the au1200fb.\n");
+	else
+		printk(KERN_INFO "Power management device entry for the au1200fb loaded.\n");
+	#endif
+
+	return driver_register(&au1200fb_driver);
+}
+
+static void __exit au1200fb_cleanup(void)
+{
+	driver_unregister(&au1200fb_driver);
+}
+
+module_init(au1200fb_init);
+module_exit(au1200fb_cleanup);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/au1200fb.h b/drivers/video/au1200fb.h
new file mode 100644
index 0000000..e267271
--- /dev/null
+++ b/drivers/video/au1200fb.h
@@ -0,0 +1,572 @@
+/*
+ * BRIEF MODULE DESCRIPTION
+ *	Hardware definitions for the Au1200 LCD controller
+ *
+ * Copyright 2004 AMD
+ * Author:	AMD
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED	  ``AS	IS'' AND   ANY	EXPRESS OR IMPLIED
+ *  WARRANTIES,	  INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO	EVENT  SHALL   THE AUTHOR  BE	 LIABLE FOR ANY	  DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED	  TO, PROCUREMENT OF  SUBSTITUTE GOODS	OR SERVICES; LOSS OF
+ *  USE, DATA,	OR PROFITS; OR	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN	 CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _AU1200LCD_H
+#define _AU1200LCD_H
+
+/********************************************************************/
+#define AU1200_LCD_ADDR		0xB5000000
+
+#define uint8 unsigned char
+#define uint32 unsigned int
+
+struct au1200_lcd {
+	volatile uint32	reserved0;
+	volatile uint32	screen;
+	volatile uint32	backcolor;
+	volatile uint32	horztiming;
+	volatile uint32	verttiming;
+	volatile uint32	clkcontrol;
+	volatile uint32	pwmdiv;
+	volatile uint32	pwmhi;
+	volatile uint32	reserved1;
+	volatile uint32	winenable;
+	volatile uint32	colorkey;
+	volatile uint32	colorkeymsk;
+	struct
+	{
+		volatile uint32	cursorctrl;
+		volatile uint32	cursorpos;
+		volatile uint32	cursorcolor0;
+		volatile uint32	cursorcolor1;
+		volatile uint32	cursorcolor2;
+		uint32	cursorcolor3;
+	} hwc;
+	volatile uint32	intstatus;
+	volatile uint32	intenable;
+	volatile uint32	outmask;
+	volatile uint32	fifoctrl;
+	uint32	reserved2[(0x0100-0x0058)/4];
+	struct
+	{
+		volatile uint32	winctrl0;
+		volatile uint32	winctrl1;
+		volatile uint32	winctrl2;
+		volatile uint32	winbuf0;
+		volatile uint32	winbuf1;
+		volatile uint32	winbufctrl;
+		uint32	winreserved0;
+		uint32	winreserved1;
+	} window[4];
+
+	uint32	reserved3[(0x0400-0x0180)/4];
+
+	volatile uint32	palette[(0x0800-0x0400)/4];
+
+	volatile uint8	cursorpattern[256];
+};
+
+/* lcd_screen */
+#define LCD_SCREEN_SEN		(1<<31)
+#define LCD_SCREEN_SX		(0x07FF<<19)
+#define LCD_SCREEN_SY		(0x07FF<< 8)
+#define LCD_SCREEN_SWP		(1<<7)
+#define LCD_SCREEN_SWD		(1<<6)
+#define LCD_SCREEN_PT		(7<<0)
+#define LCD_SCREEN_PT_TFT	(0<<0)
+#define LCD_SCREEN_SX_N(WIDTH)	((WIDTH-1)<<19)
+#define LCD_SCREEN_SY_N(HEIGHT)	((HEIGHT-1)<<8)
+#define LCD_SCREEN_PT_CSTN	(1<<0)
+#define LCD_SCREEN_PT_CDSTN	(2<<0)
+#define LCD_SCREEN_PT_M8STN	(3<<0)
+#define LCD_SCREEN_PT_M4STN	(4<<0)
+
+/* lcd_backcolor */
+#define LCD_BACKCOLOR_SBGR		(0xFF<<16)
+#define LCD_BACKCOLOR_SBGG		(0xFF<<8)
+#define LCD_BACKCOLOR_SBGB		(0xFF<<0)
+#define LCD_BACKCOLOR_SBGR_N(N)	((N)<<16)
+#define LCD_BACKCOLOR_SBGG_N(N)	((N)<<8)
+#define LCD_BACKCOLOR_SBGB_N(N)	((N)<<0)
+
+/* lcd_winenable */
+#define LCD_WINENABLE_WEN3		(1<<3)
+#define LCD_WINENABLE_WEN2		(1<<2)
+#define LCD_WINENABLE_WEN1		(1<<1)
+#define LCD_WINENABLE_WEN0		(1<<0)
+
+/* lcd_colorkey */
+#define LCD_COLORKEY_CKR		(0xFF<<16)
+#define LCD_COLORKEY_CKG		(0xFF<<8)
+#define LCD_COLORKEY_CKB		(0xFF<<0)
+#define LCD_COLORKEY_CKR_N(N)	((N)<<16)
+#define LCD_COLORKEY_CKG_N(N)	((N)<<8)
+#define LCD_COLORKEY_CKB_N(N)	((N)<<0)
+
+/* lcd_colorkeymsk */
+#define LCD_COLORKEYMSK_CKMR		(0xFF<<16)
+#define LCD_COLORKEYMSK_CKMG		(0xFF<<8)
+#define LCD_COLORKEYMSK_CKMB		(0xFF<<0)
+#define LCD_COLORKEYMSK_CKMR_N(N)	((N)<<16)
+#define LCD_COLORKEYMSK_CKMG_N(N)	((N)<<8)
+#define LCD_COLORKEYMSK_CKMB_N(N)	((N)<<0)
+
+/* lcd windows control 0 */
+#define LCD_WINCTRL0_OX		(0x07FF<<21)
+#define LCD_WINCTRL0_OY		(0x07FF<<10)
+#define LCD_WINCTRL0_A		(0x00FF<<2)
+#define LCD_WINCTRL0_AEN	(1<<1)
+#define LCD_WINCTRL0_OX_N(N) ((N)<<21)
+#define LCD_WINCTRL0_OY_N(N) ((N)<<10)
+#define LCD_WINCTRL0_A_N(N) ((N)<<2)
+
+/* lcd windows control 1 */
+#define LCD_WINCTRL1_PRI	(3<<30)
+#define LCD_WINCTRL1_PIPE	(1<<29)
+#define LCD_WINCTRL1_FRM	(0xF<<25)
+#define LCD_WINCTRL1_CCO	(1<<24)
+#define LCD_WINCTRL1_PO		(3<<22)
+#define LCD_WINCTRL1_SZX	(0x07FF<<11)
+#define LCD_WINCTRL1_SZY	(0x07FF<<0)
+#define LCD_WINCTRL1_FRM_1BPP	(0<<25)
+#define LCD_WINCTRL1_FRM_2BPP	(1<<25)
+#define LCD_WINCTRL1_FRM_4BPP	(2<<25)
+#define LCD_WINCTRL1_FRM_8BPP	(3<<25)
+#define LCD_WINCTRL1_FRM_12BPP	(4<<25)
+#define LCD_WINCTRL1_FRM_16BPP655	(5<<25)
+#define LCD_WINCTRL1_FRM_16BPP565	(6<<25)
+#define LCD_WINCTRL1_FRM_16BPP556	(7<<25)
+#define LCD_WINCTRL1_FRM_16BPPI1555	(8<<25)
+#define LCD_WINCTRL1_FRM_16BPPI5551	(9<<25)
+#define LCD_WINCTRL1_FRM_16BPPA1555	(10<<25)
+#define LCD_WINCTRL1_FRM_16BPPA5551	(11<<25)
+#define LCD_WINCTRL1_FRM_24BPP		(12<<25)
+#define LCD_WINCTRL1_FRM_32BPP		(13<<25)
+#define LCD_WINCTRL1_PRI_N(N)	((N)<<30)
+#define LCD_WINCTRL1_PO_00		(0<<22)
+#define LCD_WINCTRL1_PO_01		(1<<22)
+#define LCD_WINCTRL1_PO_10		(2<<22)
+#define LCD_WINCTRL1_PO_11		(3<<22)
+#define LCD_WINCTRL1_SZX_N(N)	((N-1)<<11)
+#define LCD_WINCTRL1_SZY_N(N)	((N-1)<<0)
+
+/* lcd windows control 2 */
+#define LCD_WINCTRL2_CKMODE		(3<<24)
+#define LCD_WINCTRL2_DBM		(1<<23)
+#define LCD_WINCTRL2_RAM		(3<<21)
+#define LCD_WINCTRL2_BX			(0x1FFF<<8)
+#define LCD_WINCTRL2_SCX		(0xF<<4)
+#define LCD_WINCTRL2_SCY		(0xF<<0)
+#define LCD_WINCTRL2_CKMODE_00		(0<<24)
+#define LCD_WINCTRL2_CKMODE_01		(1<<24)
+#define LCD_WINCTRL2_CKMODE_10		(2<<24)
+#define LCD_WINCTRL2_CKMODE_11		(3<<24)
+#define LCD_WINCTRL2_RAM_NONE		(0<<21)
+#define LCD_WINCTRL2_RAM_PALETTE	(1<<21)
+#define LCD_WINCTRL2_RAM_GAMMA		(2<<21)
+#define LCD_WINCTRL2_RAM_BUFFER		(3<<21)
+#define LCD_WINCTRL2_BX_N(N)	((N)<<8)
+#define LCD_WINCTRL2_SCX_1		(0<<4)
+#define LCD_WINCTRL2_SCX_2		(1<<4)
+#define LCD_WINCTRL2_SCX_4		(2<<4)
+#define LCD_WINCTRL2_SCY_1		(0<<0)
+#define LCD_WINCTRL2_SCY_2		(1<<0)
+#define LCD_WINCTRL2_SCY_4		(2<<0)
+
+/* lcd windows buffer control */
+#define LCD_WINBUFCTRL_DB		(1<<1)
+#define LCD_WINBUFCTRL_DBN		(1<<0)
+
+/* lcd_intstatus, lcd_intenable */
+#define LCD_INT_IFO				(0xF<<14)
+#define LCD_INT_IFU				(0xF<<10)
+#define LCD_INT_OFO				(1<<9)
+#define LCD_INT_OFU				(1<<8)
+#define LCD_INT_WAIT			(1<<3)
+#define LCD_INT_SD				(1<<2)
+#define LCD_INT_SA				(1<<1)
+#define LCD_INT_SS				(1<<0)
+
+/* lcd_horztiming */
+#define LCD_HORZTIMING_HND2		(0x1FF<<18)
+#define LCD_HORZTIMING_HND1		(0x1FF<<9)
+#define LCD_HORZTIMING_HPW		(0x1FF<<0)
+#define LCD_HORZTIMING_HND2_N(N)(((N)-1)<<18)
+#define LCD_HORZTIMING_HND1_N(N)(((N)-1)<<9)
+#define LCD_HORZTIMING_HPW_N(N)	(((N)-1)<<0)
+
+/* lcd_verttiming */
+#define LCD_VERTTIMING_VND2		(0x1FF<<18)
+#define LCD_VERTTIMING_VND1		(0x1FF<<9)
+#define LCD_VERTTIMING_VPW		(0x1FF<<0)
+#define LCD_VERTTIMING_VND2_N(N)(((N)-1)<<18)
+#define LCD_VERTTIMING_VND1_N(N)(((N)-1)<<9)
+#define LCD_VERTTIMING_VPW_N(N)	(((N)-1)<<0)
+
+/* lcd_clkcontrol */
+#define LCD_CLKCONTROL_EXT		(1<<22)
+#define LCD_CLKCONTROL_DELAY	(3<<20)
+#define LCD_CLKCONTROL_CDD		(1<<19)
+#define LCD_CLKCONTROL_IB		(1<<18)
+#define LCD_CLKCONTROL_IC		(1<<17)
+#define LCD_CLKCONTROL_IH		(1<<16)
+#define LCD_CLKCONTROL_IV		(1<<15)
+#define LCD_CLKCONTROL_BF		(0x1F<<10)
+#define LCD_CLKCONTROL_PCD		(0x3FF<<0)
+#define LCD_CLKCONTROL_BF_N(N)	(((N)-1)<<10)
+#define LCD_CLKCONTROL_PCD_N(N)	((N)<<0)
+
+/* lcd_pwmdiv */
+#define LCD_PWMDIV_EN			(1<<31)
+#define LCD_PWMDIV_PWMDIV		(0x1FFFF<<0)
+#define LCD_PWMDIV_PWMDIV_N(N)	((N)<<0)
+
+/* lcd_pwmhi */
+#define LCD_PWMHI_PWMHI1		(0xFFFF<<16)
+#define LCD_PWMHI_PWMHI0		(0xFFFF<<0)
+#define LCD_PWMHI_PWMHI1_N(N)	((N)<<16)
+#define LCD_PWMHI_PWMHI0_N(N)	((N)<<0)
+
+/* lcd_hwccon */
+#define LCD_HWCCON_EN			(1<<0)
+
+/* lcd_cursorpos */
+#define LCD_CURSORPOS_HWCXOFF		(0x1F<<27)
+#define LCD_CURSORPOS_HWCXPOS		(0x07FF<<16)
+#define LCD_CURSORPOS_HWCYOFF		(0x1F<<11)
+#define LCD_CURSORPOS_HWCYPOS		(0x07FF<<0)
+#define LCD_CURSORPOS_HWCXOFF_N(N)	((N)<<27)
+#define LCD_CURSORPOS_HWCXPOS_N(N)	((N)<<16)
+#define LCD_CURSORPOS_HWCYOFF_N(N)	((N)<<11)
+#define LCD_CURSORPOS_HWCYPOS_N(N)	((N)<<0)
+
+/* lcd_cursorcolor */
+#define LCD_CURSORCOLOR_HWCA		(0xFF<<24)
+#define LCD_CURSORCOLOR_HWCR		(0xFF<<16)
+#define LCD_CURSORCOLOR_HWCG		(0xFF<<8)
+#define LCD_CURSORCOLOR_HWCB		(0xFF<<0)
+#define LCD_CURSORCOLOR_HWCA_N(N)	((N)<<24)
+#define LCD_CURSORCOLOR_HWCR_N(N)	((N)<<16)
+#define LCD_CURSORCOLOR_HWCG_N(N)	((N)<<8)
+#define LCD_CURSORCOLOR_HWCB_N(N)	((N)<<0)
+
+/* lcd_fifoctrl */
+#define LCD_FIFOCTRL_F3IF		(1<<29)
+#define LCD_FIFOCTRL_F3REQ		(0x1F<<24)
+#define LCD_FIFOCTRL_F2IF		(1<<29)
+#define LCD_FIFOCTRL_F2REQ		(0x1F<<16)
+#define LCD_FIFOCTRL_F1IF		(1<<29)
+#define LCD_FIFOCTRL_F1REQ		(0x1F<<8)
+#define LCD_FIFOCTRL_F0IF		(1<<29)
+#define LCD_FIFOCTRL_F0REQ		(0x1F<<0)
+#define LCD_FIFOCTRL_F3REQ_N(N)	((N-1)<<24)
+#define LCD_FIFOCTRL_F2REQ_N(N)	((N-1)<<16)
+#define LCD_FIFOCTRL_F1REQ_N(N)	((N-1)<<8)
+#define LCD_FIFOCTRL_F0REQ_N(N)	((N-1)<<0)
+
+/* lcd_outmask */
+#define LCD_OUTMASK_MASK		(0x00FFFFFF)
+
+/********************************************************************/
+#endif /* _AU1200LCD_H */
+/*
+ * BRIEF MODULE DESCRIPTION
+ *	Hardware definitions for the Au1200 LCD controller
+ *
+ * Copyright 2004 AMD
+ * Author:	AMD
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED	  ``AS	IS'' AND   ANY	EXPRESS OR IMPLIED
+ *  WARRANTIES,	  INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO	EVENT  SHALL   THE AUTHOR  BE	 LIABLE FOR ANY	  DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED	  TO, PROCUREMENT OF  SUBSTITUTE GOODS	OR SERVICES; LOSS OF
+ *  USE, DATA,	OR PROFITS; OR	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN	 CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _AU1200LCD_H
+#define _AU1200LCD_H
+
+/********************************************************************/
+#define AU1200_LCD_ADDR		0xB5000000
+
+#define uint8 unsigned char
+#define uint32 unsigned int
+
+struct au1200_lcd {
+	volatile uint32	reserved0;
+	volatile uint32	screen;
+	volatile uint32	backcolor;
+	volatile uint32	horztiming;
+	volatile uint32	verttiming;
+	volatile uint32	clkcontrol;
+	volatile uint32	pwmdiv;
+	volatile uint32	pwmhi;
+	volatile uint32	reserved1;
+	volatile uint32	winenable;
+	volatile uint32	colorkey;
+	volatile uint32	colorkeymsk;
+	struct
+	{
+		volatile uint32	cursorctrl;
+		volatile uint32	cursorpos;
+		volatile uint32	cursorcolor0;
+		volatile uint32	cursorcolor1;
+		volatile uint32	cursorcolor2;
+		uint32	cursorcolor3;
+	} hwc;
+	volatile uint32	intstatus;
+	volatile uint32	intenable;
+	volatile uint32	outmask;
+	volatile uint32	fifoctrl;
+	uint32	reserved2[(0x0100-0x0058)/4];
+	struct
+	{
+		volatile uint32	winctrl0;
+		volatile uint32	winctrl1;
+		volatile uint32	winctrl2;
+		volatile uint32	winbuf0;
+		volatile uint32	winbuf1;
+		volatile uint32	winbufctrl;
+		uint32	winreserved0;
+		uint32	winreserved1;
+	} window[4];
+
+	uint32	reserved3[(0x0400-0x0180)/4];
+
+	volatile uint32	palette[(0x0800-0x0400)/4];
+
+	volatile uint8	cursorpattern[256];
+};
+
+/* lcd_screen */
+#define LCD_SCREEN_SEN		(1<<31)
+#define LCD_SCREEN_SX		(0x07FF<<19)
+#define LCD_SCREEN_SY		(0x07FF<< 8)
+#define LCD_SCREEN_SWP		(1<<7)
+#define LCD_SCREEN_SWD		(1<<6)
+#define LCD_SCREEN_PT		(7<<0)
+#define LCD_SCREEN_PT_TFT	(0<<0)
+#define LCD_SCREEN_SX_N(WIDTH)	((WIDTH-1)<<19)
+#define LCD_SCREEN_SY_N(HEIGHT)	((HEIGHT-1)<<8)
+#define LCD_SCREEN_PT_CSTN	(1<<0)
+#define LCD_SCREEN_PT_CDSTN	(2<<0)
+#define LCD_SCREEN_PT_M8STN	(3<<0)
+#define LCD_SCREEN_PT_M4STN	(4<<0)
+
+/* lcd_backcolor */
+#define LCD_BACKCOLOR_SBGR		(0xFF<<16)
+#define LCD_BACKCOLOR_SBGG		(0xFF<<8)
+#define LCD_BACKCOLOR_SBGB		(0xFF<<0)
+#define LCD_BACKCOLOR_SBGR_N(N)	((N)<<16)
+#define LCD_BACKCOLOR_SBGG_N(N)	((N)<<8)
+#define LCD_BACKCOLOR_SBGB_N(N)	((N)<<0)
+
+/* lcd_winenable */
+#define LCD_WINENABLE_WEN3		(1<<3)
+#define LCD_WINENABLE_WEN2		(1<<2)
+#define LCD_WINENABLE_WEN1		(1<<1)
+#define LCD_WINENABLE_WEN0		(1<<0)
+
+/* lcd_colorkey */
+#define LCD_COLORKEY_CKR		(0xFF<<16)
+#define LCD_COLORKEY_CKG		(0xFF<<8)
+#define LCD_COLORKEY_CKB		(0xFF<<0)
+#define LCD_COLORKEY_CKR_N(N)	((N)<<16)
+#define LCD_COLORKEY_CKG_N(N)	((N)<<8)
+#define LCD_COLORKEY_CKB_N(N)	((N)<<0)
+
+/* lcd_colorkeymsk */
+#define LCD_COLORKEYMSK_CKMR		(0xFF<<16)
+#define LCD_COLORKEYMSK_CKMG		(0xFF<<8)
+#define LCD_COLORKEYMSK_CKMB		(0xFF<<0)
+#define LCD_COLORKEYMSK_CKMR_N(N)	((N)<<16)
+#define LCD_COLORKEYMSK_CKMG_N(N)	((N)<<8)
+#define LCD_COLORKEYMSK_CKMB_N(N)	((N)<<0)
+
+/* lcd windows control 0 */
+#define LCD_WINCTRL0_OX		(0x07FF<<21)
+#define LCD_WINCTRL0_OY		(0x07FF<<10)
+#define LCD_WINCTRL0_A		(0x00FF<<2)
+#define LCD_WINCTRL0_AEN	(1<<1)
+#define LCD_WINCTRL0_OX_N(N) ((N)<<21)
+#define LCD_WINCTRL0_OY_N(N) ((N)<<10)
+#define LCD_WINCTRL0_A_N(N) ((N)<<2)
+
+/* lcd windows control 1 */
+#define LCD_WINCTRL1_PRI	(3<<30)
+#define LCD_WINCTRL1_PIPE	(1<<29)
+#define LCD_WINCTRL1_FRM	(0xF<<25)
+#define LCD_WINCTRL1_CCO	(1<<24)
+#define LCD_WINCTRL1_PO		(3<<22)
+#define LCD_WINCTRL1_SZX	(0x07FF<<11)
+#define LCD_WINCTRL1_SZY	(0x07FF<<0)
+#define LCD_WINCTRL1_FRM_1BPP	(0<<25)
+#define LCD_WINCTRL1_FRM_2BPP	(1<<25)
+#define LCD_WINCTRL1_FRM_4BPP	(2<<25)
+#define LCD_WINCTRL1_FRM_8BPP	(3<<25)
+#define LCD_WINCTRL1_FRM_12BPP	(4<<25)
+#define LCD_WINCTRL1_FRM_16BPP655	(5<<25)
+#define LCD_WINCTRL1_FRM_16BPP565	(6<<25)
+#define LCD_WINCTRL1_FRM_16BPP556	(7<<25)
+#define LCD_WINCTRL1_FRM_16BPPI1555	(8<<25)
+#define LCD_WINCTRL1_FRM_16BPPI5551	(9<<25)
+#define LCD_WINCTRL1_FRM_16BPPA1555	(10<<25)
+#define LCD_WINCTRL1_FRM_16BPPA5551	(11<<25)
+#define LCD_WINCTRL1_FRM_24BPP		(12<<25)
+#define LCD_WINCTRL1_FRM_32BPP		(13<<25)
+#define LCD_WINCTRL1_PRI_N(N)	((N)<<30)
+#define LCD_WINCTRL1_PO_00		(0<<22)
+#define LCD_WINCTRL1_PO_01		(1<<22)
+#define LCD_WINCTRL1_PO_10		(2<<22)
+#define LCD_WINCTRL1_PO_11		(3<<22)
+#define LCD_WINCTRL1_SZX_N(N)	((N-1)<<11)
+#define LCD_WINCTRL1_SZY_N(N)	((N-1)<<0)
+
+/* lcd windows control 2 */
+#define LCD_WINCTRL2_CKMODE		(3<<24)
+#define LCD_WINCTRL2_DBM		(1<<23)
+#define LCD_WINCTRL2_RAM		(3<<21)
+#define LCD_WINCTRL2_BX			(0x1FFF<<8)
+#define LCD_WINCTRL2_SCX		(0xF<<4)
+#define LCD_WINCTRL2_SCY		(0xF<<0)
+#define LCD_WINCTRL2_CKMODE_00		(0<<24)
+#define LCD_WINCTRL2_CKMODE_01		(1<<24)
+#define LCD_WINCTRL2_CKMODE_10		(2<<24)
+#define LCD_WINCTRL2_CKMODE_11		(3<<24)
+#define LCD_WINCTRL2_RAM_NONE		(0<<21)
+#define LCD_WINCTRL2_RAM_PALETTE	(1<<21)
+#define LCD_WINCTRL2_RAM_GAMMA		(2<<21)
+#define LCD_WINCTRL2_RAM_BUFFER		(3<<21)
+#define LCD_WINCTRL2_BX_N(N)	((N)<<8)
+#define LCD_WINCTRL2_SCX_1		(0<<4)
+#define LCD_WINCTRL2_SCX_2		(1<<4)
+#define LCD_WINCTRL2_SCX_4		(2<<4)
+#define LCD_WINCTRL2_SCY_1		(0<<0)
+#define LCD_WINCTRL2_SCY_2		(1<<0)
+#define LCD_WINCTRL2_SCY_4		(2<<0)
+
+/* lcd windows buffer control */
+#define LCD_WINBUFCTRL_DB		(1<<1)
+#define LCD_WINBUFCTRL_DBN		(1<<0)
+
+/* lcd_intstatus, lcd_intenable */
+#define LCD_INT_IFO				(0xF<<14)
+#define LCD_INT_IFU				(0xF<<10)
+#define LCD_INT_OFO				(1<<9)
+#define LCD_INT_OFU				(1<<8)
+#define LCD_INT_WAIT			(1<<3)
+#define LCD_INT_SD				(1<<2)
+#define LCD_INT_SA				(1<<1)
+#define LCD_INT_SS				(1<<0)
+
+/* lcd_horztiming */
+#define LCD_HORZTIMING_HND2		(0x1FF<<18)
+#define LCD_HORZTIMING_HND1		(0x1FF<<9)
+#define LCD_HORZTIMING_HPW		(0x1FF<<0)
+#define LCD_HORZTIMING_HND2_N(N)(((N)-1)<<18)
+#define LCD_HORZTIMING_HND1_N(N)(((N)-1)<<9)
+#define LCD_HORZTIMING_HPW_N(N)	(((N)-1)<<0)
+
+/* lcd_verttiming */
+#define LCD_VERTTIMING_VND2		(0x1FF<<18)
+#define LCD_VERTTIMING_VND1		(0x1FF<<9)
+#define LCD_VERTTIMING_VPW		(0x1FF<<0)
+#define LCD_VERTTIMING_VND2_N(N)(((N)-1)<<18)
+#define LCD_VERTTIMING_VND1_N(N)(((N)-1)<<9)
+#define LCD_VERTTIMING_VPW_N(N)	(((N)-1)<<0)
+
+/* lcd_clkcontrol */
+#define LCD_CLKCONTROL_EXT		(1<<22)
+#define LCD_CLKCONTROL_DELAY	(3<<20)
+#define LCD_CLKCONTROL_CDD		(1<<19)
+#define LCD_CLKCONTROL_IB		(1<<18)
+#define LCD_CLKCONTROL_IC		(1<<17)
+#define LCD_CLKCONTROL_IH		(1<<16)
+#define LCD_CLKCONTROL_IV		(1<<15)
+#define LCD_CLKCONTROL_BF		(0x1F<<10)
+#define LCD_CLKCONTROL_PCD		(0x3FF<<0)
+#define LCD_CLKCONTROL_BF_N(N)	(((N)-1)<<10)
+#define LCD_CLKCONTROL_PCD_N(N)	((N)<<0)
+
+/* lcd_pwmdiv */
+#define LCD_PWMDIV_EN			(1<<31)
+#define LCD_PWMDIV_PWMDIV		(0x1FFFF<<0)
+#define LCD_PWMDIV_PWMDIV_N(N)	((N)<<0)
+
+/* lcd_pwmhi */
+#define LCD_PWMHI_PWMHI1		(0xFFFF<<16)
+#define LCD_PWMHI_PWMHI0		(0xFFFF<<0)
+#define LCD_PWMHI_PWMHI1_N(N)	((N)<<16)
+#define LCD_PWMHI_PWMHI0_N(N)	((N)<<0)
+
+/* lcd_hwccon */
+#define LCD_HWCCON_EN			(1<<0)
+
+/* lcd_cursorpos */
+#define LCD_CURSORPOS_HWCXOFF		(0x1F<<27)
+#define LCD_CURSORPOS_HWCXPOS		(0x07FF<<16)
+#define LCD_CURSORPOS_HWCYOFF		(0x1F<<11)
+#define LCD_CURSORPOS_HWCYPOS		(0x07FF<<0)
+#define LCD_CURSORPOS_HWCXOFF_N(N)	((N)<<27)
+#define LCD_CURSORPOS_HWCXPOS_N(N)	((N)<<16)
+#define LCD_CURSORPOS_HWCYOFF_N(N)	((N)<<11)
+#define LCD_CURSORPOS_HWCYPOS_N(N)	((N)<<0)
+
+/* lcd_cursorcolor */
+#define LCD_CURSORCOLOR_HWCA		(0xFF<<24)
+#define LCD_CURSORCOLOR_HWCR		(0xFF<<16)
+#define LCD_CURSORCOLOR_HWCG		(0xFF<<8)
+#define LCD_CURSORCOLOR_HWCB		(0xFF<<0)
+#define LCD_CURSORCOLOR_HWCA_N(N)	((N)<<24)
+#define LCD_CURSORCOLOR_HWCR_N(N)	((N)<<16)
+#define LCD_CURSORCOLOR_HWCG_N(N)	((N)<<8)
+#define LCD_CURSORCOLOR_HWCB_N(N)	((N)<<0)
+
+/* lcd_fifoctrl */
+#define LCD_FIFOCTRL_F3IF		(1<<29)
+#define LCD_FIFOCTRL_F3REQ		(0x1F<<24)
+#define LCD_FIFOCTRL_F2IF		(1<<29)
+#define LCD_FIFOCTRL_F2REQ		(0x1F<<16)
+#define LCD_FIFOCTRL_F1IF		(1<<29)
+#define LCD_FIFOCTRL_F1REQ		(0x1F<<8)
+#define LCD_FIFOCTRL_F0IF		(1<<29)
+#define LCD_FIFOCTRL_F0REQ		(0x1F<<0)
+#define LCD_FIFOCTRL_F3REQ_N(N)	((N-1)<<24)
+#define LCD_FIFOCTRL_F2REQ_N(N)	((N-1)<<16)
+#define LCD_FIFOCTRL_F1REQ_N(N)	((N-1)<<8)
+#define LCD_FIFOCTRL_F0REQ_N(N)	((N-1)<<0)
+
+/* lcd_outmask */
+#define LCD_OUTMASK_MASK		(0x00FFFFFF)
+
+/********************************************************************/
+#endif /* _AU1200LCD_H */
-- 
cgit v0.10.2


From 59153f7d7effdb5b3c81eb6d03914a866157b319 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:29 -0800
Subject: [PATCH] fbdev: Make BIOS EDID reading configurable

DDC reading via the Video BIOS may take several tens of seconds with some
combination of display cards and monitors.

Make this option configurable.  It defaults to `y' to minimise disruption.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index 2ac40c8..0000a26 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1924,6 +1924,7 @@ skip10:	movb	%ah, %al
 	ret
 
 store_edid:
+#ifdef CONFIG_FB_FIRMWARE_EDID
 	pushw	%es				# just save all registers
 	pushw	%ax
 	pushw	%bx
@@ -1954,6 +1955,7 @@ store_edid:
 	popw	%bx
 	popw	%ax
 	popw	%es
+#endif
 	ret
 
 # VIDEO_SELECT-only variables
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 3a89061..22e9d69 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -70,6 +70,22 @@ config FB_MACMODES
        depends on FB
        default n
 
+config FB_FIRMWARE_EDID
+       bool "Enable firmware EDID"
+       depends on FB
+       default y
+       ---help---
+         This enables access to the EDID transferred from the firmware.
+	 On the i386, this is from the Video BIOS. Enable this if DDC/I2C
+	 transfers do not work for your driver and if you are using
+	 nvidiafb, i810fb or savagefb.
+
+	 In general, choosing Y for this option is safe.  If you
+	 experience extremely long delays while booting before you get
+	 something on your display, try setting this to N.  Matrox cards in
+	 combination with certain motherboards and monitors are known to
+	 suffer from this problem.
+
 config FB_MODE_HELPERS
         bool "Enable Video Mode Handling Helpers"
         depends on FB
diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 7c74e73..53beeb4 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -1281,7 +1281,7 @@ int fb_validate_mode(const struct fb_var_screeninfo *var, struct fb_info *info)
 		-EINVAL : 0;
 }
 
-#if defined(__i386__)
+#if defined(CONFIG_FB_FIRMWARE_EDID) && defined(__i386__)
 #include <linux/pci.h>
 
 /*
@@ -1311,11 +1311,11 @@ const unsigned char *fb_firmware_edid(struct device *device)
 {
 	return NULL;
 }
-#endif /* _i386_ */
+#endif
+EXPORT_SYMBOL(fb_firmware_edid);
 
 EXPORT_SYMBOL(fb_parse_edid);
 EXPORT_SYMBOL(fb_edid_to_monspecs);
-EXPORT_SYMBOL(fb_firmware_edid);
 EXPORT_SYMBOL(fb_get_mode);
 EXPORT_SYMBOL(fb_validate_mode);
 EXPORT_SYMBOL(fb_destroy_modedb);
-- 
cgit v0.10.2


From db77ec270d00098ff4fbf15f62f4506f6efb25d2 Mon Sep 17 00:00:00 2001
From: Alan Curry <pacman@TheWorld.com>
Date: Mon, 27 Mar 2006 01:17:30 -0800
Subject: [PATCH] framebuffer: cmap-setting return values

A set of 3 small bugfixes, all of which are related to bogus return values
of fb colormap-setting functions.

First, fb_alloc_cmap returns -1 if memory allocation fails. This is a hard
condition to reproduce since you'd have to be really low on memory, but from
studying the contexts in which it is called, I think this function should be
returning a negative errno, and the -1 will be seen as an EPERM. Switching it
to -ENOMEM makes sense.

Second, the store_cmap function which is called for writes to
/sys/class/graphics/fb0/color_map returns 0 for success, but it should be
returning the count of bytes written since its return value ends up in
userspace as the result of the write() syscall.

Third, radeonfb returns 1 instead of a negative errno when FBIOPUTCMAP is
called with an oversized colormap.  This is seen in userspace as a return
value of 1 from the ioctl() syscall with errno left unchanged.  A more
useful return value would be -EINVAL.

Signed-off-by: Alan Curry <pacman@TheWorld.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index c9f0c5a..9a6b5b3 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -1067,7 +1067,7 @@ static int radeon_setcolreg (unsigned regno, unsigned red, unsigned green,
 
 
 	if (regno > 255)
-		return 1;
+		return -EINVAL;
 
 	red >>= 8;
 	green >>= 8;
@@ -1086,9 +1086,9 @@ static int radeon_setcolreg (unsigned regno, unsigned red, unsigned green,
 			pindex = regno * 8;
 
 			if (rinfo->depth == 16 && regno > 63)
-				return 1;
+				return -EINVAL;
 			if (rinfo->depth == 15 && regno > 31)
-				return 1;
+				return -EINVAL;
 
 			/* For 565, the green component is mixed one order
 			 * below
diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c
index c32a2a5..1f98392 100644
--- a/drivers/video/fbcmap.c
+++ b/drivers/video/fbcmap.c
@@ -85,7 +85,7 @@ static struct fb_cmap default_16_colors = {
  *	Allocates memory for a colormap @cmap.  @len is the
  *	number of entries in the palette.
  *
- *	Returns -1 errno on error, or zero on success.
+ *	Returns negative errno on error, or zero on success.
  *
  */
 
@@ -116,7 +116,7 @@ int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp)
 
 fail:
     fb_dealloc_cmap(cmap);
-    return -1;
+    return -ENOMEM;
 }
 
 /**
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index 6d26057..b72b052 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -348,7 +348,7 @@ static ssize_t store_cmap(struct class_device *class_device, const char *buf,
 		fb_copy_cmap(&umap, &fb_info->cmap);
 		fb_dealloc_cmap(&umap);
 
-		return rc;
+		return rc ?: count;
 	}
 	for (i = 0; i < length; i++) {
 		u16 red, blue, green, tsp;
@@ -367,7 +367,7 @@ static ssize_t store_cmap(struct class_device *class_device, const char *buf,
 		if (transp)
 			fb_info->cmap.transp[i] = tsp;
 	}
-	return 0;
+	return count;
 }
 
 static ssize_t show_cmap(struct class_device *class_device, char *buf)
-- 
cgit v0.10.2


From 4d7b84d1cbb213fdfd2b25ffd1217f249ba1d55a Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:31 -0800
Subject: [PATCH] rivafb: Remove NULL check

Remove unnecessary NULL check, as struct info will never be NULL.

Coverity Bug 836

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index 6c19ab6..f841f01 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -2072,8 +2072,6 @@ static void __exit rivafb_remove(struct pci_dev *pd)
 	struct riva_par *par = info->par;
 	
 	NVTRACE_ENTER();
-	if (!info)
-		return;
 
 #ifdef CONFIG_FB_RIVA_I2C
 	riva_delete_i2c_busses(par);
-- 
cgit v0.10.2


From 8ff7727d2a92f8b99cc709b827859da5d3c46a8a Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:32 -0800
Subject: [PATCH] nvidiafb: Remove NULL check

Remove unnecessary NULL check, as struct info will never be NULL.

Coverity Bug 835

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index c758b38..018dd81 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -1771,8 +1771,6 @@ static void __exit nvidiafb_remove(struct pci_dev *pd)
 	struct nvidia_par *par = info->par;
 
 	NVTRACE_ENTER();
-	if (!info)
-		return;
 
 	unregister_framebuffer(info);
 #ifdef CONFIG_MTRR
-- 
cgit v0.10.2


From 3d5b191fff144634a04800e610b7ad0c1aefa6ed Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:33 -0800
Subject: [PATCH] nvidiafb: Remove NULL check #2

Remove unnecessary NULL check.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/nvidia/nv_i2c.c b/drivers/video/nvidia/nv_i2c.c
index bd9eca0..1edb1c4 100644
--- a/drivers/video/nvidia/nv_i2c.c
+++ b/drivers/video/nvidia/nv_i2c.c
@@ -218,8 +218,7 @@ int nvidia_probe_i2c_connector(struct fb_info *info, int conn, u8 **out_edid)
 		}
 	}
 
-	if (out_edid)
-		*out_edid = edid;
+	*out_edid = edid;
 
 	return (edid) ? 0 : 1;
 }
-- 
cgit v0.10.2


From 7c518eb84ce75d4c8e8799f4fcad59837f6d1894 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:33 -0800
Subject: [PATCH] i810fb: Remove NULL check

Remove unnecessary NULL check.  Being a function private to the driver,
out_edid can never be NULL.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/i810/i810-i2c.c b/drivers/video/i810/i810-i2c.c
index e3c8b5f..3fe3ae1 100644
--- a/drivers/video/i810/i810-i2c.c
+++ b/drivers/video/i810/i810-i2c.c
@@ -210,8 +210,7 @@ int i810_probe_i2c_connector(struct fb_info *info, u8 **out_edid, int conn)
 		}
 	}
 
-        if (out_edid)
-                *out_edid = edid;
+	*out_edid = edid;
 
         return (edid) ? 0 : 1;
 }
-- 
cgit v0.10.2


From 0e4be28023c14624e03a09b4494e919e088513f0 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:34 -0800
Subject: [PATCH] savagefb: Remove NULL check

Remove unnecessary NULL check.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c
index 00719a9..21debed 100644
--- a/drivers/video/savage/savagefb-i2c.c
+++ b/drivers/video/savage/savagefb-i2c.c
@@ -273,8 +273,7 @@ int savagefb_probe_i2c_connector(struct fb_info *info, u8 **out_edid)
 		}
 	}
 
-	if (out_edid)
-		*out_edid = edid;
+	*out_edid = edid;
 
 	return (edid) ? 0 : 1;
 }
-- 
cgit v0.10.2


From eba87e8e8d7024da827accb00ce7e3affd10b7de Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:35 -0800
Subject: [PATCH] atyfb: Remove dead code

Remove code that can never be reached.

Coverity Bug 67

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 485be38..d790ee7 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2298,6 +2298,10 @@ static int __init aty_init(struct fb_info *info, const char *name)
 		case CLK_ATI18818_1:
 			par->pll_ops = &aty_pll_ati18818_1;
 			break;
+		case CLK_IBMRGB514:
+			par->pll_ops = &aty_pll_ibm514;
+			break;
+#if 0 /* dead code */
 		case CLK_STG1703:
 			par->pll_ops = &aty_pll_stg1703;
 			break;
@@ -2307,9 +2311,7 @@ static int __init aty_init(struct fb_info *info, const char *name)
 		case CLK_ATT20C408:
 			par->pll_ops = &aty_pll_att20c408;
 			break;
-		case CLK_IBMRGB514:
-			par->pll_ops = &aty_pll_ibm514;
-			break;
+#endif
 		default:
 			PRINTKI("aty_init: CLK type not implemented yet!");
 			par->pll_ops = &aty_pll_unsupported;
-- 
cgit v0.10.2


From 6257ffacb9e1dc99a910d620ccb92ec88da20a4f Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 27 Mar 2006 01:17:36 -0800
Subject: [PATCH] imsttfb: Remove dead code

clk_p is always 0.

Coverity Bug 67

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/imsttfb.c b/drivers/video/imsttfb.c
index 7db4254..859ba7e 100644
--- a/drivers/video/imsttfb.c
+++ b/drivers/video/imsttfb.c
@@ -440,9 +440,9 @@ getclkMHz(struct imstt_par *par)
 static void
 setclkMHz(struct imstt_par *par, __u32 MHz)
 {
-	__u32 clk_m, clk_n, clk_p, x, stage, spilled;
+	__u32 clk_m, clk_n, x, stage, spilled;
 
-	clk_m = clk_n = clk_p = 0;
+	clk_m = clk_n = 0;
 	stage = spilled = 0;
 	for (;;) {
 		switch (stage) {
@@ -453,7 +453,7 @@ setclkMHz(struct imstt_par *par, __u32 MHz)
 				clk_n++;
 				break;
 		}
-		x = 20 * (clk_m + 1) / ((clk_n + 1) * (clk_p ? 2 * clk_p : 1));
+		x = 20 * (clk_m + 1) / (clk_n + 1);
 		if (x == MHz)
 			break;
 		if (x > MHz) {
@@ -466,7 +466,7 @@ setclkMHz(struct imstt_par *par, __u32 MHz)
 
 	par->init.pclk_m = clk_m;
 	par->init.pclk_n = clk_n;
-	par->init.pclk_p = clk_p;
+	par->init.pclk_p = 0;
 }
 
 static struct imstt_regvals *
-- 
cgit v0.10.2


From ed49843b897da9969e349c279ffc832efcb93213 Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Mon, 27 Mar 2006 01:17:36 -0800
Subject: [PATCH] Add ID for Quadro NVS280

Quadro NVS280 is a dual-head PCIe card with PCI ID 10de:00fd and subsystem ID
10de:0215.

Signed-off-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index 018dd81..6d3e489 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -297,6 +297,8 @@ static struct pci_device_id nvidiafb_pci_tbl[] = {
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT,
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_QUADRO_NVS280,
+	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, 0x0252,
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, 0x0313,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6f080ae..02f6cf2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1052,6 +1052,7 @@
 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT2 0x00f2
 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6200_ALT1 0x00f3
 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT   0x00f9
+#define PCIE_DEVICE_ID_NVIDIA_QUADRO_NVS280	0x00fd
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR	0x0100
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR	0x0101
 #define PCI_DEVICE_ID_NVIDIA_QUADRO		0x0103
-- 
cgit v0.10.2


From ac3f9087d337a6ffa1a5fbdf2c923eb0aad1aef6 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 27 Mar 2006 01:17:37 -0800
Subject: [PATCH] sparse: Fix warnings in newport driver about non-static
 functions

There are more sparse warnings but fixing those will require some more work
than I want to do without hardware for testing at hand.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index 762c7a5..e99fe30 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -149,7 +149,7 @@ static inline void newport_clear_lines(int ystart, int yend, int ci)
 	newport_clear_screen(0, ystart, 1280 + 63, yend, ci);
 }
 
-void newport_reset(void)
+static void newport_reset(void)
 {
 	unsigned short treg;
 	int i;
@@ -193,7 +193,7 @@ void newport_reset(void)
  * calculate the actual screen size by reading
  * the video timing out of the VC2
  */
-void newport_get_screensize(void)
+static void newport_get_screensize(void)
 {
 	int i, cols;
 	unsigned short ventry, treg;
-- 
cgit v0.10.2


From b0c87978216836455ef5fbcac6df1ce6679750b0 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Mon, 27 Mar 2006 01:17:38 -0800
Subject: [PATCH] fbdev: add modeline for 1680x1050@60

Add a modeline for the Philips 200W display.  aty128fb does not do DDC, it
picks 1920x1440 or similar.  It works ok with nvidiafb because it can ask
for DDC data.

mode "1680x1050-60"
    # D: 146.028 MHz, H: 65.191 kHz, V: 59.863 Hz
    geometry 1680 1050 1680 1050 16
    timings 6848 280 104 30 3 176 6
    hsync high
    vsync high
    rgba 5/11,6/5,5/0,0/0
endmode

 hwinfo --monitor
20: None 00.0: 10000 Monitor
  [Created at monitor.206]
  Unique ID: rdCR.pzUFTofo1S4
  Parent ID: 002j.bJRsY88eNSC
  Hardware Class: monitor
  Model: "PHILIPS Philips 200W"
  Vendor: PHL "PHILIPS"
  Device: eisa 0x0832 "Philips 200W"
  Serial ID: "VN  016596"
  Resolution: 720x400@70Hz
  Resolution: 640x480@60Hz
  Resolution: 640x480@67Hz
  Resolution: 640x480@72Hz
  Resolution: 640x480@75Hz
  Resolution: 800x600@56Hz
  Resolution: 800x600@60Hz
  Resolution: 800x600@72Hz
  Resolution: 800x600@75Hz
  Resolution: 832x624@75Hz
  Resolution: 1024x768@60Hz
  Resolution: 1024x768@70Hz
  Resolution: 1024x768@75Hz
  Resolution: 1280x1024@75Hz
  Resolution: 1152x864@70Hz
  Resolution: 1152x864@75Hz
  Resolution: 1280x960@60Hz
  Resolution: 1280x1024@60Hz
  Resolution: 1680x1050@60Hz
  Size: 433x271 mm
  Driver Info #0:
    Max. Resolution: 1680x1050
    Vert. Sync Range: 56-85 Hz
    Hor. Sync Range: 30-93 kHz
  Config Status: cfg=new, avail=yes, need=no, active=unknown
  Attached to: #5 (VGA compatible controller)

Signed-off-by: Olaf Hering <olh@suse.de>
Acked-by: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 1da2f84..244a21a 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -183,6 +183,10 @@ static const struct fb_videomode modedb[] = {
 	NULL, 75, 1600, 1200, 4938, 304, 64, 46, 1, 192, 3,
 	FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
     }, {
+	/* 1680x1050 @ 60 Hz, 65.191 kHz hsync */
+	NULL, 60, 1680, 1050, 6848, 280, 104, 30, 3, 176, 6,
+	FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
+    }, {
 	/* 1600x1200 @ 85 Hz, 105.77 kHz hsync */
 	NULL, 85, 1600, 1200, 4545, 272, 16, 37, 4, 192, 3,
 	FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
-- 
cgit v0.10.2


From d1ae418eef4ce763a95edec0b5fc095af5daca2e Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@nuerscht.ch>
Date: Mon, 27 Mar 2006 01:17:39 -0800
Subject: [PATCH] drivers/video: Use ARRAY_SIZE macro

Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0]) and remove
duplicates of ARRAY_SIZE.  Some coding style and trailing whitespaces are
also fixed.

Compile-tested where possible (some are other arch or BROKEN)

Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index 76448d6..98baecc 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -1308,7 +1308,7 @@ static int __init acornfb_probe(struct platform_device *dev)
 	/*
 	 * Try to select a suitable default mode
 	 */
-	for (i = 0; i < sizeof(modedb) / sizeof(*modedb); i++) {
+	for (i = 0; i < ARRAY_SIZE(modedb); i++) {
 		unsigned long hs;
 
 		hs = modedb[i].refresh *
@@ -1380,7 +1380,7 @@ static int __init acornfb_probe(struct platform_device *dev)
 	 */
 	free_unused_pages(PAGE_OFFSET + size, PAGE_OFFSET + MAX_SIZE);
 #endif
-	
+
 	fb_info.fix.smem_len = size;
 	current_par.palette_size   = VIDC_PALETTE_SIZE;
 
@@ -1391,7 +1391,7 @@ static int __init acornfb_probe(struct platform_device *dev)
 	 */
 	do {
 		rc = fb_find_mode(&fb_info.var, &fb_info, NULL, modedb,
-				 sizeof(modedb) / sizeof(*modedb),
+				 ARRAY_SIZE(modedb),
 				 &acornfb_default_mode, DEFAULT_BPP);
 		/*
 		 * If we found an exact match, all ok.
@@ -1408,7 +1408,7 @@ static int __init acornfb_probe(struct platform_device *dev)
 			break;
 
 		rc = fb_find_mode(&fb_info.var, &fb_info, NULL, modedb,
-				 sizeof(modedb) / sizeof(*modedb),
+				 ARRAY_SIZE(modedb),
 				 &acornfb_default_mode, DEFAULT_BPP);
 		if (rc)
 			break;
diff --git a/drivers/video/asiliantfb.c b/drivers/video/asiliantfb.c
index c924d81..29f9f0d 100644
--- a/drivers/video/asiliantfb.c
+++ b/drivers/video/asiliantfb.c
@@ -353,8 +353,6 @@ struct chips_init_reg {
 	unsigned char data;
 };
 
-#define N_ELTS(x)	(sizeof(x) / sizeof(x[0]))
-
 static struct chips_init_reg chips_init_sr[] =
 {
 	{0x00, 0x03},		/* Reset register */
@@ -460,22 +458,22 @@ static void __devinit chips_hw_init(struct fb_info *p)
 {
 	int i;
 
-	for (i = 0; i < N_ELTS(chips_init_xr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_xr); ++i)
 		write_xr(chips_init_xr[i].addr, chips_init_xr[i].data);
 	write_xr(0x81, 0x12);
 	write_xr(0x82, 0x08);
 	write_xr(0x20, 0x00);
-	for (i = 0; i < N_ELTS(chips_init_sr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_sr); ++i)
 		write_sr(chips_init_sr[i].addr, chips_init_sr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_gr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_gr); ++i)
 		write_gr(chips_init_gr[i].addr, chips_init_gr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_ar); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_ar); ++i)
 		write_ar(chips_init_ar[i].addr, chips_init_ar[i].data);
 	/* Enable video output in attribute index register */
 	writeb(0x20, mmio_base + 0x780);
-	for (i = 0; i < N_ELTS(chips_init_cr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_cr); ++i)
 		write_cr(chips_init_cr[i].addr, chips_init_cr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_fr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_fr); ++i)
 		write_fr(chips_init_fr[i].addr, chips_init_fr[i].data);
 }
 
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 620c9a9..821c6da 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -1725,9 +1725,9 @@ static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id *
 	strcpy(video_card, "Rage128 XX ");
 	video_card[8] = ent->device >> 8;
 	video_card[9] = ent->device & 0xFF;
-	    
+
 	/* range check to make sure */
-	if (ent->driver_data < (sizeof(r128_family)/sizeof(char *)))
+	if (ent->driver_data < ARRAY_SIZE(r128_family))
 	    strncat(video_card, r128_family[ent->driver_data], sizeof(video_card));
 
 	printk(KERN_INFO "aty128fb: %s [chip rev 0x%x] ", video_card, chip_rev);
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index d790ee7..e799fcc 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -434,7 +434,7 @@ static int __devinit correct_chipset(struct atyfb_par *par)
 	const char *name;
 	int i;
 
-	for (i = sizeof(aty_chips) / sizeof(*aty_chips) - 1; i >= 0; i--)
+	for (i = ARRAY_SIZE(aty_chips) - 1; i >= 0; i--)
 		if (par->pci_id == aty_chips[i].pci_id)
 			break;
 
@@ -2168,10 +2168,10 @@ static void __init aty_calc_mem_refresh(struct atyfb_par *par, int xclk)
 
 	if (IS_XL(par->pci_id) || IS_MOBILITY(par->pci_id)) {
 		refresh_tbl = ragexl_tbl;
-		size = sizeof(ragexl_tbl)/sizeof(int);
+		size = ARRAY_SIZE(ragexl_tbl);
 	} else {
 		refresh_tbl = ragepro_tbl;
-		size = sizeof(ragepro_tbl)/sizeof(int);
+		size = ARRAY_SIZE(ragepro_tbl);
 	}
 
 	for (i=0; i < size; i++) {
@@ -3399,7 +3399,7 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi
 	struct atyfb_par *par;
 	int i, rc = -ENOMEM;
 
-	for (i = sizeof(aty_chips) / sizeof(*aty_chips) - 1; i >= 0; i--)
+	for (i = ARRAY_SIZE(aty_chips); i >= 0; i--)
 		if (pdev->device == aty_chips[i].pci_id)
 			break;
 
diff --git a/drivers/video/aty/mach64_gx.c b/drivers/video/aty/mach64_gx.c
index 01fdff7..2045639 100644
--- a/drivers/video/aty/mach64_gx.c
+++ b/drivers/video/aty/mach64_gx.c
@@ -149,8 +149,7 @@ static int aty_var_to_pll_514(const struct fb_info *info, u32 vclk_per,
 	};
 	int i;
 
-	for (i = 0; i < sizeof(RGB514_clocks) / sizeof(*RGB514_clocks);
-	     i++)
+	for (i = 0; i < ARRAY_SIZE(RGB514_clocks); i++)
 		if (vclk_per <= RGB514_clocks[i].limit) {
 			pll->ibm514.m = RGB514_clocks[i].m;
 			pll->ibm514.n = RGB514_clocks[i].n;
diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c
index bc061d4..72ff6bf 100644
--- a/drivers/video/chipsfb.c
+++ b/drivers/video/chipsfb.c
@@ -178,8 +178,6 @@ struct chips_init_reg {
 	unsigned char data;
 };
 
-#define N_ELTS(x)	(sizeof(x) / sizeof(x[0]))
-
 static struct chips_init_reg chips_init_sr[] = {
 	{ 0x00, 0x03 },
 	{ 0x01, 0x01 },
@@ -287,18 +285,18 @@ static void __init chips_hw_init(void)
 {
 	int i;
 
-	for (i = 0; i < N_ELTS(chips_init_xr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_xr); ++i)
 		write_xr(chips_init_xr[i].addr, chips_init_xr[i].data);
 	outb(0x29, 0x3c2); /* set misc output reg */
-	for (i = 0; i < N_ELTS(chips_init_sr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_sr); ++i)
 		write_sr(chips_init_sr[i].addr, chips_init_sr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_gr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_gr); ++i)
 		write_gr(chips_init_gr[i].addr, chips_init_gr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_ar); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_ar); ++i)
 		write_ar(chips_init_ar[i].addr, chips_init_ar[i].data);
-	for (i = 0; i < N_ELTS(chips_init_cr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_cr); ++i)
 		write_cr(chips_init_cr[i].addr, chips_init_cr[i].data);
-	for (i = 0; i < N_ELTS(chips_init_fr); ++i)
+	for (i = 0; i < ARRAY_SIZE(chips_init_fr); ++i)
 		write_fr(chips_init_fr[i].addr, chips_init_fr[i].data);
 }
 
diff --git a/drivers/video/console/fonts.c b/drivers/video/console/fonts.c
index 4fd07d9..0cc1bfd 100644
--- a/drivers/video/console/fonts.c
+++ b/drivers/video/console/fonts.c
@@ -66,7 +66,7 @@ static const struct font_desc *fonts[] = {
 #endif
 };
 
-#define num_fonts (sizeof(fonts)/sizeof(*fonts))
+#define num_fonts ARRAY_SIZE(fonts)
 
 #ifdef NO_FONTS
 #error No fonts configured.
diff --git a/drivers/video/imsttfb.c b/drivers/video/imsttfb.c
index 859ba7e..f73c642 100644
--- a/drivers/video/imsttfb.c
+++ b/drivers/video/imsttfb.c
@@ -1372,18 +1372,24 @@ init_imstt(struct fb_info *info)
 	write_reg_le32(par->dc_regs, STGCTL, tmp & ~0x1);
 	write_reg_le32(par->dc_regs, SSR, 0);
 
-	/* set default values for DAC registers */ 
+	/* set default values for DAC registers */
 	if (par->ramdac == IBM) {
-		par->cmap_regs[PPMASK] = 0xff;	eieio();
-		par->cmap_regs[PIDXHI] = 0;	eieio();
-		for (i = 0; i < sizeof(ibm_initregs) / sizeof(*ibm_initregs); i++) {
-			par->cmap_regs[PIDXLO] = ibm_initregs[i].addr;	eieio();
-			par->cmap_regs[PIDXDATA] = ibm_initregs[i].value;	eieio();
+		par->cmap_regs[PPMASK] = 0xff;
+		eieio();
+		par->cmap_regs[PIDXHI] = 0;
+		eieio();
+		for (i = 0; i < ARRAY_SIZE(ibm_initregs); i++) {
+			par->cmap_regs[PIDXLO] = ibm_initregs[i].addr;
+			eieio();
+			par->cmap_regs[PIDXDATA] = ibm_initregs[i].value;
+			eieio();
 		}
 	} else {
-		for (i = 0; i < sizeof(tvp_initregs) / sizeof(*tvp_initregs); i++) {
-			par->cmap_regs[TVPADDRW] = tvp_initregs[i].addr;	eieio();
-			par->cmap_regs[TVPIDATA] = tvp_initregs[i].value;	eieio();
+		for (i = 0; i < ARRAY_SIZE(tvp_initregs); i++) {
+			par->cmap_regs[TVPADDRW] = tvp_initregs[i].addr;
+			eieio();
+			par->cmap_regs[TVPIDATA] = tvp_initregs[i].value;
+			eieio();
 		}
 	}
 
diff --git a/drivers/video/macmodes.c b/drivers/video/macmodes.c
index 2fc7108..c0385c6 100644
--- a/drivers/video/macmodes.c
+++ b/drivers/video/macmodes.c
@@ -380,7 +380,7 @@ int __init mac_find_mode(struct fb_var_screeninfo *var, struct fb_info *info,
     if (mode_option && !strncmp(mode_option, "mac", 3)) {
 	mode_option += 3;
 	db = mac_modedb;
-	dbsize = sizeof(mac_modedb)/sizeof(*mac_modedb);
+	dbsize = ARRAY_SIZE(mac_modedb);
     }
     return fb_find_mode(var, info, mode_option, db, dbsize,
 			&mac_modedb[DEFAULT_MODEDB_INDEX], default_bpp);
diff --git a/drivers/video/matrox/matroxfb_g450.c b/drivers/video/matrox/matroxfb_g450.c
index c122d87..4d610b4 100644
--- a/drivers/video/matrox/matroxfb_g450.c
+++ b/drivers/video/matrox/matroxfb_g450.c
@@ -59,7 +59,7 @@ static const struct mctl g450_controls[] =
 	}, offsetof(struct matrox_fb_info, altout.tvo_params.testout) },
 };
 
-#define G450CTRLS (sizeof(g450_controls)/sizeof(g450_controls[0]))
+#define G450CTRLS ARRAY_SIZE(g450_controls)
 
 /* Return: positive number: id found
            -EINVAL:         id not found, return failure
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index d8b3429..5d29a26 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -89,12 +89,12 @@ static const struct mctl maven_controls[] =
 	}, offsetof(struct matrox_fb_info, altout.tvo_params.hue) },
 	{ { V4L2_CID_GAMMA, V4L2_CTRL_TYPE_INTEGER,
 	  "gamma",
-	  0, sizeof(maven_gamma)/sizeof(maven_gamma[0])-1, 1, 3,
+	  0, ARRAY_SIZE(maven_gamma) - 1, 1, 3,
 	  0,
 	}, offsetof(struct matrox_fb_info, altout.tvo_params.gamma) },
 	{ { MATROXFB_CID_TESTOUT, V4L2_CTRL_TYPE_BOOLEAN,
 	  "test output",
-	  0, 1, 1, 0, 
+	  0, 1, 1, 0,
 	  0,
 	}, offsetof(struct matrox_fb_info, altout.tvo_params.testout) },
 	{ { MATROXFB_CID_DEFLICKER, V4L2_CTRL_TYPE_INTEGER,
@@ -105,7 +105,7 @@ static const struct mctl maven_controls[] =
 
 };
 
-#define MAVCTRLS (sizeof(maven_controls)/sizeof(maven_controls[0]))
+#define MAVCTRLS ARRAY_SIZE(maven_controls)
 
 /* Return: positive number: id found
            -EINVAL:         id not found, return failure
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 244a21a..26a1c61 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -500,7 +500,7 @@ int fb_find_mode(struct fb_var_screeninfo *var,
     /* Set up defaults */
     if (!db) {
 	db = modedb;
-	dbsize = sizeof(modedb)/sizeof(*modedb);
+	dbsize = ARRAY_SIZE(modedb);
     }
     if (!default_mode)
 	default_mode = &modedb[DEFAULT_MODEDB_INDEX];
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index b961d56..24b12f7 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -165,20 +165,20 @@ static int neoFindMode(int xres, int yres, int depth)
 
 	switch (depth) {
 	case 8:
-		size = sizeof(bios8) / sizeof(biosMode);
+		size = ARRAY_SIZE(bios8);
 		mode = bios8;
 		break;
 	case 16:
-		size = sizeof(bios16) / sizeof(biosMode);
+		size = ARRAY_SIZE(bios16);
 		mode = bios16;
 		break;
 	case 24:
-		size = sizeof(bios24) / sizeof(biosMode);
+		size = ARRAY_SIZE(bios24);
 		mode = bios24;
 		break;
 #ifdef NO_32BIT_SUPPORT_YET
 	case 32:
-		size = sizeof(bios32) / sizeof(biosMode);
+		size = ARRAY_SIZE(bios32);
 		mode = bios32;
 		break;
 #endif
diff --git a/drivers/video/pmagb-b-fb.c b/drivers/video/pmagb-b-fb.c
index eeeac92..73e2d7d 100644
--- a/drivers/video/pmagb-b-fb.c
+++ b/drivers/video/pmagb-b-fb.c
@@ -228,7 +228,7 @@ static void __init pmagbbfb_osc_setup(struct fb_info *info)
 
 	freq1 = (par->osc0 * count1 + count0 / 2) / count0;
 	par->osc1 = freq1;
-	for (i = 0; i < sizeof(pmagbbfb_freqs) / sizeof(*pmagbbfb_freqs); i++)
+	for (i = 0; i < ARRAY_SIZE(pmagbbfb_freqs); i++)
 		if (freq1 >= pmagbbfb_freqs[i] -
 			     (pmagbbfb_freqs[i] + 128) / 256 &&
 		    freq1 <= pmagbbfb_freqs[i] +
diff --git a/drivers/video/radeonfb.c b/drivers/video/radeonfb.c
index db9fb90..24982ad 100644
--- a/drivers/video/radeonfb.c
+++ b/drivers/video/radeonfb.c
@@ -759,7 +759,7 @@ static void __iomem *radeon_find_rom(struct radeonfb_info *rinfo)
                 rom = rom_base;
         
                 for (i = 0; (i < 512) && (stage != 4); i++) {
-                    for(j = 0;j < sizeof(radeon_sig)/sizeof(char *);j++) {
+                    for (j = 0; j < ARRAY_SIZE(radeon_sig); j++) {
                         if (radeon_sig[j][0] == *rom)
                                 if (strncmp(radeon_sig[j], rom,
                                             strlen(radeon_sig[j])) == 0) {
diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c
index 8c1a8b5..c44de90 100644
--- a/drivers/video/sstfb.c
+++ b/drivers/video/sstfb.c
@@ -1194,10 +1194,11 @@ static struct dac_switch dacs[] __devinitdata = {
 static int __devinit sst_detect_dactype(struct fb_info *info, struct sstfb_par *par)
 {
 	int i, ret = 0;
-	
-	for (i=0; i<sizeof(dacs)/sizeof(dacs[0]); i++) {
+
+	for (i = 0; i < ARRAY_SIZE(dacs); i++) {
 		ret = dacs[i].detect(info);
-		if (ret) break;
+		if (ret)
+			break;
 	}
 	if (!ret)
 		return 0;
@@ -1604,8 +1605,8 @@ static int sstfb_dump_regs(struct fb_info *info)
 		{FBZMODE,"fbzmode"},
 	};
 
-	const int pci_s = sizeof(pci_regs)/sizeof(pci_regs[0]);
-	const int sst_s = sizeof(sst_regs)/sizeof(sst_regs[0]);
+	const int pci_s = ARRAY_SIZE(pci_regs);
+	const int sst_s = ARRAY_SIZE(sst_regs);
 	struct sstfb_par *par = info->par;
 	struct pci_dev *dev = par->dev;
 	u32 pci_res[pci_s];
diff --git a/drivers/video/virgefb.c b/drivers/video/virgefb.c
index ed78747..5ea2345 100644
--- a/drivers/video/virgefb.c
+++ b/drivers/video/virgefb.c
@@ -616,8 +616,7 @@ static struct {
 #endif
 };
 
-#define arraysize(x)	(sizeof(x)/sizeof(*(x)))
-#define NUM_TOTAL_MODES	arraysize(virgefb_predefined)
+#define NUM_TOTAL_MODES	ARRAY_SIZE(virgefb_predefined)
 
 /*
  *    Default to 800x600 for video=virge8:, virge16: or virge32:
-- 
cgit v0.10.2


From e4ccde33de202fae1b1e2a940604ee9e295450d9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 27 Mar 2006 01:17:41 -0800
Subject: [PATCH] video/sis/init301.c:SiS_ChrontelDoSomething2(): remove dead
 code

The Coverity checker spotted these two unused variables.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/sis/init301.c b/drivers/video/sis/init301.c
index 2d88f90..c3e070a 100644
--- a/drivers/video/sis/init301.c
+++ b/drivers/video/sis/init301.c
@@ -8564,11 +8564,9 @@ SiS_ChrontelDoSomething3(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
 static void
 SiS_ChrontelDoSomething2(struct SiS_Private *SiS_Pr)
 {
-     unsigned short temp,tempcl,tempch;
+     unsigned short temp;
 
      SiS_LongDelay(SiS_Pr, 1);
-     tempcl = 3;
-     tempch = 0;
 
      do {
        temp = SiS_GetCH701x(SiS_Pr,0x66);
@@ -8582,13 +8580,6 @@ SiS_ChrontelDoSomething2(struct SiS_Private *SiS_Pr)
 
        SiS_SetCH701xForLCD(SiS_Pr);
 
-       if(tempcl == 0) {
-           if(tempch == 3) break;
-	   SiS_ChrontelResetDB(SiS_Pr);
-	   tempcl = 3;
-	   tempch++;
-       }
-       tempcl--;
        temp = SiS_GetCH701x(SiS_Pr,0x76);
        temp &= 0xfb;  /* Reset PLL */
        SiS_SetCH701x(SiS_Pr,0x76,temp);
-- 
cgit v0.10.2


From eccf081799be8d83852f183838bf26e1ca099db4 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Mon, 27 Mar 2006 01:17:42 -0800
Subject: [PATCH] device-mapper snapshot: fix origin_write pending_exception
 submission

Say you have several snapshots of the same origin and then you issue a write
to some place in the origin for the first time.

Before the device-mapper snapshot target lets the write go through to the
underlying device, it needs to make a copy of the data that is about to be
overwritten.  Each snapshot is independent, so it makes one copy for each
snapshot.

__origin_write() loops through each snapshot and checks to see whether a copy
is needed for that snapshot.  (A copy is only needed the first time that data
changes.)

If a copy is needed, the code allocates a 'pending_exception' structure
holding the details.  It links these together for all the snapshots, then
works its way through this list and submits the copying requests to the kcopyd
thread by calling start_copy().  When each request is completed, the original
pending_exception structure gets freed in pending_complete().

If you're very unlucky, this structure can get freed *before* the submission
process has finished walking the list.

This patch:

  1) Creates a new temporary list pe_queue to hold the pending exception
     structures;

  2) Does all the bookkeeping up-front, then walks through the new list
     safely and calls start_copy() for each pending_exception that needed it;

  3) Avoids attempting to add pe->siblings to the list if it's already
     connected.

[NB This does not fix all the races in this code.  More patches will follow.]

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7401540..874f145 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -49,6 +49,11 @@ struct pending_exception {
 	struct bio_list snapshot_bios;
 
 	/*
+	 * Short-term queue of pending exceptions prior to submission.
+	 */
+	struct list_head list;
+
+	/*
 	 * Other pending_exceptions that are processing this
 	 * chunk.  When this list is empty, we know we can
 	 * complete the origins.
@@ -930,8 +935,9 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 	int r = 1, first = 1;
 	struct dm_snapshot *snap;
 	struct exception *e;
-	struct pending_exception *pe, *last = NULL;
+	struct pending_exception *pe, *next_pe, *last = NULL;
 	chunk_t chunk;
+	LIST_HEAD(pe_queue);
 
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
@@ -965,12 +971,19 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 				snap->valid = 0;
 
 			} else {
-				if (last)
+				if (first) {
+					bio_list_add(&pe->origin_bios, bio);
+					r = 0;
+					first = 0;
+				}
+				if (last && list_empty(&pe->siblings))
 					list_merge(&pe->siblings,
 						   &last->siblings);
-
+				if (!pe->started) {
+					pe->started = 1;
+					list_add_tail(&pe->list, &pe_queue);
+				}
 				last = pe;
-				r = 0;
 			}
 		}
 
@@ -980,24 +993,8 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 	/*
 	 * Now that we have a complete pe list we can start the copying.
 	 */
-	if (last) {
-		pe = last;
-		do {
-			down_write(&pe->snap->lock);
-			if (first)
-				bio_list_add(&pe->origin_bios, bio);
-			if (!pe->started) {
-				pe->started = 1;
-				up_write(&pe->snap->lock);
-				start_copy(pe);
-			} else
-				up_write(&pe->snap->lock);
-			first = 0;
-			pe = list_entry(pe->siblings.next,
-					struct pending_exception, siblings);
-
-		} while (pe != last);
-	}
+	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
+		start_copy(pe);
 
 	return r;
 }
-- 
cgit v0.10.2


From b4b610f684d13bf8691feeae5d4d7a8bd1f1033e Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Mon, 27 Mar 2006 01:17:44 -0800
Subject: [PATCH] device-mapper snapshot: replace sibling list

The siblings "list" is used unsafely at the moment.

Firstly, only the element on the list being changed gets locked (via the
snapshot lock), not the next and previous elements which have pointers that
are also being changed.

Secondly, if you have two or more snapshots and write to the same chunk a
second time before every snapshot has finished making its private copy of the
data, if you're unlucky, _origin_write() could attempt its list_merge() and
dereference a 'last' pointer to a pending_exception structure that has just
been freed.

Analysis reveals that the list is actually only there for reference counting.
If 5 pending_exceptions are needed in origin_write, then the 5 are joined
together into a 5-element list - without a separate list head because there's
nowhere suitable to store it.  As the pending_exceptions complete, they are
removed from the list one-by-one and any contents of origin_bios get moved
across to one of the remaining pending_exceptions on the list.  Whichever one
is last is detected because list_empty() is then true and the origin_bios get
submitted.

The fix proposed here uses an alternative reference counting mechanism by
choosing one of the pending_exceptions as primary and maintaining an atomic
counter there.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 874f145..475514b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -54,11 +54,21 @@ struct pending_exception {
 	struct list_head list;
 
 	/*
-	 * Other pending_exceptions that are processing this
-	 * chunk.  When this list is empty, we know we can
-	 * complete the origins.
+	 * The primary pending_exception is the one that holds
+	 * the sibling_count and the list of origin_bios for a
+	 * group of pending_exceptions.  It is always last to get freed.
+	 * These fields get set up when writing to the origin.
 	 */
-	struct list_head siblings;
+	struct pending_exception *primary_pe;
+
+	/*
+	 * Number of pending_exceptions processing this chunk.
+	 * When this drops to zero we must complete the origin bios.
+	 * If incrementing or decrementing this, hold pe->snap->lock for
+	 * the sibling concerned and not pe->primary_pe->snap->lock unless
+	 * they are the same.
+	 */
+	atomic_t sibling_count;
 
 	/* Pointer back to snapshot context */
 	struct dm_snapshot *snap;
@@ -593,20 +603,15 @@ static void error_bios(struct bio *bio)
 
 static struct bio *__flush_bios(struct pending_exception *pe)
 {
-	struct pending_exception *sibling;
-
-	if (list_empty(&pe->siblings))
-		return bio_list_get(&pe->origin_bios);
-
-	sibling = list_entry(pe->siblings.next,
-			     struct pending_exception, siblings);
-
-	list_del(&pe->siblings);
-
-	/* This is fine as long as kcopyd is single-threaded. If kcopyd
-	 * becomes multi-threaded, we'll need some locking here.
+	/*
+	 * If this pe is involved in a write to the origin and
+	 * it is the last sibling to complete then release
+	 * the bios for the original write to the origin.
 	 */
-	bio_list_merge(&sibling->origin_bios, &pe->origin_bios);
+
+	if (pe->primary_pe &&
+	    atomic_dec_and_test(&pe->primary_pe->sibling_count))
+		return bio_list_get(&pe->primary_pe->origin_bios);
 
 	return NULL;
 }
@@ -614,6 +619,7 @@ static struct bio *__flush_bios(struct pending_exception *pe)
 static void pending_complete(struct pending_exception *pe, int success)
 {
 	struct exception *e;
+	struct pending_exception *primary_pe;
 	struct dm_snapshot *s = pe->snap;
 	struct bio *flush = NULL;
 
@@ -662,7 +668,20 @@ static void pending_complete(struct pending_exception *pe, int success)
 	}
 
  out:
-	free_pending_exception(pe);
+	primary_pe = pe->primary_pe;
+
+	/*
+	 * Free the pe if it's not linked to an origin write or if
+	 * it's not itself a primary pe.
+	 */
+	if (!primary_pe || primary_pe != pe)
+		free_pending_exception(pe);
+
+	/*
+	 * Free the primary pe if nothing references it.
+	 */
+	if (primary_pe && !atomic_read(&primary_pe->sibling_count))
+		free_pending_exception(primary_pe);
 
 	if (flush)
 		flush_bios(flush);
@@ -757,7 +776,8 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 			pe->e.old_chunk = chunk;
 			bio_list_init(&pe->origin_bios);
 			bio_list_init(&pe->snapshot_bios);
-			INIT_LIST_HEAD(&pe->siblings);
+			pe->primary_pe = NULL;
+			atomic_set(&pe->sibling_count, 1);
 			pe->snap = s;
 			pe->started = 0;
 
@@ -916,26 +936,12 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
-	struct list_head *l1_n, *l2_p;
-
-	l1_n = l1->next;
-	l2_p = l2->prev;
-
-	l1->next = l2;
-	l2->prev = l1;
-
-	l2_p->next = l1_n;
-	l1_n->prev = l2_p;
-}
-
 static int __origin_write(struct list_head *snapshots, struct bio *bio)
 {
-	int r = 1, first = 1;
+	int r = 1, first = 0;
 	struct dm_snapshot *snap;
 	struct exception *e;
-	struct pending_exception *pe, *next_pe, *last = NULL;
+	struct pending_exception *pe, *next_pe, *primary_pe = NULL;
 	chunk_t chunk;
 	LIST_HEAD(pe_queue);
 
@@ -962,6 +968,9 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 		 * Check exception table to see if block
 		 * is already remapped in this snapshot
 		 * and trigger an exception if not.
+		 *
+		 * sibling_count is initialised to 1 so pending_complete()
+		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = lookup_exception(&snap->complete, chunk);
 		if (!e) {
@@ -971,31 +980,60 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 				snap->valid = 0;
 
 			} else {
-				if (first) {
-					bio_list_add(&pe->origin_bios, bio);
+				if (!primary_pe) {
+					/*
+					 * Either every pe here has same
+					 * primary_pe or none has one yet.
+					 */
+					if (pe->primary_pe)
+						primary_pe = pe->primary_pe;
+					else {
+						primary_pe = pe;
+						first = 1;
+					}
+
+					bio_list_add(&primary_pe->origin_bios,
+						     bio);
 					r = 0;
-					first = 0;
 				}
-				if (last && list_empty(&pe->siblings))
-					list_merge(&pe->siblings,
-						   &last->siblings);
+				if (!pe->primary_pe) {
+					atomic_inc(&primary_pe->sibling_count);
+					pe->primary_pe = primary_pe;
+				}
 				if (!pe->started) {
 					pe->started = 1;
 					list_add_tail(&pe->list, &pe_queue);
 				}
-				last = pe;
 			}
 		}
 
 		up_write(&snap->lock);
 	}
 
+	if (!primary_pe)
+		goto out;
+
+	/*
+	 * If this is the first time we're processing this chunk and
+	 * sibling_count is now 1 it means all the pending exceptions
+	 * got completed while we were in the loop above, so it falls to
+	 * us here to remove the primary_pe and submit any origin_bios.
+	 */
+
+	if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+		flush_bios(bio_list_get(&primary_pe->origin_bios));
+		free_pending_exception(primary_pe);
+		/* If we got here, pe_queue is necessarily empty. */
+		goto out;
+	}
+
 	/*
 	 * Now that we have a complete pe list we can start the copying.
 	 */
 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
 		start_copy(pe);
 
+ out:
 	return r;
 }
 
-- 
cgit v0.10.2


From 76df1c651b66bdf07d60b3d60789feb5f58d73e3 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Mon, 27 Mar 2006 01:17:45 -0800
Subject: [PATCH] device-mapper snapshot: fix invalidation

When a snapshot becomes invalid, s->valid is set to 0.  In this state, a
snapshot can no longer be accessed.

When s->lock is acquired, before doing anything else, s->valid must be checked
to ensure the snapshot remains valid.

This patch eliminates some races (that may cause panics) by adding some
missing checks.  At the same time, some unnecessary levels of indentation are
removed and snapshot invalidation is moved into a single function that always
generates a device-mapper event.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 475514b..14bd1a1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -392,6 +392,8 @@ static void read_snapshot_metadata(struct dm_snapshot *s)
 		down_write(&s->lock);
 		s->valid = 0;
 		up_write(&s->lock);
+
+		dm_table_event(s->table);
 	}
 }
 
@@ -601,6 +603,11 @@ static void error_bios(struct bio *bio)
 	}
 }
 
+static inline void error_snapshot_bios(struct pending_exception *pe)
+{
+	error_bios(bio_list_get(&pe->snapshot_bios));
+}
+
 static struct bio *__flush_bios(struct pending_exception *pe)
 {
 	/*
@@ -616,6 +623,28 @@ static struct bio *__flush_bios(struct pending_exception *pe)
 	return NULL;
 }
 
+static void __invalidate_snapshot(struct dm_snapshot *s,
+				struct pending_exception *pe, int err)
+{
+	if (!s->valid)
+		return;
+
+	if (err == -EIO)
+		DMERR("Invalidating snapshot: Error reading/writing.");
+	else if (err == -ENOMEM)
+		DMERR("Invalidating snapshot: Unable to allocate exception.");
+
+	if (pe)
+		remove_exception(&pe->e);
+
+	if (s->store.drop_snapshot)
+		s->store.drop_snapshot(&s->store);
+
+	s->valid = 0;
+
+	dm_table_event(s->table);
+}
+
 static void pending_complete(struct pending_exception *pe, int success)
 {
 	struct exception *e;
@@ -623,50 +652,53 @@ static void pending_complete(struct pending_exception *pe, int success)
 	struct dm_snapshot *s = pe->snap;
 	struct bio *flush = NULL;
 
-	if (success) {
-		e = alloc_exception();
-		if (!e) {
-			DMWARN("Unable to allocate exception.");
-			down_write(&s->lock);
-			s->store.drop_snapshot(&s->store);
-			s->valid = 0;
-			flush = __flush_bios(pe);
-			up_write(&s->lock);
-
-			error_bios(bio_list_get(&pe->snapshot_bios));
-			goto out;
-		}
-		*e = pe->e;
-
-		/*
-		 * Add a proper exception, and remove the
-		 * in-flight exception from the list.
-		 */
+	if (!success) {
+		/* Read/write error - snapshot is unusable */
 		down_write(&s->lock);
-		insert_exception(&s->complete, e);
-		remove_exception(&pe->e);
+		__invalidate_snapshot(s, pe, -EIO);
 		flush = __flush_bios(pe);
-
-		/* Submit any pending write bios */
 		up_write(&s->lock);
 
-		flush_bios(bio_list_get(&pe->snapshot_bios));
-	} else {
-		/* Read/write error - snapshot is unusable */
+		error_snapshot_bios(pe);
+		goto out;
+	}
+
+	e = alloc_exception();
+	if (!e) {
 		down_write(&s->lock);
-		if (s->valid)
-			DMERR("Error reading/writing snapshot");
-		s->store.drop_snapshot(&s->store);
-		s->valid = 0;
-		remove_exception(&pe->e);
+		__invalidate_snapshot(s, pe, -ENOMEM);
 		flush = __flush_bios(pe);
 		up_write(&s->lock);
 
-		error_bios(bio_list_get(&pe->snapshot_bios));
+		error_snapshot_bios(pe);
+		goto out;
+	}
+	*e = pe->e;
 
-		dm_table_event(s->table);
+	/*
+	 * Add a proper exception, and remove the
+	 * in-flight exception from the list.
+	 */
+	down_write(&s->lock);
+	if (!s->valid) {
+		flush = __flush_bios(pe);
+		up_write(&s->lock);
+
+		free_exception(e);
+
+		error_snapshot_bios(pe);
+		goto out;
 	}
 
+	insert_exception(&s->complete, e);
+	remove_exception(&pe->e);
+	flush = __flush_bios(pe);
+
+	up_write(&s->lock);
+
+	/* Submit any pending write bios */
+	flush_bios(bio_list_get(&pe->snapshot_bios));
+
  out:
 	primary_pe = pe->primary_pe;
 
@@ -758,39 +790,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 	if (e) {
 		/* cast the exception to a pending exception */
 		pe = container_of(e, struct pending_exception, e);
+		goto out;
+	}
 
-	} else {
-		/*
-		 * Create a new pending exception, we don't want
-		 * to hold the lock while we do this.
-		 */
-		up_write(&s->lock);
-		pe = alloc_pending_exception();
-		down_write(&s->lock);
+	/*
+	 * Create a new pending exception, we don't want
+	 * to hold the lock while we do this.
+	 */
+	up_write(&s->lock);
+	pe = alloc_pending_exception();
+	down_write(&s->lock);
 
-		e = lookup_exception(&s->pending, chunk);
-		if (e) {
-			free_pending_exception(pe);
-			pe = container_of(e, struct pending_exception, e);
-		} else {
-			pe->e.old_chunk = chunk;
-			bio_list_init(&pe->origin_bios);
-			bio_list_init(&pe->snapshot_bios);
-			pe->primary_pe = NULL;
-			atomic_set(&pe->sibling_count, 1);
-			pe->snap = s;
-			pe->started = 0;
-
-			if (s->store.prepare_exception(&s->store, &pe->e)) {
-				free_pending_exception(pe);
-				s->valid = 0;
-				return NULL;
-			}
+	if (!s->valid) {
+		free_pending_exception(pe);
+		return NULL;
+	}
 
-			insert_exception(&s->pending, &pe->e);
-		}
+	e = lookup_exception(&s->pending, chunk);
+	if (e) {
+		free_pending_exception(pe);
+		pe = container_of(e, struct pending_exception, e);
+		goto out;
 	}
 
+	pe->e.old_chunk = chunk;
+	bio_list_init(&pe->origin_bios);
+	bio_list_init(&pe->snapshot_bios);
+	pe->primary_pe = NULL;
+	atomic_set(&pe->sibling_count, 1);
+	pe->snap = s;
+	pe->started = 0;
+
+	if (s->store.prepare_exception(&s->store, &pe->e)) {
+		free_pending_exception(pe);
+		return NULL;
+	}
+
+	insert_exception(&s->pending, &pe->e);
+
+ out:
 	return pe;
 }
 
@@ -807,13 +845,15 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 {
 	struct exception *e;
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+	int copy_needed = 0;
 	int r = 1;
 	chunk_t chunk;
-	struct pending_exception *pe;
+	struct pending_exception *pe = NULL;
 
 	chunk = sector_to_chunk(s, bio->bi_sector);
 
 	/* Full snapshots are not usable */
+	/* To get here the table must be live so s->active is always set. */
 	if (!s->valid)
 		return -EIO;
 
@@ -831,36 +871,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 		 * to copy an exception */
 		down_write(&s->lock);
 
+		if (!s->valid) {
+			r = -EIO;
+			goto out_unlock;
+		}
+
 		/* If the block is already remapped - use that, else remap it */
 		e = lookup_exception(&s->complete, chunk);
 		if (e) {
 			remap_exception(s, e, bio);
-			up_write(&s->lock);
-
-		} else {
-			pe = __find_pending_exception(s, bio);
-
-			if (!pe) {
-				if (s->store.drop_snapshot)
-					s->store.drop_snapshot(&s->store);
-				s->valid = 0;
-				r = -EIO;
-				up_write(&s->lock);
-			} else {
-				remap_exception(s, &pe->e, bio);
-				bio_list_add(&pe->snapshot_bios, bio);
-
-				if (!pe->started) {
-					/* this is protected by snap->lock */
-					pe->started = 1;
-					up_write(&s->lock);
-					start_copy(pe);
-				} else
-					up_write(&s->lock);
-				r = 0;
-			}
+			goto out_unlock;
+		}
+
+		pe = __find_pending_exception(s, bio);
+		if (!pe) {
+			__invalidate_snapshot(s, pe, -ENOMEM);
+			r = -EIO;
+			goto out_unlock;
 		}
 
+		remap_exception(s, &pe->e, bio);
+		bio_list_add(&pe->snapshot_bios, bio);
+
+		if (!pe->started) {
+			/* this is protected by snap->lock */
+			pe->started = 1;
+			copy_needed = 1;
+		}
+
+		r = 0;
+
+ out_unlock:
+		up_write(&s->lock);
+
+		if (copy_needed)
+			start_copy(pe);
 	} else {
 		/*
 		 * FIXME: this read path scares me because we
@@ -872,6 +917,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 		/* Do reads */
 		down_read(&s->lock);
 
+		if (!s->valid) {
+			up_read(&s->lock);
+			return -EIO;
+		}
+
 		/* See if it it has been remapped */
 		e = lookup_exception(&s->complete, chunk);
 		if (e)
@@ -948,15 +998,15 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
 
+		down_write(&snap->lock);
+
 		/* Only deal with valid and active snapshots */
 		if (!snap->valid || !snap->active)
-			continue;
+			goto next_snapshot;
 
 		/* Nothing to do if writing beyond end of snapshot */
 		if (bio->bi_sector >= dm_table_get_size(snap->table))
-			continue;
-
-		down_write(&snap->lock);
+			goto next_snapshot;
 
 		/*
 		 * Remember, different snapshots can have
@@ -973,40 +1023,43 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = lookup_exception(&snap->complete, chunk);
-		if (!e) {
-			pe = __find_pending_exception(snap, bio);
-			if (!pe) {
-				snap->store.drop_snapshot(&snap->store);
-				snap->valid = 0;
-
-			} else {
-				if (!primary_pe) {
-					/*
-					 * Either every pe here has same
-					 * primary_pe or none has one yet.
-					 */
-					if (pe->primary_pe)
-						primary_pe = pe->primary_pe;
-					else {
-						primary_pe = pe;
-						first = 1;
-					}
-
-					bio_list_add(&primary_pe->origin_bios,
-						     bio);
-					r = 0;
-				}
-				if (!pe->primary_pe) {
-					atomic_inc(&primary_pe->sibling_count);
-					pe->primary_pe = primary_pe;
-				}
-				if (!pe->started) {
-					pe->started = 1;
-					list_add_tail(&pe->list, &pe_queue);
-				}
+		if (e)
+			goto next_snapshot;
+
+		pe = __find_pending_exception(snap, bio);
+		if (!pe) {
+			__invalidate_snapshot(snap, pe, ENOMEM);
+			goto next_snapshot;
+		}
+
+		if (!primary_pe) {
+			/*
+			 * Either every pe here has same
+			 * primary_pe or none has one yet.
+			 */
+			if (pe->primary_pe)
+				primary_pe = pe->primary_pe;
+			else {
+				primary_pe = pe;
+				first = 1;
 			}
+
+			bio_list_add(&primary_pe->origin_bios, bio);
+
+			r = 0;
+		}
+
+		if (!pe->primary_pe) {
+			atomic_inc(&primary_pe->sibling_count);
+			pe->primary_pe = primary_pe;
+		}
+
+		if (!pe->started) {
+			pe->started = 1;
+			list_add_tail(&pe->list, &pe_queue);
 		}
 
+ next_snapshot:
 		up_write(&snap->lock);
 	}
 
-- 
cgit v0.10.2


From 930d332a23682202c07df0276dd665a57755b37d Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:47 -0800
Subject: [PATCH] drivers/md/dm-raid1.c: Fix inconsistent mirroring after
 interrupted recovery

dm-mirror has potential data corruption problem: while on-disk log shows
that all disk contents are in-sync, actual contents of the disks are not
synchronized.  This problem occurs if initial recovery (synching) is
interrupted and resumed.

Attached patch fixes this problem.

Background:

rh_dec() changes the region state from RH_NOSYNC (out-of-sync) to RH_CLEAN
(in-sync), which results in the corresponding bit of clean_bits being set.

This is harmful if on-disk log is used and the map is removed/suspended
before the initial sync is completed.  The clean_bits is written down to
the on-disk log at the map removal, and, upon resume, it's read and copied
to sync_bits.  Since the recovery process refers to the sync_bits to find a
region to be recovered, the region whose state was changed from RH_NOSYNC
to RH_CLEAN is no longer recovered.

If you haven't applied dm-raid1-read-balancing.patch proposed in dm-devel
sometimes ago, the contents of the mirrored disk just corrupt silently.  If
you have, balanced read may get bogus data from out-of-sync disks.

The patch keeps RH_NOSYNC state unchanged.  It will be changed to
RH_RECOVERING when recovery starts and get reclaimed when the recovery
completes.  So it doesn't leak the region hash entry.

Description:

Keep RH_NOSYNC state unchanged when I/O on the region completes.

rh_dec() changes the region state from RH_NOSYNC (out-of-sync) to RH_CLEAN
(in-sync), which results in the corresponding bit of clean_bits being set.

This is harmful if on-disk log is used and the map is removed/suspended
before the initial sync is completed.  The clean_bits is written down to
the on-disk log at the map removal, and, upon resume, it's read and copied
to sync_bits.  Since the recovery process refers to the sync_bits to find a
region to be recovered, the region whose state was changed from RH_NOSYNC
to RH_CLEAN is no longer recovered.

If you haven't applied dm-raid1-read-balancing.patch proposed in dm-devel
sometimes ago, the contents of the mirrored disk just corrupt silently.  If
you have, balanced read may get bogus data from out-of-sync disks.

The RH_NOSYNC region will be changed to RH_RECOVERING when recovery starts
on the region and get reclaimed when the recovery completes.  So it doesn't
leak the region hash entry.

Alasdair said:

  I've analysed the relevant part of the state machine and I believe that
  the patch is correct.

  (Further work on this code is still needed - this patch has the
  side-effect of holding onto memory unnecessarily for long periods of time
  under certain workloads - but better that than corrupting data.)

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4e90f23..3d90f1b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -402,9 +402,21 @@ static void rh_dec(struct region_hash *rh, region_t region)
 
 	spin_lock_irqsave(&rh->region_lock, flags);
 	if (atomic_dec_and_test(&reg->pending)) {
+		/*
+		 * There is no pending I/O for this region.
+		 * We can move the region to corresponding list for next action.
+		 * At this point, the region is not yet connected to any list.
+		 *
+		 * If the state is RH_NOSYNC, the region should be kept off
+		 * from clean list.
+		 * The hash entry for RH_NOSYNC will remain in memory
+		 * until the region is recovered or the map is reloaded.
+		 */
+
+		/* do nothing for RH_NOSYNC */
 		if (reg->state == RH_RECOVERING) {
 			list_add_tail(&reg->list, &rh->quiesced_regions);
-		} else {
+		} else if (reg->state == RH_DIRTY) {
 			reg->state = RH_CLEAN;
 			list_add(&reg->list, &rh->clean_regions);
 		}
-- 
cgit v0.10.2


From 4ee218cd67b385759993a6c840ea45f0ee0a8b30 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 27 Mar 2006 01:17:48 -0800
Subject: [PATCH] dm: remove SECTOR_FORMAT

We don't know what type sector_t has.  Sometimes it's unsigned long, sometimes
it's unsigned long long.  For example on ppc64 it's unsigned long with
CONFIG_LBD=n and on x86_64 it's unsigned long long with CONFIG_LBD=n.

The way to handle all of this is to always use unsigned long long and to
always typecast the sector_t when printing it.

Acked-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 259e86f..61a590b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -518,6 +518,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char *ivopts;
 	unsigned int crypto_flags;
 	unsigned int key_size;
+	unsigned long long tmpll;
 
 	if (argc != 5) {
 		ti->error = PFX "Not enough arguments";
@@ -633,15 +634,17 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad5;
 	}
 
-	if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) {
+	if (sscanf(argv[2], "%llu", &tmpll) != 1) {
 		ti->error = PFX "Invalid iv_offset sector";
 		goto bad5;
 	}
+	cc->iv_offset = tmpll;
 
-	if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) {
+	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = PFX "Invalid device sector";
 		goto bad5;
 	}
+	cc->start = tmpll;
 
 	if (dm_get_device(ti, argv[3], cc->start, ti->len,
 	                  dm_table_get_mode(ti->table), &cc->dev)) {
@@ -885,8 +888,8 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 			result[sz++] = '-';
 		}
 
-		DMEMIT(" " SECTOR_FORMAT " %s " SECTOR_FORMAT,
-		       cc->iv_offset, cc->dev->name, cc->start);
+		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
+				cc->dev->name, (unsigned long long)cc->start);
 		break;
 	}
 	return 0;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 6a2cd5d..daf586c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -26,6 +26,7 @@ struct linear_c {
 static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct linear_c *lc;
+	unsigned long long tmp;
 
 	if (argc != 2) {
 		ti->error = "dm-linear: Invalid argument count";
@@ -38,10 +39,11 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -ENOMEM;
 	}
 
-	if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
+	if (sscanf(argv[1], "%llu", &tmp) != 1) {
 		ti->error = "dm-linear: Invalid device sector";
 		goto bad;
 	}
+	lc->start = tmp;
 
 	if (dm_get_device(ti, argv[0], lc->start, ti->len,
 			  dm_table_get_mode(ti->table), &lc->dev)) {
@@ -87,8 +89,8 @@ static int linear_status(struct dm_target *ti, status_type_t type,
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%s " SECTOR_FORMAT, lc->dev->name,
-			 lc->start);
+		snprintf(result, maxlen, "%s %llu", lc->dev->name,
+				(unsigned long long)lc->start);
 		break;
 	}
 	return 0;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 3d90f1b..d12cf3e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -934,9 +934,9 @@ static inline int _check_region_size(struct dm_target *ti, uint32_t size)
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		      unsigned int mirror, char **argv)
 {
-	sector_t offset;
+	unsigned long long offset;
 
-	if (sscanf(argv[1], SECTOR_FORMAT, &offset) != 1) {
+	if (sscanf(argv[1], "%llu", &offset) != 1) {
 		ti->error = "dm-mirror: Invalid offset";
 		return -EINVAL;
 	}
@@ -1203,16 +1203,17 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		for (m = 0; m < ms->nr_mirrors; m++)
 			DMEMIT("%s ", ms->mirror[m].dev->name);
 
-		DMEMIT(SECTOR_FORMAT "/" SECTOR_FORMAT,
-		       ms->rh.log->type->get_sync_count(ms->rh.log),
-		       ms->nr_regions);
+		DMEMIT("%llu/%llu",
+			(unsigned long long)ms->rh.log->type->
+				get_sync_count(ms->rh.log),
+			(unsigned long long)ms->nr_regions);
 		break;
 
 	case STATUSTYPE_TABLE:
 		DMEMIT("%d ", ms->nr_mirrors);
 		for (m = 0; m < ms->nr_mirrors; m++)
-			DMEMIT("%s " SECTOR_FORMAT " ",
-			       ms->mirror[m].dev->name, ms->mirror[m].offset);
+			DMEMIT("%s %llu ", ms->mirror[m].dev->name,
+				(unsigned long long)ms->mirror[m].offset);
 	}
 
 	return 0;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 14bd1a1..a5765f9 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -959,9 +959,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 				snap->store.fraction_full(&snap->store,
 							  &numerator,
 							  &denominator);
-				snprintf(result, maxlen,
-					 SECTOR_FORMAT "/" SECTOR_FORMAT,
-					 numerator, denominator);
+				snprintf(result, maxlen, "%llu/%llu",
+					(unsigned long long)numerator,
+					(unsigned long long)denominator);
 			}
 			else
 				snprintf(result, maxlen, "Unknown");
@@ -974,9 +974,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 		 * to make private copies if the output is to
 		 * make sense.
 		 */
-		snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT,
+		snprintf(result, maxlen, "%s %s %c %llu",
 			 snap->origin->name, snap->cow->name,
-			 snap->type, snap->chunk_size);
+			 snap->type,
+			 (unsigned long long)snap->chunk_size);
 		break;
 	}
 
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 697aaca..204f796ee 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -49,9 +49,9 @@ static inline struct stripe_c *alloc_context(unsigned int stripes)
 static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
 		      unsigned int stripe, char **argv)
 {
-	sector_t start;
+	unsigned long long start;
 
-	if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
+	if (sscanf(argv[1], "%llu", &start) != 1)
 		return -EINVAL;
 
 	if (dm_get_device(ti, argv[0], start, sc->stripe_width,
@@ -201,10 +201,11 @@ static int stripe_status(struct dm_target *ti,
 		break;
 
 	case STATUSTYPE_TABLE:
-		DMEMIT("%d " SECTOR_FORMAT, sc->stripes, sc->chunk_mask + 1);
+		DMEMIT("%d %llu", sc->stripes,
+			(unsigned long long)sc->chunk_mask + 1);
 		for (i = 0; i < sc->stripes; i++)
-			DMEMIT(" %s " SECTOR_FORMAT, sc->stripe[i].dev->name,
-			       sc->stripe[i].physical_start);
+			DMEMIT(" %s %llu", sc->stripe[i].dev->name,
+			    (unsigned long long)sc->stripe[i].physical_start);
 		break;
 	}
 	return 0;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 4eaf075..0ff11d6 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -23,16 +23,6 @@
 #define DMEMIT(x...) sz += ((sz >= maxlen) ? \
 			  0 : scnprintf(result + sz, maxlen - sz, x))
 
-/*
- * FIXME: I think this should be with the definition of sector_t
- * in types.h.
- */
-#ifdef CONFIG_LBD
-#define SECTOR_FORMAT "%llu"
-#else
-#define SECTOR_FORMAT "%lu"
-#endif
-
 #define SECTOR_SHIFT 9
 
 /*
-- 
cgit v0.10.2


From 969429b504ae866d3f8b1cafd68a2c099e305093 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:17:49 -0800
Subject: [PATCH] dm: make sure QUEUE_FLAG_CLUSTER is set properly

This flag should be set for a virtual device iff it is set for all
underlying devices.

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 907b08d..9558a4a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -97,6 +97,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
 
 	lhs->seg_boundary_mask =
 		min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
+
+	lhs->no_cluster |= rhs->no_cluster;
 }
 
 /*
@@ -523,6 +525,8 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		rs->seg_boundary_mask =
 			min_not_zero(rs->seg_boundary_mask,
 				     q->seg_boundary_mask);
+
+		rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 	}
 
 	return r;
@@ -832,6 +836,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	q->hardsect_size = t->limits.hardsect_size;
 	q->max_segment_size = t->limits.max_segment_size;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
+	if (t->limits.no_cluster)
+		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+	else
+		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 51e0e95..aee10b2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -97,6 +97,7 @@ struct io_restrictions {
 	unsigned short		hardsect_size;
 	unsigned int		max_segment_size;
 	unsigned long		seg_boundary_mask;
+	unsigned char		no_cluster; /* inverted so that 0 is default */
 };
 
 struct dm_target {
-- 
cgit v0.10.2


From 138728dc96529f20dfe970c470e51885a60e329f Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Mon, 27 Mar 2006 01:17:50 -0800
Subject: [PATCH] dm snapshot: fix kcopyd destructor

Before removing a snapshot, wait for the completion of any kcopyd jobs using
it.

Do this by maintaining a count (nr_jobs) of how many outstanding jobs each
kcopyd_client has.

The snapshot destructor first unregisters the snapshot so that no new kcopyd
jobs (created by writes to the origin) will reference that particular
snapshot.  kcopyd_client_destroy() is now run next to wait for the completion
of any outstanding jobs before the snapshot exception structures (that those
jobs reference) are freed.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a5765f9..08312b4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -559,8 +559,12 @@ static void snapshot_dtr(struct dm_target *ti)
 {
 	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
+	/* Prevent further origin writes from using this snapshot. */
+	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
 
+	kcopyd_client_destroy(s->kcopyd_client);
+
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
 
@@ -569,7 +573,7 @@ static void snapshot_dtr(struct dm_target *ti)
 
 	dm_put_device(ti, s->origin);
 	dm_put_device(ti, s->cow);
-	kcopyd_client_destroy(s->kcopyd_client);
+
 	kfree(s);
 }
 
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 9dcb2c8..ed71f3f 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -44,6 +44,9 @@ struct kcopyd_client {
 	struct page_list *pages;
 	unsigned int nr_pages;
 	unsigned int nr_free_pages;
+
+	wait_queue_head_t destroyq;
+	atomic_t nr_jobs;
 };
 
 static struct page_list *alloc_pl(void)
@@ -292,10 +295,15 @@ static int run_complete_job(struct kcopyd_job *job)
 	int read_err = job->read_err;
 	unsigned int write_err = job->write_err;
 	kcopyd_notify_fn fn = job->fn;
+	struct kcopyd_client *kc = job->kc;
 
-	kcopyd_put_pages(job->kc, job->pages);
+	kcopyd_put_pages(kc, job->pages);
 	mempool_free(job, _job_pool);
 	fn(read_err, write_err, context);
+
+	if (atomic_dec_and_test(&kc->nr_jobs))
+		wake_up(&kc->destroyq);
+
 	return 0;
 }
 
@@ -430,6 +438,7 @@ static void do_work(void *ignored)
  */
 static void dispatch_job(struct kcopyd_job *job)
 {
+	atomic_inc(&job->kc->nr_jobs);
 	push(&_pages_jobs, job);
 	wake();
 }
@@ -669,6 +678,9 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
 		return r;
 	}
 
+	init_waitqueue_head(&kc->destroyq);
+	atomic_set(&kc->nr_jobs, 0);
+
 	client_add(kc);
 	*result = kc;
 	return 0;
@@ -676,6 +688,9 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
 
 void kcopyd_client_destroy(struct kcopyd_client *kc)
 {
+	/* Wait for completion of all jobs submitted by this client. */
+	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+
 	dm_io_put(kc->nr_pages);
 	client_free_pages(kc);
 	client_del(kc);
-- 
cgit v0.10.2


From 1ecac7fd74f2e5fb06a7719ecba55fb5778a9a47 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:51 -0800
Subject: [PATCH] dm flush queue EINTR

If dm_suspend() is cancelled, bios already added to the deferred list need to
be submitted.  Otherwise they remain 'in limbo' until there's a dm_resume().

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a64798e..48be69f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1107,6 +1107,7 @@ int dm_suspend(struct mapped_device *md, int do_lockfs)
 {
 	struct dm_table *map = NULL;
 	DECLARE_WAITQUEUE(wait, current);
+	struct bio *def;
 	int r = -EINVAL;
 
 	down(&md->suspend_lock);
@@ -1166,9 +1167,11 @@ int dm_suspend(struct mapped_device *md, int do_lockfs)
 	/* were we interrupted ? */
 	r = -EINTR;
 	if (atomic_read(&md->pending)) {
+		clear_bit(DMF_BLOCK_IO, &md->flags);
+		def = bio_list_get(&md->deferred);
+		__flush_deferred_io(md, def);
 		up_write(&md->io_lock);
 		unlock_fs(md);
-		clear_bit(DMF_BLOCK_IO, &md->flags);
 		goto out;
 	}
 	up_write(&md->io_lock);
-- 
cgit v0.10.2


From 7e51f257e87297a5b6fe6d136a8ef67206aaf3a8 Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@us.ibm.com>
Date: Mon, 27 Mar 2006 01:17:52 -0800
Subject: [PATCH] dm: store md name

The patch stores a printable device number in struct mapped_device for use in
warning messages and with a proposed netlink interface.

Signed-off-by: Mike Anderson <andmike@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 48be69f..f140d49 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -69,6 +69,7 @@ struct mapped_device {
 
 	request_queue_t *queue;
 	struct gendisk *disk;
+	char name[16];
 
 	void *interface_ptr;
 
@@ -842,6 +843,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 	add_disk(md->disk);
+	format_dev_t(md->name, MKDEV(_major, minor));
 
 	atomic_set(&md->pending, 0);
 	init_waitqueue_head(&md->wait);
-- 
cgit v0.10.2


From 9ade92a9a5b0a3a10efa6551b8c67a9277bf0438 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Mon, 27 Mar 2006 01:17:53 -0800
Subject: [PATCH] dm: tidy mdptr

Change dm_get_mdptr() to take a struct mapped_device instead of dev_t.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 442e2be..0693b6f 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -600,12 +600,22 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
  */
 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
 {
+	struct mapped_device *md;
+	void *mdptr = NULL;
+
 	if (*param->uuid)
 		return __get_uuid_cell(param->uuid);
-	else if (*param->name)
+
+	if (*param->name)
 		return __get_name_cell(param->name);
-	else
-		return dm_get_mdptr(huge_decode_dev(param->dev));
+
+	md = dm_get_md(huge_decode_dev(param->dev));
+	if (md) {
+		mdptr = dm_get_mdptr(md);
+		dm_put(md);
+	}
+
+	return mdptr;
 }
 
 static struct mapped_device *find_device(struct dm_ioctl *param)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f140d49..3d121cb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,15 +990,9 @@ struct mapped_device *dm_get_md(dev_t dev)
 	return md;
 }
 
-void *dm_get_mdptr(dev_t dev)
+void *dm_get_mdptr(struct mapped_device *md)
 {
-	struct mapped_device *md;
-	void *mdptr = NULL;
-
-	md = dm_find_md(dev);
-	if (md)
-		mdptr = md->interface_ptr;
-	return mdptr;
+	return md->interface_ptr;
 }
 
 void dm_set_mdptr(struct mapped_device *md, void *ptr)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 0ff11d6..17ffa8d 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -47,7 +47,7 @@ struct mapped_device;
 int dm_create(struct mapped_device **md);
 int dm_create_with_minor(unsigned int minor, struct mapped_device **md);
 void dm_set_mdptr(struct mapped_device *md, void *ptr);
-void *dm_get_mdptr(dev_t dev);
+void *dm_get_mdptr(struct mapped_device *md);
 struct mapped_device *dm_get_md(dev_t dev);
 
 /*
-- 
cgit v0.10.2


From 1134e5ae79bab61c05657ca35a6297cf87202e35 Mon Sep 17 00:00:00 2001
From: Mike Anderson <andmike@us.ibm.com>
Date: Mon, 27 Mar 2006 01:17:54 -0800
Subject: [PATCH] dm table: store md

Store an up-pointer to the owning struct mapped_device in every table when it
is created.

Access it with:
  struct mapped_device *dm_table_get_md(struct dm_table *t)

Tables linked to md must be destroyed before the md itself.

Signed-off-by: Mike Anderson <andmike@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0693b6f..65826bd 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -244,9 +244,9 @@ static void __hash_remove(struct hash_cell *hc)
 		dm_table_put(table);
 	}
 
-	dm_put(hc->md);
 	if (hc->new_map)
 		dm_table_put(hc->new_map);
+	dm_put(hc->md);
 	free_cell(hc);
 }
 
@@ -985,33 +985,43 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	int r;
 	struct hash_cell *hc;
 	struct dm_table *t;
+	struct mapped_device *md;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
 
-	r = dm_table_create(&t, get_mode(param), param->target_count);
+	r = dm_table_create(&t, get_mode(param), param->target_count, md);
 	if (r)
-		return r;
+		goto out;
 
 	r = populate_table(t, param, param_size);
 	if (r) {
 		dm_table_put(t);
-		return r;
+		goto out;
 	}
 
 	down_write(&_hash_lock);
-	hc = __find_device_hash_cell(param);
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		DMWARN("device has been removed from the dev hash table.");
 		dm_table_put(t);
-		return -ENXIO;
+		up_write(&_hash_lock);
+		r = -ENXIO;
+		goto out;
 	}
 
 	if (hc->new_map)
 		dm_table_put(hc->new_map);
 	hc->new_map = t;
+	up_write(&_hash_lock);
+
 	param->flags |= DM_INACTIVE_PRESENT_FLAG;
+	r = __dev_status(md, param);
+
+out:
+	dm_put(md);
 
-	r = __dev_status(hc->md, param);
-	up_write(&_hash_lock);
 	return r;
 }
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 9558a4a..84f4594 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -22,6 +22,7 @@
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
 
 struct dm_table {
+	struct mapped_device *md;
 	atomic_t holders;
 
 	/* btree table */
@@ -206,7 +207,8 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
 	return 0;
 }
 
-int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
+int dm_table_create(struct dm_table **result, int mode,
+		    unsigned num_targets, struct mapped_device *md)
 {
 	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
 
@@ -229,6 +231,7 @@ int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
 	}
 
 	t->mode = mode;
+	t->md = md;
 	*result = t;
 	return 0;
 }
@@ -952,12 +955,20 @@ int dm_table_flush_all(struct dm_table *t)
 	return ret;
 }
 
+struct mapped_device *dm_table_get_md(struct dm_table *t)
+{
+	dm_get(t->md);
+
+	return t->md;
+}
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
 EXPORT_SYMBOL(dm_table_event);
 EXPORT_SYMBOL(dm_table_get_size);
 EXPORT_SYMBOL(dm_table_get_mode);
+EXPORT_SYMBOL(dm_table_get_md);
 EXPORT_SYMBOL(dm_table_put);
 EXPORT_SYMBOL(dm_table_get);
 EXPORT_SYMBOL(dm_table_unplug_all);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3d121cb..b99df48 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1007,18 +1007,18 @@ void dm_get(struct mapped_device *md)
 
 void dm_put(struct mapped_device *md)
 {
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map;
 
 	if (atomic_dec_and_test(&md->holders)) {
+		map = dm_get_table(md);
 		if (!dm_suspended(md)) {
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
 		__unbind(md);
+		dm_table_put(map);
 		free_dev(md);
 	}
-
-	dm_table_put(map);
 }
 
 /*
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 17ffa8d..79e8000 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -89,7 +89,8 @@ int dm_suspended(struct mapped_device *md);
  * Functions for manipulating a table.  Tables are also reference
  * counted.
  *---------------------------------------------------------------*/
-int dm_table_create(struct dm_table **result, int mode, unsigned num_targets);
+int dm_table_create(struct dm_table **result, int mode,
+		    unsigned num_targets, struct mapped_device *md);
 
 void dm_table_get(struct dm_table *t);
 void dm_table_put(struct dm_table *t);
@@ -107,6 +108,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
 unsigned int dm_table_get_num_targets(struct dm_table *t);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 int dm_table_get_mode(struct dm_table *t);
+struct mapped_device *dm_table_get_md(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 void dm_table_resume_targets(struct dm_table *t);
-- 
cgit v0.10.2


From 3ac51e741a46af7a20f55e79d3e3aeaa93c6c544 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Mon, 27 Mar 2006 01:17:54 -0800
Subject: [PATCH] dm store geometry

Allow drive geometry to be stored with a new DM_DEV_SET_GEOMETRY ioctl.
Device-mapper will now respond to HDIO_GETGEO.  If the geometry information is
not available, zero will be returned for all of the parameters.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 65826bd..8edd643 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/dm-ioctl.h>
+#include <linux/hdreg.h>
 
 #include <asm/uaccess.h>
 
@@ -700,6 +701,54 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 	return dm_hash_rename(param->name, new_name);
 }
 
+static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
+{
+	int r = -EINVAL, x;
+	struct mapped_device *md;
+	struct hd_geometry geometry;
+	unsigned long indata[4];
+	char *geostr = (char *) param + param->data_start;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	if (geostr < (char *) (param + 1) ||
+	    invalid_str(geostr, (void *) param + param_size)) {
+		DMWARN("Invalid geometry supplied.");
+		goto out;
+	}
+
+	x = sscanf(geostr, "%lu %lu %lu %lu", indata,
+		   indata + 1, indata + 2, indata + 3);
+
+	if (x != 4) {
+		DMWARN("Unable to interpret geometry settings.");
+		goto out;
+	}
+
+	if (indata[0] > 65535 || indata[1] > 255 ||
+	    indata[2] > 255 || indata[3] > ULONG_MAX) {
+		DMWARN("Geometry exceeds range limits.");
+		goto out;
+	}
+
+	geometry.cylinders = indata[0];
+	geometry.heads = indata[1];
+	geometry.sectors = indata[2];
+	geometry.start = indata[3];
+
+	r = dm_set_geometry(md, &geometry);
+	if (!r)
+		r = __dev_status(md, param);
+
+	param->data_size = 0;
+
+out:
+	dm_put(md);
+	return r;
+}
+
 static int do_suspend(struct dm_ioctl *param)
 {
 	int r = 0;
@@ -1234,7 +1283,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd)
 
 		{DM_LIST_VERSIONS_CMD, list_versions},
 
-		{DM_TARGET_MSG_CMD, target_message}
+		{DM_TARGET_MSG_CMD, target_message},
+		{DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry}
 	};
 
 	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b99df48..973e63d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -17,6 +17,7 @@
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
 
 static const char *_name = DM_NAME;
@@ -102,6 +103,9 @@ struct mapped_device {
 	 */
 	struct super_block *frozen_sb;
 	struct block_device *suspended_bdev;
+
+	/* forced geometry settings */
+	struct hd_geometry geometry;
 };
 
 #define MIN_IOS 256
@@ -227,6 +231,13 @@ static int dm_blk_close(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct mapped_device *md = bdev->bd_disk->private_data;
+
+	return dm_get_geometry(md, geo);
+}
+
 static inline struct dm_io *alloc_io(struct mapped_device *md)
 {
 	return mempool_alloc(md->io_pool, GFP_NOIO);
@@ -313,6 +324,33 @@ struct dm_table *dm_get_table(struct mapped_device *md)
 	return t;
 }
 
+/*
+ * Get the geometry associated with a dm device
+ */
+int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
+{
+	*geo = md->geometry;
+
+	return 0;
+}
+
+/*
+ * Set the geometry of a device.
+ */
+int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
+{
+	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
+
+	if (geo->start > sz) {
+		DMWARN("Start sector is beyond the geometry limits.");
+		return -EINVAL;
+	}
+
+	md->geometry = *geo;
+
+	return 0;
+}
+
 /*-----------------------------------------------------------------
  * CRUD START:
  *   A more elegant soln is in the works that uses the queue
@@ -906,6 +944,13 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 	sector_t size;
 
 	size = dm_table_get_size(t);
+
+	/*
+	 * Wipe any geometry if the size of the table changed.
+	 */
+	if (size != get_capacity(md->disk))
+		memset(&md->geometry, 0, sizeof(md->geometry));
+
 	__set_size(md, size);
 	if (size == 0)
 		return 0;
@@ -1261,6 +1306,7 @@ int dm_suspended(struct mapped_device *md)
 static struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
+	.getgeo = dm_blk_getgeo,
 	.owner = THIS_MODULE
 };
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 79e8000..fd90bc8 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -14,6 +14,7 @@
 #include <linux/device-mapper.h>
 #include <linux/list.h>
 #include <linux/blkdev.h>
+#include <linux/hdreg.h>
 
 #define DM_NAME "device-mapper"
 #define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
@@ -85,6 +86,12 @@ int dm_wait_event(struct mapped_device *md, int event_nr);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 
+/*
+ * Geometry functions.
+ */
+int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
+int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
+
 /*-----------------------------------------------------------------
  * Functions for manipulating a table.  Tables are also reference
  * counted.
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index efb518f..89ab677 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -140,6 +140,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS_32)
 COMPATIBLE_IOCTL(DM_TABLE_STATUS_32)
 COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32)
 COMPATIBLE_IOCTL(DM_TARGET_MSG_32)
+COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32)
 COMPATIBLE_IOCTL(DM_VERSION)
 COMPATIBLE_IOCTL(DM_REMOVE_ALL)
 COMPATIBLE_IOCTL(DM_LIST_DEVICES)
@@ -155,6 +156,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS)
 COMPATIBLE_IOCTL(DM_TABLE_STATUS)
 COMPATIBLE_IOCTL(DM_LIST_VERSIONS)
 COMPATIBLE_IOCTL(DM_TARGET_MSG)
+COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY)
 /* Big K */
 COMPATIBLE_IOCTL(PIO_FONT)
 COMPATIBLE_IOCTL(GIO_FONT)
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index fa75ba0..c67c678 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -80,6 +80,16 @@
  *
  * DM_TARGET_MSG:
  * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS.  See struct hd_geometry for range limits.  Also note that
+ * the geometry is erased if the device size changes.
  */
 
 /*
@@ -218,6 +228,7 @@ enum {
 	/* Added later */
 	DM_LIST_VERSIONS_CMD,
 	DM_TARGET_MSG_CMD,
+	DM_DEV_SET_GEOMETRY_CMD
 };
 
 /*
@@ -247,6 +258,7 @@ typedef char ioctl_struct[308];
 #define DM_TABLE_STATUS_32  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct)
 #define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct)
 #define DM_TARGET_MSG_32    _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, ioctl_struct)
+#define DM_DEV_SET_GEOMETRY_32	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, ioctl_struct)
 #endif
 
 #define DM_IOCTL 0xfd
@@ -270,11 +282,12 @@ typedef char ioctl_struct[308];
 #define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
 
 #define DM_TARGET_MSG	 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	5
+#define DM_VERSION_MINOR	6
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2005-10-04)"
+#define DM_VERSION_EXTRA	"-ioctl (2006-02-17)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v0.10.2


From 6a4d44c1f1108d6c9e8850e8cf166aaba0e56eae Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:55 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: holders/slaves subdirectory

Creating "slaves" and "holders" directories in /sys/block/<disk> and
creating "holders" directory under /sys/block/<disk>/<partition>

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f924f45..60523ce 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -297,6 +297,30 @@ struct kobj_type ktype_part = {
 	.sysfs_ops	= &part_sysfs_ops,
 };
 
+#ifdef CONFIG_SYSFS
+static inline void partition_sysfs_add_subdir(struct hd_struct *p)
+{
+	struct kobject *k;
+
+	k = kobject_get(&p->kobj);
+	p->holder_dir = kobject_add_dir(k, "holders");
+	kobject_put(k);
+}
+
+static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+{
+	struct kobject *k;
+
+	k = kobject_get(&disk->kobj);
+	disk->holder_dir = kobject_add_dir(k, "holders");
+	disk->slave_dir = kobject_add_dir(k, "slaves");
+	kobject_put(k);
+}
+#else
+#define partition_sysfs_add_subdir(x)	do { } while (0)
+#define disk_sysfs_add_subdirs(x)	do { } while (0)
+#endif
+
 void delete_partition(struct gendisk *disk, int part)
 {
 	struct hd_struct *p = disk->part[part-1];
@@ -310,6 +334,10 @@ void delete_partition(struct gendisk *disk, int part)
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
+#ifdef CONFIG_SYSFS
+	if (p->holder_dir)
+		kobject_unregister(p->holder_dir);
+#endif
 	kobject_unregister(&p->kobj);
 }
 
@@ -337,6 +365,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 	p->kobj.parent = &disk->kobj;
 	p->kobj.ktype = &ktype_part;
 	kobject_register(&p->kobj);
+	partition_sysfs_add_subdir(p);
 	disk->part[part-1] = p;
 }
 
@@ -383,6 +412,7 @@ void register_disk(struct gendisk *disk)
 	if ((err = kobject_add(&disk->kobj)))
 		return;
 	disk_sysfs_symlinks(disk);
+ 	disk_sysfs_add_subdirs(disk);
 	kobject_uevent(&disk->kobj, KOBJ_ADD);
 
 	/* No minors to use for partitions */
@@ -483,6 +513,12 @@ void del_gendisk(struct gendisk *disk)
 
 	devfs_remove_disk(disk);
 
+#ifdef CONFIG_SYSFS
+	if (disk->holder_dir)
+		kobject_unregister(disk->holder_dir);
+	if (disk->slave_dir)
+		kobject_unregister(disk->slave_dir);
+#endif
 	if (disk->driverfs_dev) {
 		char *disk_name = make_block_name(disk);
 		sysfs_remove_link(&disk->kobj, "device");
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fd647fd..eea61cc 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -78,6 +78,9 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	struct kobject kobj;
+#ifdef CONFIG_SYSFS
+	struct kobject *holder_dir;
+#endif
 	unsigned ios[2], sectors[2];	/* READs and WRITEs */
 	int policy, partno;
 };
@@ -114,6 +117,10 @@ struct gendisk {
 	int number;			/* more of the same */
 	struct device *driverfs_dev;
 	struct kobject kobj;
+#ifdef CONFIG_SYSFS
+	struct kobject *holder_dir;
+	struct kobject *slave_dir;
+#endif
 
 	struct timer_rand_state *random;
 	int policy;
-- 
cgit v0.10.2


From 100873687d81d4ce7b1299b447d33e87ba1e9583 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 27 Mar 2006 01:17:56 -0800
Subject: [PATCH]
 dm-md-dependency-tree-in-sysfs-holders-slaves-subdirectory-tidy

Remove all the CONFIG_SYSFS stuff.  That's supposed to all be implemented up
in header files.

Yes, the CONFIG_SYSFS=n data structures will be a little larger than
necessary, but that's a tradeoff we can decide to make.

Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 60523ce..af0cb4b 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -297,7 +297,6 @@ struct kobj_type ktype_part = {
 	.sysfs_ops	= &part_sysfs_ops,
 };
 
-#ifdef CONFIG_SYSFS
 static inline void partition_sysfs_add_subdir(struct hd_struct *p)
 {
 	struct kobject *k;
@@ -316,10 +315,6 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
 	disk->slave_dir = kobject_add_dir(k, "slaves");
 	kobject_put(k);
 }
-#else
-#define partition_sysfs_add_subdir(x)	do { } while (0)
-#define disk_sysfs_add_subdirs(x)	do { } while (0)
-#endif
 
 void delete_partition(struct gendisk *disk, int part)
 {
@@ -334,10 +329,8 @@ void delete_partition(struct gendisk *disk, int part)
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
-#ifdef CONFIG_SYSFS
 	if (p->holder_dir)
 		kobject_unregister(p->holder_dir);
-#endif
 	kobject_unregister(&p->kobj);
 }
 
@@ -513,12 +506,10 @@ void del_gendisk(struct gendisk *disk)
 
 	devfs_remove_disk(disk);
 
-#ifdef CONFIG_SYSFS
 	if (disk->holder_dir)
 		kobject_unregister(disk->holder_dir);
 	if (disk->slave_dir)
 		kobject_unregister(disk->slave_dir);
-#endif
 	if (disk->driverfs_dev) {
 		char *disk_name = make_block_name(disk);
 		sysfs_remove_link(&disk->kobj, "device");
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index eea61cc..bd7db86 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -78,9 +78,7 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	struct kobject kobj;
-#ifdef CONFIG_SYSFS
 	struct kobject *holder_dir;
-#endif
 	unsigned ios[2], sectors[2];	/* READs and WRITEs */
 	int policy, partno;
 };
@@ -117,10 +115,8 @@ struct gendisk {
 	int number;			/* more of the same */
 	struct device *driverfs_dev;
 	struct kobject kobj;
-#ifdef CONFIG_SYSFS
 	struct kobject *holder_dir;
 	struct kobject *slave_dir;
-#endif
 
 	struct timer_rand_state *random;
 	int policy;
-- 
cgit v0.10.2


From 641dc636b0475582e48584340b774bd1e90d40d9 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:57 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: bd_claim_by_kobject

Adding bd_claim_by_kobject() function which takes kobject as additional
signature of holder device and creates sysfs symlinks between holder device
and claimed device.  bd_release_from_kobject() is a counterpart of
bd_claim_by_kobject.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5983d42..3f36df7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -266,6 +266,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		mutex_init(&bdev->bd_mount_mutex);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
+#ifdef CONFIG_SYSFS
+		INIT_LIST_HEAD(&bdev->bd_holder_list);
+#endif
 		inode_init_once(&ei->vfs_inode);
 	}
 }
@@ -490,6 +493,300 @@ void bd_release(struct block_device *bdev)
 
 EXPORT_SYMBOL(bd_release);
 
+#ifdef CONFIG_SYSFS
+/*
+ * Functions for bd_claim_by_kobject / bd_release_from_kobject
+ *
+ *     If a kobject is passed to bd_claim_by_kobject()
+ *     and the kobject has a parent directory,
+ *     following symlinks are created:
+ *        o from the kobject to the claimed bdev
+ *        o from "holders" directory of the bdev to the parent of the kobject
+ *     bd_release_from_kobject() removes these symlinks.
+ *
+ *     Example:
+ *        If /dev/dm-0 maps to /dev/sda, kobject corresponding to
+ *        /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
+ *           /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ *           /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
+ */
+
+static struct kobject *bdev_get_kobj(struct block_device *bdev)
+{
+	if (bdev->bd_contains != bdev)
+		return kobject_get(&bdev->bd_part->kobj);
+	else
+		return kobject_get(&bdev->bd_disk->kobj);
+}
+
+static struct kobject *bdev_get_holder(struct block_device *bdev)
+{
+	if (bdev->bd_contains != bdev)
+		return kobject_get(bdev->bd_part->holder_dir);
+	else
+		return kobject_get(bdev->bd_disk->holder_dir);
+}
+
+static void add_symlink(struct kobject *from, struct kobject *to)
+{
+	if (!from || !to)
+		return;
+	sysfs_create_link(from, to, kobject_name(to));
+}
+
+static void del_symlink(struct kobject *from, struct kobject *to)
+{
+	if (!from || !to)
+		return;
+	sysfs_remove_link(from, kobject_name(to));
+}
+
+/*
+ * 'struct bd_holder' contains pointers to kobjects symlinked by
+ * bd_claim_by_kobject.
+ * It's connected to bd_holder_list which is protected by bdev->bd_sem.
+ */
+struct bd_holder {
+	struct list_head list;	/* chain of holders of the bdev */
+	int count;		/* references from the holder */
+	struct kobject *sdir;	/* holder object, e.g. "/block/dm-0/slaves" */
+	struct kobject *hdev;	/* e.g. "/block/dm-0" */
+	struct kobject *hdir;	/* e.g. "/block/sda/holders" */
+	struct kobject *sdev;	/* e.g. "/block/sda" */
+};
+
+/*
+ * Get references of related kobjects at once.
+ * Returns 1 on success. 0 on failure.
+ *
+ * Should call bd_holder_release_dirs() after successful use.
+ */
+static int bd_holder_grab_dirs(struct block_device *bdev,
+			struct bd_holder *bo)
+{
+	if (!bdev || !bo)
+		return 0;
+
+	bo->sdir = kobject_get(bo->sdir);
+	if (!bo->sdir)
+		return 0;
+
+	bo->hdev = kobject_get(bo->sdir->parent);
+	if (!bo->hdev)
+		goto fail_put_sdir;
+
+	bo->sdev = bdev_get_kobj(bdev);
+	if (!bo->sdev)
+		goto fail_put_hdev;
+
+	bo->hdir = bdev_get_holder(bdev);
+	if (!bo->hdir)
+		goto fail_put_sdev;
+
+	return 1;
+
+fail_put_sdev:
+	kobject_put(bo->sdev);
+fail_put_hdev:
+	kobject_put(bo->hdev);
+fail_put_sdir:
+	kobject_put(bo->sdir);
+
+	return 0;
+}
+
+/* Put references of related kobjects at once. */
+static void bd_holder_release_dirs(struct bd_holder *bo)
+{
+	kobject_put(bo->hdir);
+	kobject_put(bo->sdev);
+	kobject_put(bo->hdev);
+	kobject_put(bo->sdir);
+}
+
+static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return NULL;
+
+	bo->count = 1;
+	bo->sdir = kobj;
+
+	return bo;
+}
+
+static void free_bd_holder(struct bd_holder *bo)
+{
+	kfree(bo);
+}
+
+/**
+ * add_bd_holder - create sysfs symlinks for bd_claim() relationship
+ *
+ * @bdev:	block device to be bd_claimed
+ * @bo:		preallocated and initialized by alloc_bd_holder()
+ *
+ * If there is no matching entry with @bo in @bdev->bd_holder_list,
+ * add @bo to the list, create symlinks.
+ *
+ * Returns 1 if @bo was added to the list.
+ * Returns 0 if @bo wasn't used by any reason and should be freed.
+ */
+static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
+{
+	struct bd_holder *tmp;
+
+	if (!bo)
+		return 0;
+
+	list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
+		if (tmp->sdir == bo->sdir) {
+			tmp->count++;
+			return 0;
+		}
+	}
+
+	if (!bd_holder_grab_dirs(bdev, bo))
+		return 0;
+
+	add_symlink(bo->sdir, bo->sdev);
+	add_symlink(bo->hdir, bo->hdev);
+	list_add_tail(&bo->list, &bdev->bd_holder_list);
+	return 1;
+}
+
+/**
+ * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+ *
+ * @bdev:	block device to be bd_claimed
+ * @kobj:	holder's kobject
+ *
+ * If there is matching entry with @kobj in @bdev->bd_holder_list
+ * and no other bd_claim() from the same kobject,
+ * remove the struct bd_holder from the list, delete symlinks for it.
+ *
+ * Returns a pointer to the struct bd_holder when it's removed from the list
+ * and ready to be freed.
+ * Returns NULL if matching claim isn't found or there is other bd_claim()
+ * by the same kobject.
+ */
+static struct bd_holder *del_bd_holder(struct block_device *bdev,
+					struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	list_for_each_entry(bo, &bdev->bd_holder_list, list) {
+		if (bo->sdir == kobj) {
+			bo->count--;
+			BUG_ON(bo->count < 0);
+			if (!bo->count) {
+				list_del(&bo->list);
+				del_symlink(bo->sdir, bo->sdev);
+				del_symlink(bo->hdir, bo->hdev);
+				bd_holder_release_dirs(bo);
+				return bo;
+			}
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * bd_claim_by_kobject - bd_claim() with additional kobject signature
+ *
+ * @bdev:	block device to be claimed
+ * @holder:	holder's signature
+ * @kobj:	holder's kobject
+ *
+ * Do bd_claim() and if it succeeds, create sysfs symlinks between
+ * the bdev and the holder's kobject.
+ * Use bd_release_from_kobject() when relesing the claimed bdev.
+ *
+ * Returns 0 on success. (same as bd_claim())
+ * Returns errno on failure.
+ */
+static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
+				struct kobject *kobj)
+{
+	int res;
+	struct bd_holder *bo;
+
+	if (!kobj)
+		return -EINVAL;
+
+	bo = alloc_bd_holder(kobj);
+	if (!bo)
+		return -ENOMEM;
+
+	down(&bdev->bd_sem);
+	res = bd_claim(bdev, holder);
+	if (res || !add_bd_holder(bdev, bo))
+		free_bd_holder(bo);
+	up(&bdev->bd_sem);
+
+	return res;
+}
+
+/**
+ * bd_release_from_kobject - bd_release() with additional kobject signature
+ *
+ * @bdev:	block device to be released
+ * @kobj:	holder's kobject
+ *
+ * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
+ */
+static void bd_release_from_kobject(struct block_device *bdev,
+					struct kobject *kobj)
+{
+	struct bd_holder *bo;
+
+	if (!kobj)
+		return;
+
+	down(&bdev->bd_sem);
+	bd_release(bdev);
+	if ((bo = del_bd_holder(bdev, kobj)))
+		free_bd_holder(bo);
+	up(&bdev->bd_sem);
+}
+
+/**
+ * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
+ *
+ * @bdev:	block device to be claimed
+ * @holder:	holder's signature
+ * @disk:	holder's gendisk
+ *
+ * Call bd_claim_by_kobject() with getting @disk->slave_dir.
+ */
+int bd_claim_by_disk(struct block_device *bdev, void *holder,
+			struct gendisk *disk)
+{
+	return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
+}
+EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+
+/**
+ * bd_release_from_disk - wrapper function for bd_release_from_kobject()
+ *
+ * @bdev:	block device to be claimed
+ * @disk:	holder's gendisk
+ *
+ * Call bd_release_from_kobject() and put @disk->slave_dir.
+ */
+void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
+{
+	bd_release_from_kobject(bdev, disk->slave_dir);
+	kobject_put(disk->slave_dir);
+}
+EXPORT_SYMBOL_GPL(bd_release_from_disk);
+#endif
+
 /*
  * Tries to open block device by device number.  Use it ONLY if you
  * really do not have anything better - i.e. when you are behind a
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d96749..680d913 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -410,6 +410,9 @@ struct block_device {
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;
+#ifdef CONFIG_SYSFS
+	struct list_head	bd_holder_list;
+#endif
 	struct block_device *	bd_contains;
 	unsigned		bd_block_size;
 	struct hd_struct *	bd_part;
@@ -1399,6 +1402,13 @@ extern int blkdev_get(struct block_device *, mode_t, unsigned);
 extern int blkdev_put(struct block_device *);
 extern int bd_claim(struct block_device *, void *);
 extern void bd_release(struct block_device *);
+#ifdef CONFIG_SYSFS
+extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
+extern void bd_release_from_disk(struct block_device *, struct gendisk *);
+#else
+#define bd_claim_by_disk(bdev, holder, disk)	bd_claim(bdev, holder)
+#define bd_release_from_disk(bdev, disk)	bd_release(bdev)
+#endif
 
 /* fs/char_dev.c */
 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
-- 
cgit v0.10.2


From 5463c7904c952aa6b6804dd902c72a5332fa5221 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:58 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: md to use bd_claim_by_disk

Use bd_claim_by_disk.

Following symlinks are created if md0 is built from sda and sdb
  /sys/block/md0/slaves/sda --> /sys/block/sda
  /sys/block/md0/slaves/sdb --> /sys/block/sdb
  /sys/block/sda/holders/md0 --> /sys/block/md0
  /sys/block/sdb/holders/md0 --> /sys/block/md0

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5ed2228..bde3e96 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1301,6 +1301,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	else
 		ko = &rdev->bdev->bd_disk->kobj;
 	sysfs_create_link(&rdev->kobj, ko, "block");
+	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
 	return 0;
 }
 
@@ -1311,6 +1312,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 		MD_BUG();
 		return;
 	}
+	bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
 	list_del_init(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
-- 
cgit v0.10.2


From f165921df46a977e3561f1bd9f13a348441486d1 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:17:59 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: dm to use bd_claim_by_disk

Use bd_claim_by_disk.

Following symlinks are created if dm-0 maps to sda:
  /sys/block/dm-0/slaves/sda --> /sys/block/sda
  /sys/block/sda/holders/dm-0 --> /sys/block/dm-0

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 84f4594..76610a6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -350,7 +350,7 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev)
 /*
  * Open a device so we can use it as a map destination.
  */
-static int open_dev(struct dm_dev *d, dev_t dev)
+static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md)
 {
 	static char *_claim_ptr = "I belong to device-mapper";
 	struct block_device *bdev;
@@ -362,7 +362,7 @@ static int open_dev(struct dm_dev *d, dev_t dev)
 	bdev = open_by_devnum(dev, d->mode);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
-	r = bd_claim(bdev, _claim_ptr);
+	r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
 	if (r)
 		blkdev_put(bdev);
 	else
@@ -373,12 +373,12 @@ static int open_dev(struct dm_dev *d, dev_t dev)
 /*
  * Close a device that we've been using.
  */
-static void close_dev(struct dm_dev *d)
+static void close_dev(struct dm_dev *d, struct mapped_device *md)
 {
 	if (!d->bdev)
 		return;
 
-	bd_release(d->bdev);
+	bd_release_from_disk(d->bdev, dm_disk(md));
 	blkdev_put(d->bdev);
 	d->bdev = NULL;
 }
@@ -399,7 +399,7 @@ static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
  * careful to leave things as they were if we fail to reopen the
  * device.
  */
-static int upgrade_mode(struct dm_dev *dd, int new_mode)
+static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md)
 {
 	int r;
 	struct dm_dev dd_copy;
@@ -409,9 +409,9 @@ static int upgrade_mode(struct dm_dev *dd, int new_mode)
 
 	dd->mode |= new_mode;
 	dd->bdev = NULL;
-	r = open_dev(dd, dev);
+	r = open_dev(dd, dev, md);
 	if (!r)
-		close_dev(&dd_copy);
+		close_dev(&dd_copy, md);
 	else
 		*dd = dd_copy;
 
@@ -453,7 +453,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 		dd->mode = mode;
 		dd->bdev = NULL;
 
-		if ((r = open_dev(dd, dev))) {
+		if ((r = open_dev(dd, dev, t->md))) {
 			kfree(dd);
 			return r;
 		}
@@ -464,7 +464,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 		list_add(&dd->list, &t->devices);
 
 	} else if (dd->mode != (mode | dd->mode)) {
-		r = upgrade_mode(dd, mode);
+		r = upgrade_mode(dd, mode, t->md);
 		if (r)
 			return r;
 	}
@@ -541,7 +541,7 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
 {
 	if (atomic_dec_and_test(&dd->count)) {
-		close_dev(dd);
+		close_dev(dd, ti->table->md);
 		list_del(&dd->list);
 		kfree(dd);
 	}
-- 
cgit v0.10.2


From b4cf1b72eec0e197257a5b07dc9ec53552cdd123 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 27 Mar 2006 01:18:00 -0800
Subject: [PATCH] dm/md dependency tree in sysfs: convert bd_sem to bd_mutex

Convert bd_sem to bd_mutex

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3f36df7..17c7618 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -723,11 +723,11 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 
 	return res;
 }
@@ -748,11 +748,11 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	down(&bdev->bd_sem);
+	mutex_lock(&bdev->bd_mutex);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
-	up(&bdev->bd_sem);
+	mutex_unlock(&bdev->bd_mutex);
 }
 
 /**
-- 
cgit v0.10.2


From a22c96c737a9cefbe8d6e991c0032ad6db825a67 Mon Sep 17 00:00:00 2001
From: Kevin Corry <kevcorry@us.ibm.com>
Date: Mon, 27 Mar 2006 01:18:01 -0800
Subject: [PATCH] dm: remove unnecessary typecast

Signed-off-by: Kevin Corry <kevcorry@us.ibm.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 204f796ee..08328a8 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -103,7 +103,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -EINVAL;
 	}
 
-	if (((uint32_t)ti->len) & (chunk_size - 1)) {
+	if (ti->len & (chunk_size - 1)) {
 		ti->error = "dm-stripe: Target length not divisible by "
 		    "chunk size";
 		return -EINVAL;
-- 
cgit v0.10.2


From 89e5c8b5b85d6d46e8a28cdfa076313ae691d35c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:02 -0800
Subject: [PATCH] md: Make sure QUEUE_FLAG_CLUSTER is set properly for md.

This flag should be set for a virtual device iff it is set for all underlying
devices.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 062067f..a2e333a 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -785,6 +785,8 @@ void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
 	t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
 	t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
 	t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
+	if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+		clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
 }
 
 EXPORT_SYMBOL(blk_queue_stack_limits);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index bde3e96..3254ff1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -266,6 +266,7 @@ static mddev_t * mddev_find(dev_t unit)
 		kfree(new);
 		return NULL;
 	}
+	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
 
 	blk_queue_make_request(new->queue, md_fail_request);
 
-- 
cgit v0.10.2


From c5a10f62c5c496c49db749af103b991873b7e2dc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:03 -0800
Subject: [PATCH] md: Add '4' to the list of levels for which bitmaps are
 supported

I really should make this a function of the personality....

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3254ff1..875bced 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -765,7 +765,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
 		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
 		    mddev->bitmap_file == NULL) {
-			if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+			if (mddev->level != 1 && mddev->level != 4
+			    && mddev->level != 5 && mddev->level != 6
 			    && mddev->level != 10) {
 				/* FIXME use a better test */
 				printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
-- 
cgit v0.10.2


From 1be7892fffb45f6017494a88ff68fe84c6de26b4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:03 -0800
Subject: [PATCH] md: Fix the 'failed' count for version-0 superblocks

We are counting failed devices twice, once of the device that is failed, and
once for the hole that has been left in the array.  Remove the former so
'failed' matches 'missing'.  Storing these counts in the superblock is a bit
silly anyway....

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 875bced..686314f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -895,10 +895,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 			d->raid_disk = rdev2->raid_disk;
 		else
 			d->raid_disk = rdev2->desc_nr; /* compatibility */
-		if (test_bit(Faulty, &rdev2->flags)) {
+		if (test_bit(Faulty, &rdev2->flags))
 			d->state = (1<<MD_DISK_FAULTY);
-			failed++;
-		} else if (test_bit(In_sync, &rdev2->flags)) {
+		else if (test_bit(In_sync, &rdev2->flags)) {
 			d->state = (1<<MD_DISK_ACTIVE);
 			d->state |= (1<<MD_DISK_SYNC);
 			active++;
-- 
cgit v0.10.2


From 4588b42e9d0d0904a745c96cead66506c75bae21 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:04 -0800
Subject: [PATCH] md: Update status_resync to handle LARGE devices

status_resync - used by /proc/mdstat to report the status of a resync, assumes
that device sizes will always fit into an 'unsigned long' This is no longer
the case...

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 686314f..a3ecaf8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4044,7 +4044,10 @@ static void status_unused(struct seq_file *seq)
 
 static void status_resync(struct seq_file *seq, mddev_t * mddev)
 {
-	unsigned long max_blocks, resync, res, dt, db, rt;
+	sector_t max_blocks, resync, res;
+	unsigned long dt, db, rt;
+	int scale;
+	unsigned int per_milli;
 
 	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
 
@@ -4060,9 +4063,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 		MD_BUG();
 		return;
 	}
-	res = (resync/1024)*1000/(max_blocks/1024 + 1);
+	/* Pick 'scale' such that (resync>>scale)*1000 will fit
+	 * in a sector_t, and (max_blocks>>scale) will fit in a
+	 * u32, as those are the requirements for sector_div.
+	 * Thus 'scale' must be at least 10
+	 */
+	scale = 10;
+	if (sizeof(sector_t) > sizeof(unsigned long)) {
+		while ( max_blocks/2 > (1ULL<<(scale+32)))
+			scale++;
+	}
+	res = (resync>>scale)*1000;
+	sector_div(res, (u32)((max_blocks>>scale)+1));
+
+	per_milli = res;
 	{
-		int i, x = res/50, y = 20-x;
+		int i, x = per_milli/50, y = 20-x;
 		seq_printf(seq, "[");
 		for (i = 0; i < x; i++)
 			seq_printf(seq, "=");
@@ -4071,10 +4087,12 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 			seq_printf(seq, ".");
 		seq_printf(seq, "] ");
 	}
-	seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)",
+	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
 		      (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
 		       "resync" : "recovery"),
-		      res/10, res % 10, resync, max_blocks);
+		      per_milli/10, per_milli % 10,
+		   (unsigned long long) resync,
+		   (unsigned long long) max_blocks);
 
 	/*
 	 * We do not want to overflow, so the order of operands and
@@ -4088,7 +4106,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
 	db = resync - (mddev->resync_mark_cnt/2);
-	rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
+	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100;
 
 	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
 
-- 
cgit v0.10.2


From b55e6bfcd23cb2f7249095050c649f7aea813f9f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:06 -0800
Subject: [PATCH] md: Split disks array out of raid5 conf structure so it is
 easier to grow

The remainder of this batch implements raid5 reshaping.  Currently the only
shape change that is supported is added a device, but it is envisioned that
changing the chunksize and layout will also be supported, as well as changing
the level (e.g.  1->5, 5->6).

The reshape process naturally has to move all of the data in the array, and so
should be used with caution.  It is believed to work, and some testing does
support this, but wider testing would be great for increasing my confidence.

You will need a version of mdadm newer than 2.3.1 to make use of raid5 growth.
 This is because mdadm need to take a copy of a 'critical section' at the
start of the array incase there is a crash at an awkward moment.  On restart,
mdadm will restore the critical section and allow reshape to continue.

I hope to release a 2.4-pre by early next week - it still needs a little more
polishing.

This patch:

Previously the array of disk information was included in the raid5 'conf'
structure which was allocated to an appropriate size.  This makes it awkward
to change the size of that array.  So we split it off into a separate
kmalloced array which will require a little extra indexing, but is much easier
to grow.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2dba305..03f3137 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1822,11 +1822,13 @@ static int run(mddev_t *mddev)
 		return -EIO;
 	}
 
-	mddev->private = kzalloc(sizeof (raid5_conf_t)
-				 + mddev->raid_disks * sizeof(struct disk_info),
-				 GFP_KERNEL);
+	mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL);
 	if ((conf = mddev->private) == NULL)
 		goto abort;
+	conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info),
+			      GFP_KERNEL);
+	if (!conf->disks)
+		goto abort;
 
 	conf->mddev = mddev;
 
@@ -1966,6 +1968,7 @@ static int run(mddev_t *mddev)
 abort:
 	if (conf) {
 		print_raid5_conf(conf);
+		kfree(conf->disks);
 		kfree(conf->stripe_hashtbl);
 		kfree(conf);
 	}
@@ -1986,6 +1989,7 @@ static int stop(mddev_t *mddev)
 	kfree(conf->stripe_hashtbl);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
+	kfree(conf->disks);
 	kfree(conf);
 	mddev->private = NULL;
 	return 0;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index cd477eb..c7632f6 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -2006,11 +2006,14 @@ static int run(mddev_t *mddev)
 		return -EIO;
 	}
 
-	mddev->private = kzalloc(sizeof (raid6_conf_t)
-				 + mddev->raid_disks * sizeof(struct disk_info),
-				 GFP_KERNEL);
+	mddev->private = kzalloc(sizeof (raid6_conf_t), GFP_KERNEL);
 	if ((conf = mddev->private) == NULL)
 		goto abort;
+	conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info),
+				 GFP_KERNEL);
+	if (!conf->disks)
+		goto abort;
+
 	conf->mddev = mddev;
 
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
@@ -2158,6 +2161,7 @@ abort:
 		print_raid6_conf(conf);
 		safe_put_page(conf->spare_page);
 		kfree(conf->stripe_hashtbl);
+		kfree(conf->disks);
 		kfree(conf);
 	}
 	mddev->private = NULL;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 394da82..94dbdd4 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -240,7 +240,7 @@ struct raid5_private_data {
 							 * waiting for 25% to be free
 							 */        
 	spinlock_t		device_lock;
-	struct disk_info	disks[0];
+	struct disk_info	*disks;
 };
 
 typedef struct raid5_private_data raid5_conf_t;
-- 
cgit v0.10.2


From ad01c9e3752f4ba4f3d99c89b7370fa4983a25b5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:07 -0800
Subject: [PATCH] md: Allow stripes to be expanded in preparation for expanding
 an array

Before a RAID-5 can be expanded, we need to be able to expand the stripe-cache
data structure.

This requires allocating new stripes in a new kmem_cache.  If this succeeds,
we copy cache pages over and release the old stripes and kmem_cache.

We then allocate new pages.  If that fails, we leave the stripe cache at it's
new size.  It isn't worth the effort to shrink it back again.

Unfortuanately this means we need two kmem_cache names as we, for a short
period of time, we have two kmem_caches.  So they are raid5/%s and
raid5/%s-alt

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index a3ecaf8..c7b7656 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev)
  */
 static void autorun_devices(int part)
 {
-	struct list_head candidates;
 	struct list_head *tmp;
 	mdk_rdev_t *rdev0, *rdev;
 	mddev_t *mddev;
@@ -2784,6 +2783,7 @@ static void autorun_devices(int part)
 	printk(KERN_INFO "md: autorun ...\n");
 	while (!list_empty(&pending_raid_disks)) {
 		dev_t dev;
+		LIST_HEAD(candidates);
 		rdev0 = list_entry(pending_raid_disks.next,
 					 mdk_rdev_t, same_set);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 03f3137..6c20b44 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	kmem_cache_t *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
-	sc = kmem_cache_create(conf->cache_name, 
+	sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+	sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+	conf->active_name = 0;
+	sc = kmem_cache_create(conf->cache_name[conf->active_name],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
 			       0, 0, NULL, NULL);
 	if (!sc)
 		return 1;
 	conf->slab_cache = sc;
+	conf->pool_size = devs;
 	while (num--) {
 		if (!grow_one_stripe(conf))
 			return 1;
 	}
 	return 0;
 }
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+	/* Make all the stripes able to hold 'newsize' devices.
+	 * New slots in each stripe get 'page' set to a new page.
+	 *
+	 * This happens in stages:
+	 * 1/ create a new kmem_cache and allocate the required number of
+	 *    stripe_heads.
+	 * 2/ gather all the old stripe_heads and tranfer the pages across
+	 *    to the new stripe_heads.  This will have the side effect of
+	 *    freezing the array as once all stripe_heads have been collected,
+	 *    no IO will be possible.  Old stripe heads are freed once their
+	 *    pages have been transferred over, and the old kmem_cache is
+	 *    freed when all stripes are done.
+	 * 3/ reallocate conf->disks to be suitable bigger.  If this fails,
+	 *    we simple return a failre status - no need to clean anything up.
+	 * 4/ allocate new pages for the new slots in the new stripe_heads.
+	 *    If this fails, we don't bother trying the shrink the
+	 *    stripe_heads down again, we just leave them as they are.
+	 *    As each stripe_head is processed the new one is released into
+	 *    active service.
+	 *
+	 * Once step2 is started, we cannot afford to wait for a write,
+	 * so we use GFP_NOIO allocations.
+	 */
+	struct stripe_head *osh, *nsh;
+	LIST_HEAD(newstripes);
+	struct disk_info *ndisks;
+	int err = 0;
+	kmem_cache_t *sc;
+	int i;
+
+	if (newsize <= conf->pool_size)
+		return 0; /* never bother to shrink */
+
+	/* Step 1 */
+	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+			       0, 0, NULL, NULL);
+	if (!sc)
+		return -ENOMEM;
+
+	for (i = conf->max_nr_stripes; i; i--) {
+		nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+		if (!nsh)
+			break;
+
+		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+		nsh->raid_conf = conf;
+		spin_lock_init(&nsh->lock);
+
+		list_add(&nsh->lru, &newstripes);
+	}
+	if (i) {
+		/* didn't get enough, give up */
+		while (!list_empty(&newstripes)) {
+			nsh = list_entry(newstripes.next, struct stripe_head, lru);
+			list_del(&nsh->lru);
+			kmem_cache_free(sc, nsh);
+		}
+		kmem_cache_destroy(sc);
+		return -ENOMEM;
+	}
+	/* Step 2 - Must use GFP_NOIO now.
+	 * OK, we have enough stripes, start collecting inactive
+	 * stripes and copying them over
+	 */
+	list_for_each_entry(nsh, &newstripes, lru) {
+		spin_lock_irq(&conf->device_lock);
+		wait_event_lock_irq(conf->wait_for_stripe,
+				    !list_empty(&conf->inactive_list),
+				    conf->device_lock,
+				    unplug_slaves(conf->mddev);
+			);
+		osh = get_free_stripe(conf);
+		spin_unlock_irq(&conf->device_lock);
+		atomic_set(&nsh->count, 1);
+		for(i=0; i<conf->pool_size; i++)
+			nsh->dev[i].page = osh->dev[i].page;
+		for( ; i<newsize; i++)
+			nsh->dev[i].page = NULL;
+		kmem_cache_free(conf->slab_cache, osh);
+	}
+	kmem_cache_destroy(conf->slab_cache);
+
+	/* Step 3.
+	 * At this point, we are holding all the stripes so the array
+	 * is completely stalled, so now is a good time to resize
+	 * conf->disks.
+	 */
+	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+	if (ndisks) {
+		for (i=0; i<conf->raid_disks; i++)
+			ndisks[i] = conf->disks[i];
+		kfree(conf->disks);
+		conf->disks = ndisks;
+	} else
+		err = -ENOMEM;
+
+	/* Step 4, return new stripes to service */
+	while(!list_empty(&newstripes)) {
+		nsh = list_entry(newstripes.next, struct stripe_head, lru);
+		list_del_init(&nsh->lru);
+		for (i=conf->raid_disks; i < newsize; i++)
+			if (nsh->dev[i].page == NULL) {
+				struct page *p = alloc_page(GFP_NOIO);
+				nsh->dev[i].page = p;
+				if (!p)
+					err = -ENOMEM;
+			}
+		release_stripe(nsh);
+	}
+	/* critical section pass, GFP_NOIO no longer needed */
+
+	conf->slab_cache = sc;
+	conf->active_name = 1-conf->active_name;
+	conf->pool_size = newsize;
+	return err;
+}
+
 
 static int drop_one_stripe(raid5_conf_t *conf)
 {
@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf)
 		return 0;
 	if (atomic_read(&sh->count))
 		BUG();
-	shrink_buffers(sh, conf->raid_disks);
+	shrink_buffers(sh, conf->pool_size);
 	kmem_cache_free(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	return 1;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index c7632f6..6df4930 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *conf, int num)
 	kmem_cache_t *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+	sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
 
-	sc = kmem_cache_create(conf->cache_name,
+	sc = kmem_cache_create(conf->cache_name[0],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
 			       0, 0, NULL, NULL);
 	if (!sc)
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 94dbdd4..b7b2653 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -216,7 +216,11 @@ struct raid5_private_data {
 	struct list_head	bitmap_list; /* stripes delaying awaiting bitmap update */
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 
-	char			cache_name[20];
+	/* unfortunately we need two cache names as we temporarily have
+	 * two caches.
+	 */
+	int			active_name;
+	char			cache_name[2][20];
 	kmem_cache_t		*slab_cache; /* for allocating stripes */
 
 	int			seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
 	wait_queue_head_t	wait_for_overlap;
 	int			inactive_blocked;	/* release of inactive stripes blocked,
 							 * waiting for 25% to be free
-							 */        
+							 */
+	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
 };
-- 
cgit v0.10.2


From 7ecaa1e6a1ad69862e9980b6c777e11f26c4782d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:08 -0800
Subject: [PATCH] md: Infrastructure to allow normal IO to continue while array
 is expanding

We need to allow that different stripes are of different effective sizes, and
use the appropriate size.  Also, when a stripe is being expanded, we must
block any IO attempts until the stripe is stable again.

Key elements in this change are:
 - each stripe_head gets a 'disk' field which is part of the key,
   thus there can sometimes be two stripe heads of the same area of
   the array, but covering different numbers of devices.  One of these
   will be marked STRIPE_EXPANDING and so won't accept new requests.
 - conf->expand_progress tracks how the expansion is progressing and
   is used to determine whether the target part of the array has been
   expanded yet or not.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6c20b44..7a6df51 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -178,10 +178,10 @@ static int grow_buffers(struct stripe_head *sh, int num)
 
 static void raid5_build_block (struct stripe_head *sh, int i);
 
-static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
+static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
 {
 	raid5_conf_t *conf = sh->raid_conf;
-	int disks = conf->raid_disks, i;
+	int i;
 
 	if (atomic_read(&sh->count) != 0)
 		BUG();
@@ -198,7 +198,9 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
 	sh->pd_idx = pd_idx;
 	sh->state = 0;
 
-	for (i=disks; i--; ) {
+	sh->disks = disks;
+
+	for (i = sh->disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->toread || dev->towrite || dev->written ||
@@ -215,7 +217,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
 	insert_hash(conf, sh);
 }
 
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
 {
 	struct stripe_head *sh;
 	struct hlist_node *hn;
@@ -223,7 +225,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
 	CHECK_DEVLOCK();
 	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
 	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
-		if (sh->sector == sector)
+		if (sh->sector == sector && sh->disks == disks)
 			return sh;
 	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
 	return NULL;
@@ -232,8 +234,8 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
 static void unplug_slaves(mddev_t *mddev);
 static void raid5_unplug_device(request_queue_t *q);
 
-static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector,
-					     int pd_idx, int noblock) 
+static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
+					     int pd_idx, int noblock)
 {
 	struct stripe_head *sh;
 
@@ -245,7 +247,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
 				    conf->device_lock, /* nothing */);
-		sh = __find_stripe(conf, sector);
+		sh = __find_stripe(conf, sector, disks);
 		if (!sh) {
 			if (!conf->inactive_blocked)
 				sh = get_free_stripe(conf);
@@ -263,7 +265,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 					);
 				conf->inactive_blocked = 0;
 			} else
-				init_stripe(sh, sector, pd_idx);
+				init_stripe(sh, sector, pd_idx, disks);
 		} else {
 			if (atomic_read(&sh->count)) {
 				if (!list_empty(&sh->lru))
@@ -300,6 +302,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
 		kmem_cache_free(conf->slab_cache, sh);
 		return 0;
 	}
+	sh->disks = conf->raid_disks;
 	/* we just created an active stripe so... */
 	atomic_set(&sh->count, 1);
 	atomic_inc(&conf->active_stripes);
@@ -483,7 +486,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
 {
  	struct stripe_head *sh = bi->bi_private;
 	raid5_conf_t *conf = sh->raid_conf;
-	int disks = conf->raid_disks, i;
+	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 
 	if (bi->bi_size)
@@ -581,7 +584,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
 {
  	struct stripe_head *sh = bi->bi_private;
 	raid5_conf_t *conf = sh->raid_conf;
-	int disks = conf->raid_disks, i;
+	int disks = sh->disks, i;
 	unsigned long flags;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 
@@ -735,7 +738,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
 static sector_t compute_blocknr(struct stripe_head *sh, int i)
 {
 	raid5_conf_t *conf = sh->raid_conf;
-	int raid_disks = conf->raid_disks, data_disks = raid_disks - 1;
+	int raid_disks = sh->disks, data_disks = raid_disks - 1;
 	sector_t new_sector = sh->sector, check;
 	int sectors_per_chunk = conf->chunk_size >> 9;
 	sector_t stripe;
@@ -836,8 +839,7 @@ static void copy_data(int frombio, struct bio *bio,
 
 static void compute_block(struct stripe_head *sh, int dd_idx)
 {
-	raid5_conf_t *conf = sh->raid_conf;
-	int i, count, disks = conf->raid_disks;
+	int i, count, disks = sh->disks;
 	void *ptr[MAX_XOR_BLOCKS], *p;
 
 	PRINTK("compute_block, stripe %llu, idx %d\n", 
@@ -867,7 +869,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
 static void compute_parity(struct stripe_head *sh, int method)
 {
 	raid5_conf_t *conf = sh->raid_conf;
-	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
+	int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
 	void *ptr[MAX_XOR_BLOCKS];
 	struct bio *chosen;
 
@@ -1055,7 +1057,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 static void handle_stripe(struct stripe_head *sh)
 {
 	raid5_conf_t *conf = sh->raid_conf;
-	int disks = conf->raid_disks;
+	int disks = sh->disks;
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
@@ -1649,12 +1651,10 @@ static inline void raid5_plug_device(raid5_conf_t *conf)
 	spin_unlock_irq(&conf->device_lock);
 }
 
-static int make_request (request_queue_t *q, struct bio * bi)
+static int make_request(request_queue_t *q, struct bio * bi)
 {
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev_to_conf(mddev);
-	const unsigned int raid_disks = conf->raid_disks;
-	const unsigned int data_disks = raid_disks - 1;
 	unsigned int dd_idx, pd_idx;
 	sector_t new_sector;
 	sector_t logical_sector, last_sector;
@@ -1678,20 +1678,48 @@ static int make_request (request_queue_t *q, struct bio * bi)
 
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
+		int disks;
 		
-		new_sector = raid5_compute_sector(logical_sector,
-						  raid_disks, data_disks, &dd_idx, &pd_idx, conf);
-
+	retry:
+		if (likely(conf->expand_progress == MaxSector))
+			disks = conf->raid_disks;
+		else {
+			spin_lock_irq(&conf->device_lock);
+			disks = conf->raid_disks;
+			if (logical_sector >= conf->expand_progress)
+				disks = conf->previous_raid_disks;
+			spin_unlock_irq(&conf->device_lock);
+		}
+ 		new_sector = raid5_compute_sector(logical_sector, disks, disks - 1,
+						  &dd_idx, &pd_idx, conf);
 		PRINTK("raid5: make_request, sector %llu logical %llu\n",
 			(unsigned long long)new_sector, 
 			(unsigned long long)logical_sector);
 
-	retry:
 		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-		sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
+		sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
 		if (sh) {
-			if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
-				/* Add failed due to overlap.  Flush everything
+			if (unlikely(conf->expand_progress != MaxSector)) {
+				/* expansion might have moved on while waiting for a
+				 * stripe, so we much do the range check again.
+				 */
+				int must_retry = 0;
+				spin_lock_irq(&conf->device_lock);
+				if (logical_sector <  conf->expand_progress &&
+				    disks == conf->previous_raid_disks)
+					/* mismatch, need to try again */
+					must_retry = 1;
+				spin_unlock_irq(&conf->device_lock);
+				if (must_retry) {
+					release_stripe(sh);
+					goto retry;
+				}
+			}
+
+			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+			    !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
+				/* Stripe is busy expanding or
+				 * add failed due to overlap.  Flush everything
 				 * and wait a while
 				 */
 				raid5_unplug_device(mddev->queue);
@@ -1703,7 +1731,6 @@ static int make_request (request_queue_t *q, struct bio * bi)
 			raid5_plug_device(conf);
 			handle_stripe(sh);
 			release_stripe(sh);
-
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1779,9 +1806,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
 		+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
-	sh = get_active_stripe(conf, sector_nr, pd_idx, 1);
+	sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
 	if (sh == NULL) {
-		sh = get_active_stripe(conf, sector_nr, pd_idx, 0);
+		sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access 
 		 */
@@ -1998,6 +2025,7 @@ static int run(mddev_t *mddev)
 	conf->level = mddev->level;
 	conf->algorithm = mddev->layout;
 	conf->max_nr_stripes = NR_STRIPES;
+	conf->expand_progress = MaxSector;
 
 	/* device size must be a multiple of chunk size */
 	mddev->size &= ~(mddev->chunk_size/1024 -1);
@@ -2128,7 +2156,7 @@ static void print_sh (struct stripe_head *sh)
 	printk("sh %llu,  count %d.\n",
 		(unsigned long long)sh->sector, atomic_read(&sh->count));
 	printk("sh %llu, ", (unsigned long long)sh->sector);
-	for (i = 0; i < sh->raid_conf->raid_disks; i++) {
+	for (i = 0; i < sh->disks; i++) {
 		printk("(cache%d: %p %ld) ", 
 			i, sh->dev[i].page, sh->dev[i].flags);
 	}
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index b7b2653..6fa274a 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -135,6 +135,7 @@ struct stripe_head {
 	atomic_t		count;			/* nr of active thread/requests */
 	spinlock_t		lock;
 	int			bm_seq;	/* sequence number for bitmap flushes */
+	int			disks;			/* disks in stripe */
 	struct r5dev {
 		struct bio	req;
 		struct bio_vec	vec;
@@ -174,6 +175,7 @@ struct stripe_head {
 #define	STRIPE_DELAYED		6
 #define	STRIPE_DEGRADED		7
 #define	STRIPE_BIT_DELAY	8
+#define	STRIPE_EXPANDING	9
 
 /*
  * Plugging:
@@ -211,6 +213,10 @@ struct raid5_private_data {
 	int			raid_disks, working_disks, failed_disks;
 	int			max_nr_stripes;
 
+	/* used during an expand */
+	sector_t		expand_progress;	/* MaxSector when no expand happening */
+	int			previous_raid_disks;
+
 	struct list_head	handle_list; /* stripes needing handling */
 	struct list_head	delayed_list; /* stripes that have plugged requests */
 	struct list_head	bitmap_list; /* stripes delaying awaiting bitmap update */
-- 
cgit v0.10.2


From ccfcc3c10b2a5cb8fd3c918199a4ff904fc6fb3e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:09 -0800
Subject: [PATCH] md: Core of raid5 resize process

This patch provides the core of the resize/expand process.

sync_request notices if a 'reshape' is happening and acts accordingly.

It allocated new stripe_heads for the next chunk-wide-stripe in the target
geometry, marking them STRIPE_EXPANDING.

Then it finds which stripe heads in the old geometry can provide data needed
by these and marks them STRIPE_EXPAND_SOURCE.  This causes stripe_handle to
read all blocks on those stripes.

Once all blocks on a STRIPE_EXPAND_SOURCE stripe_head are read, any that are
needed are copied into the corresponding STRIPE_EXPANDING stripe_head.  Once a
STRIPE_EXPANDING stripe_head is full, it is marks STRIPE_EXPAND_READY and then
is written out and released.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c7b7656..8e65986 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2165,7 +2165,9 @@ action_show(mddev_t *mddev, char *page)
 	char *type = "idle";
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) {
-		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+			type = "reshape";
+		else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				type = "resync";
 			else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
@@ -4088,8 +4090,10 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 		seq_printf(seq, "] ");
 	}
 	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
+		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
+		    "reshape" :
 		      (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
-		       "resync" : "recovery"),
+		       "resync" : "recovery")),
 		      per_milli/10, per_milli % 10,
 		   (unsigned long long) resync,
 		   (unsigned long long) max_blocks);
@@ -4543,7 +4547,9 @@ static void md_do_sync(mddev_t *mddev)
 		 */
 		max_sectors = mddev->resync_max_sectors;
 		mddev->resync_mismatches = 0;
-	} else
+	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+		max_sectors = mddev->size << 1;
+	else
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
 
@@ -4679,6 +4685,8 @@ static void md_do_sync(mddev_t *mddev)
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
 	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
+	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2 &&
 	    mddev->curr_resync >= mddev->recovery_cp) {
 		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7a6df51..56cba8d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -93,11 +93,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
 					md_wakeup_thread(conf->mddev->thread);
 			}
-			list_add_tail(&sh->lru, &conf->inactive_list);
 			atomic_dec(&conf->active_stripes);
-			if (!conf->inactive_blocked ||
-			    atomic_read(&conf->active_stripes) < (conf->max_nr_stripes*3/4))
+			if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
+				list_add_tail(&sh->lru, &conf->inactive_list);
 				wake_up(&conf->wait_for_stripe);
+			}
 		}
 	}
 }
@@ -273,9 +273,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
-				if (list_empty(&sh->lru))
-					BUG();
-				list_del_init(&sh->lru);
+				if (!list_empty(&sh->lru))
+					list_del_init(&sh->lru);
 			}
 		}
 	} while (sh == NULL);
@@ -1035,6 +1034,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 	return 0;
 }
 
+static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
+{
+	int sectors_per_chunk = conf->chunk_size >> 9;
+	sector_t x = stripe;
+	int pd_idx, dd_idx;
+	int chunk_offset = sector_div(x, sectors_per_chunk);
+	stripe = x;
+	raid5_compute_sector(stripe*(disks-1)*sectors_per_chunk
+			     + chunk_offset, disks, disks-1, &dd_idx, &pd_idx, conf);
+	return pd_idx;
+}
+
 
 /*
  * handle_stripe - do things to a stripe.
@@ -1061,7 +1072,7 @@ static void handle_stripe(struct stripe_head *sh)
 	struct bio *return_bi= NULL;
 	struct bio *bi;
 	int i;
-	int syncing;
+	int syncing, expanding, expanded;
 	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
 	int non_overwrite = 0;
 	int failed_num=0;
@@ -1076,6 +1087,8 @@ static void handle_stripe(struct stripe_head *sh)
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
 	syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
 	rcu_read_lock();
@@ -1268,13 +1281,14 @@ static void handle_stripe(struct stripe_head *sh)
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (to_read || non_overwrite || (syncing && (uptodate < disks))) {
+	if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) {
 		for (i=disks; i--;) {
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
 			    (dev->toread ||
 			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
 			     syncing ||
+			     expanding ||
 			     (failed && (sh->dev[failed_num].toread ||
 					 (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
 				    )
@@ -1464,13 +1478,76 @@ static void handle_stripe(struct stripe_head *sh)
 			set_bit(R5_Wantwrite, &dev->flags);
 			set_bit(R5_ReWrite, &dev->flags);
 			set_bit(R5_LOCKED, &dev->flags);
+			locked++;
 		} else {
 			/* let's read it back */
 			set_bit(R5_Wantread, &dev->flags);
 			set_bit(R5_LOCKED, &dev->flags);
+			locked++;
 		}
 	}
 
+	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+		/* Need to write out all blocks after computing parity */
+		sh->disks = conf->raid_disks;
+		sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
+		compute_parity(sh, RECONSTRUCT_WRITE);
+		for (i= conf->raid_disks; i--;) {
+			set_bit(R5_LOCKED, &sh->dev[i].flags);
+			locked++;
+			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+		}
+		clear_bit(STRIPE_EXPANDING, &sh->state);
+	} else if (expanded) {
+		clear_bit(STRIPE_EXPAND_READY, &sh->state);
+		wake_up(&conf->wait_for_overlap);
+		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+	}
+
+	if (expanding && locked == 0) {
+		/* We have read all the blocks in this stripe and now we need to
+		 * copy some of them into a target stripe for expand.
+		 */
+		clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+		for (i=0; i< sh->disks; i++)
+			if (i != sh->pd_idx) {
+				int dd_idx, pd_idx, j;
+				struct stripe_head *sh2;
+
+				sector_t bn = compute_blocknr(sh, i);
+				sector_t s = raid5_compute_sector(bn, conf->raid_disks,
+								  conf->raid_disks-1,
+								  &dd_idx, &pd_idx, conf);
+				sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1);
+				if (sh2 == NULL)
+					/* so far only the early blocks of this stripe
+					 * have been requested.  When later blocks
+					 * get requested, we will try again
+					 */
+					continue;
+				if(!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+				   test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
+					/* must have already done this block */
+					release_stripe(sh2);
+					continue;
+				}
+				memcpy(page_address(sh2->dev[dd_idx].page),
+				       page_address(sh->dev[i].page),
+				       STRIPE_SIZE);
+				set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
+				set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
+				for (j=0; j<conf->raid_disks; j++)
+					if (j != sh2->pd_idx &&
+					    !test_bit(R5_Expanded, &sh2->dev[j].flags))
+						break;
+				if (j == conf->raid_disks) {
+					set_bit(STRIPE_EXPAND_READY, &sh2->state);
+					set_bit(STRIPE_HANDLE, &sh2->state);
+				}
+				release_stripe(sh2);
+			}
+	}
+
 	spin_unlock(&sh->lock);
 
 	while ((bi=return_bi)) {
@@ -1509,7 +1586,7 @@ static void handle_stripe(struct stripe_head *sh)
 		rcu_read_unlock();
  
 		if (rdev) {
-			if (syncing)
+			if (syncing || expanding || expanded)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
@@ -1757,12 +1834,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 {
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	struct stripe_head *sh;
-	int sectors_per_chunk = conf->chunk_size >> 9;
-	sector_t x;
-	unsigned long stripe;
-	int chunk_offset;
-	int dd_idx, pd_idx;
-	sector_t first_sector;
+	int pd_idx;
+	sector_t first_sector, last_sector;
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks-1;
 	sector_t max_sector = mddev->size << 1;
@@ -1781,6 +1854,80 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 		return 0;
 	}
+
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
+		/* reshaping is quite different to recovery/resync so it is
+		 * handled quite separately ... here.
+		 *
+		 * On each call to sync_request, we gather one chunk worth of
+		 * destination stripes and flag them as expanding.
+		 * Then we find all the source stripes and request reads.
+		 * As the reads complete, handle_stripe will copy the data
+		 * into the destination stripe and release that stripe.
+		 */
+		int i;
+		int dd_idx;
+		for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
+			int j;
+			int skipped = 0;
+			pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
+			sh = get_active_stripe(conf, sector_nr+i,
+					       conf->raid_disks, pd_idx, 0);
+			set_bit(STRIPE_EXPANDING, &sh->state);
+			/* If any of this stripe is beyond the end of the old
+			 * array, then we need to zero those blocks
+			 */
+			for (j=sh->disks; j--;) {
+				sector_t s;
+				if (j == sh->pd_idx)
+					continue;
+				s = compute_blocknr(sh, j);
+				if (s < (mddev->array_size<<1)) {
+					skipped = 1;
+					continue;
+				}
+				memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
+				set_bit(R5_Expanded, &sh->dev[j].flags);
+				set_bit(R5_UPTODATE, &sh->dev[j].flags);
+			}
+			if (!skipped) {
+				set_bit(STRIPE_EXPAND_READY, &sh->state);
+				set_bit(STRIPE_HANDLE, &sh->state);
+			}
+			release_stripe(sh);
+		}
+		spin_lock_irq(&conf->device_lock);
+		conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
+		spin_unlock_irq(&conf->device_lock);
+		/* Ok, those stripe are ready. We can start scheduling
+		 * reads on the source stripes.
+		 * The source stripes are determined by mapping the first and last
+		 * block on the destination stripes.
+		 */
+		raid_disks = conf->previous_raid_disks;
+		data_disks = raid_disks - 1;
+		first_sector =
+			raid5_compute_sector(sector_nr*(conf->raid_disks-1),
+					     raid_disks, data_disks,
+					     &dd_idx, &pd_idx, conf);
+		last_sector =
+			raid5_compute_sector((sector_nr+conf->chunk_size/512)
+					       *(conf->raid_disks-1) -1,
+					     raid_disks, data_disks,
+					     &dd_idx, &pd_idx, conf);
+		if (last_sector >= (mddev->size<<1))
+			last_sector = (mddev->size<<1)-1;
+		while (first_sector <= last_sector) {
+			pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
+			sh = get_active_stripe(conf, first_sector,
+					       conf->previous_raid_disks, pd_idx, 0);
+			set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+			set_bit(STRIPE_HANDLE, &sh->state);
+			release_stripe(sh);
+			first_sector += STRIPE_SECTORS;
+		}
+		return conf->chunk_size>>9;
+	}
 	/* if there is 1 or more failed drives and we are trying
 	 * to resync, then assert that we are finished, because there is
 	 * nothing we can do.
@@ -1799,13 +1946,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
 	}
 
-	x = sector_nr;
-	chunk_offset = sector_div(x, sectors_per_chunk);
-	stripe = x;
-	BUG_ON(x != stripe);
-
-	first_sector = raid5_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
-		+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
+	pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
 	sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
 	if (sh == NULL) {
 		sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 617b950..4e26ef2 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -157,6 +157,9 @@ struct mddev_s
 	 * DONE:     thread is done and is waiting to be reaped
 	 * REQUEST:  user-space has requested a sync (used with SYNC)
 	 * CHECK:    user-space request for for check-only, no repair
+	 * RESHAPE:  A reshape is happening
+	 *
+	 * If neither SYNC or RESHAPE are set, then it is a recovery.
 	 */
 #define	MD_RECOVERY_RUNNING	0
 #define	MD_RECOVERY_SYNC	1
@@ -166,6 +169,7 @@ struct mddev_s
 #define	MD_RECOVERY_NEEDED	5
 #define	MD_RECOVERY_REQUESTED	6
 #define	MD_RECOVERY_CHECK	7
+#define MD_RECOVERY_RESHAPE	8
 	unsigned long			recovery;
 
 	int				in_sync;	/* know to not need resync */
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 6fa274a..55c738d 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -157,6 +157,7 @@ struct stripe_head {
 #define	R5_ReadError	8	/* seen a read error here recently */
 #define	R5_ReWrite	9	/* have tried to over-write the readerror */
 
+#define	R5_Expanded	10	/* This block now has post-expand data */
 /*
  * Write method
  */
@@ -176,7 +177,8 @@ struct stripe_head {
 #define	STRIPE_DEGRADED		7
 #define	STRIPE_BIT_DELAY	8
 #define	STRIPE_EXPANDING	9
-
+#define	STRIPE_EXPAND_SOURCE	10
+#define	STRIPE_EXPAND_READY	11
 /*
  * Plugging:
  *
-- 
cgit v0.10.2


From 292695531ae4019bb15deedc121b218d1908b648 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:10 -0800
Subject: [PATCH] md: Final stages of raid5 expand code

This patch adds raid5_reshape and end_reshape which will start and finish the
reshape processes.

raid5_reshape is only enabled in CONFIG_MD_RAID5_RESHAPE is set, to discourage
accidental use.

Read the 'help' for the CONFIG_MD_RAID5_RESHAPE entry.

and Make sure that you have backups, just in case.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index ac43f98..fd2aae1 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -127,6 +127,32 @@ config MD_RAID5
 
 	  If unsure, say Y.
 
+config MD_RAID5_RESHAPE
+	bool "Support adding drives to a raid-5 array (experimental)"
+	depends on MD_RAID5 && EXPERIMENTAL
+	---help---
+	  A RAID-5 set can be expanded by adding extra drives. This
+	  requires "restriping" the array which means (almost) every
+	  block must be written to a different place.
+
+          This option allows such restriping to be done while the array
+	  is online.  However it is still EXPERIMENTAL code.  It should
+	  work, but please be sure that you have backups.
+
+	  You will need a version of mdadm newer than 2.3.1.   During the
+	  early stage of reshape there is a critical section where live data
+	  is being over-written.  A crash during this time needs extra care
+	  for recovery.  The newer mdadm takes a copy of the data in the
+	  critical section and will restore it, if necessary, after a crash.
+
+	  The mdadm usage is e.g.
+	       mdadm --grow /dev/md1 --raid-disks=6
+	  to grow '/dev/md1' to having 6 disks.
+
+	  Note: The array can only be expanded, not contracted.
+	  There should be enough spares already present to make the new
+	  array workable.
+
 config MD_RAID6
 	tristate "RAID-6 mode"
 	depends on BLK_DEV_MD
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8e65986..d169bc9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -158,11 +158,12 @@ static int start_readonly;
  */
 static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
 static atomic_t md_event_count;
-static void md_new_event(mddev_t *mddev)
+void md_new_event(mddev_t *mddev)
 {
 	atomic_inc(&md_event_count);
 	wake_up(&md_event_waiters);
 }
+EXPORT_SYMBOL_GPL(md_new_event);
 
 /*
  * Enables to iterate over all existing md arrays
@@ -4467,7 +4468,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
-static void md_do_sync(mddev_t *mddev)
+void md_do_sync(mddev_t *mddev)
 {
 	mddev_t *mddev2;
 	unsigned int currspeed = 0,
@@ -4704,6 +4705,7 @@ static void md_do_sync(mddev_t *mddev)
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 }
+EXPORT_SYMBOL_GPL(md_do_sync);
 
 
 /*
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 56cba8d..b29135a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -331,6 +331,8 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	}
 	return 0;
 }
+
+#ifdef CONFIG_MD_RAID5_RESHAPE
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
 	/* Make all the stripes able to hold 'newsize' devices.
@@ -451,7 +453,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	conf->pool_size = newsize;
 	return err;
 }
-
+#endif
 
 static int drop_one_stripe(raid5_conf_t *conf)
 {
@@ -1034,6 +1036,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 	return 0;
 }
 
+static void end_reshape(raid5_conf_t *conf);
+
 static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
 {
 	int sectors_per_chunk = conf->chunk_size >> 9;
@@ -1844,6 +1848,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	if (sector_nr >= max_sector) {
 		/* just being told to finish up .. nothing much to do */
 		unplug_slaves(mddev);
+		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
+			end_reshape(conf);
+			return 0;
+		}
 
 		if (mddev->curr_resync < max_sector) /* aborted */
 			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
@@ -2464,6 +2472,116 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
 	return 0;
 }
 
+#ifdef CONFIG_MD_RAID5_RESHAPE
+static int raid5_reshape(mddev_t *mddev, int raid_disks)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	int err;
+	mdk_rdev_t *rdev;
+	struct list_head *rtmp;
+	int spares = 0;
+	int added_devices = 0;
+
+	if (mddev->degraded ||
+	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+		return -EBUSY;
+	if (conf->raid_disks > raid_disks)
+		return -EINVAL; /* Cannot shrink array yet */
+	if (conf->raid_disks == raid_disks)
+		return 0; /* nothing to do */
+
+	/* Can only proceed if there are plenty of stripe_heads.
+	 * We need a minimum of one full stripe,, and for sensible progress
+	 * it is best to have about 4 times that.
+	 * If we require 4 times, then the default 256 4K stripe_heads will
+	 * allow for chunk sizes up to 256K, which is probably OK.
+	 * If the chunk size is greater, user-space should request more
+	 * stripe_heads first.
+	 */
+	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
+		printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
+		       (mddev->chunk_size / STRIPE_SIZE)*4);
+		return -ENOSPC;
+	}
+
+	ITERATE_RDEV(mddev, rdev, rtmp)
+		if (rdev->raid_disk < 0 &&
+		    !test_bit(Faulty, &rdev->flags))
+			spares++;
+	if (conf->raid_disks + spares < raid_disks-1)
+		/* Not enough devices even to make a degraded array
+		 * of that size
+		 */
+		return -EINVAL;
+
+	err = resize_stripes(conf, raid_disks);
+	if (err)
+		return err;
+
+	spin_lock_irq(&conf->device_lock);
+	conf->previous_raid_disks = conf->raid_disks;
+	mddev->raid_disks = conf->raid_disks = raid_disks;
+	conf->expand_progress = 0;
+	spin_unlock_irq(&conf->device_lock);
+
+	/* Add some new drives, as many as will fit.
+	 * We know there are enough to make the newly sized array work.
+	 */
+	ITERATE_RDEV(mddev, rdev, rtmp)
+		if (rdev->raid_disk < 0 &&
+		    !test_bit(Faulty, &rdev->flags)) {
+			if (raid5_add_disk(mddev, rdev)) {
+				char nm[20];
+				set_bit(In_sync, &rdev->flags);
+				conf->working_disks++;
+				added_devices++;
+				sprintf(nm, "rd%d", rdev->raid_disk);
+				sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+			} else
+				break;
+		}
+
+	mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices;
+	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
+						"%s_reshape");
+	if (!mddev->sync_thread) {
+		mddev->recovery = 0;
+		spin_lock_irq(&conf->device_lock);
+		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+		conf->expand_progress = MaxSector;
+		spin_unlock_irq(&conf->device_lock);
+		return -EAGAIN;
+	}
+	md_wakeup_thread(mddev->sync_thread);
+	md_new_event(mddev);
+	return 0;
+}
+#endif
+
+static void end_reshape(raid5_conf_t *conf)
+{
+	struct block_device *bdev;
+
+	conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1);
+	set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
+	conf->mddev->changed = 1;
+
+	bdev = bdget_disk(conf->mddev->gendisk, 0);
+	if (bdev) {
+		mutex_lock(&bdev->bd_inode->i_mutex);
+		i_size_write(bdev->bd_inode, conf->mddev->array_size << 10);
+		mutex_unlock(&bdev->bd_inode->i_mutex);
+		bdput(bdev);
+	}
+	spin_lock_irq(&conf->device_lock);
+	conf->expand_progress = MaxSector;
+	spin_unlock_irq(&conf->device_lock);
+}
+
 static void raid5_quiesce(mddev_t *mddev, int state)
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -2502,6 +2620,9 @@ static struct mdk_personality raid5_personality =
 	.spare_active	= raid5_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid5_resize,
+#ifdef CONFIG_MD_RAID5_RESHAPE
+	.reshape	= raid5_reshape,
+#endif
 	.quiesce	= raid5_quiesce,
 };
 
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index b6e0bca..9c77cde 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -92,7 +92,8 @@ extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 extern void md_super_wait(mddev_t *mddev);
 extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
 			struct page *page, int rw);
-
+extern void md_do_sync(mddev_t *mddev);
+extern void md_new_event(mddev_t *mddev);
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
-- 
cgit v0.10.2


From f67055780caac6a99f43834795c43acf99eba6a6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:11 -0800
Subject: [PATCH] md: Checkpoint and allow restart of raid5 reshape

We allow the superblock to record an 'old' and a 'new' geometry, and a
position where any conversion is up to.  The geometry allows for changing
chunksize, layout and level as well as number of devices.

When using verion-0.90 superblock, we convert the version to 0.91 while the
conversion is happening so that an old kernel will refuse the assemble the
array.  For version-1, we use a feature bit for the same effect.

When starting an array we check for an incomplete reshape and restart the
reshape process if needed.  If the reshape stopped at an awkward time (like
when updating the first stripe) we refuse to assemble the array, and let
user-space worry about it.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index d169bc9..b9dfdfc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -662,7 +662,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 	}
 
 	if (sb->major_version != 0 ||
-	    sb->minor_version != 90) {
+	    sb->minor_version < 90 ||
+	    sb->minor_version > 91) {
 		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
 			sb->major_version, sb->minor_version,
 			b);
@@ -747,6 +748,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
+		if (mddev->minor_version >= 91) {
+			mddev->reshape_position = sb->reshape_position;
+			mddev->delta_disks = sb->delta_disks;
+			mddev->new_level = sb->new_level;
+			mddev->new_layout = sb->new_layout;
+			mddev->new_chunk = sb->new_chunk;
+		} else {
+			mddev->reshape_position = MaxSector;
+			mddev->delta_disks = 0;
+			mddev->new_level = mddev->level;
+			mddev->new_layout = mddev->layout;
+			mddev->new_chunk = mddev->chunk_size;
+		}
+
 		if (sb->state & (1<<MD_SB_CLEAN))
 			mddev->recovery_cp = MaxSector;
 		else {
@@ -841,7 +856,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->md_magic = MD_SB_MAGIC;
 	sb->major_version = mddev->major_version;
-	sb->minor_version = mddev->minor_version;
 	sb->patch_version = mddev->patch_version;
 	sb->gvalid_words  = 0; /* ignored */
 	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
@@ -860,6 +874,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->events_hi = (mddev->events>>32);
 	sb->events_lo = (u32)mddev->events;
 
+	if (mddev->reshape_position == MaxSector)
+		sb->minor_version = 90;
+	else {
+		sb->minor_version = 91;
+		sb->reshape_position = mddev->reshape_position;
+		sb->new_level = mddev->new_level;
+		sb->delta_disks = mddev->delta_disks;
+		sb->new_layout = mddev->new_layout;
+		sb->new_chunk = mddev->new_chunk;
+	}
+	mddev->minor_version = sb->minor_version;
 	if (mddev->in_sync)
 	{
 		sb->recovery_cp = mddev->recovery_cp;
@@ -1104,6 +1129,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 			}
 			mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
 		}
+		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
+			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
+			mddev->new_level = le32_to_cpu(sb->new_level);
+			mddev->new_layout = le32_to_cpu(sb->new_layout);
+			mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
+		} else {
+			mddev->reshape_position = MaxSector;
+			mddev->delta_disks = 0;
+			mddev->new_level = mddev->level;
+			mddev->new_layout = mddev->layout;
+			mddev->new_chunk = mddev->chunk_size;
+		}
+
 	} else if (mddev->pers == NULL) {
 		/* Insist of good event counter while assembling */
 		__u64 ev1 = le64_to_cpu(sb->events);
@@ -1175,6 +1214,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
 	}
+	if (mddev->reshape_position != MaxSector) {
+		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
+		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
+		sb->new_layout = cpu_to_le32(mddev->new_layout);
+		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
+		sb->new_level = cpu_to_le32(mddev->new_level);
+		sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
+	}
 
 	max_dev = 0;
 	ITERATE_RDEV(mddev,rdev2,tmp)
@@ -1497,7 +1544,7 @@ static void sync_sbs(mddev_t * mddev)
 	}
 }
 
-static void md_update_sb(mddev_t * mddev)
+void md_update_sb(mddev_t * mddev)
 {
 	int err;
 	struct list_head *tmp;
@@ -1574,6 +1621,7 @@ repeat:
 	wake_up(&mddev->sb_wait);
 
 }
+EXPORT_SYMBOL_GPL(md_update_sb);
 
 /* words written to sysfs files may, or my not, be \n terminated.
  * We want to accept with case. For this we use cmd_match.
@@ -2545,6 +2593,14 @@ static int do_md_run(mddev_t * mddev)
 	mddev->level = pers->level;
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
+	if (mddev->reshape_position != MaxSector &&
+	    pers->reshape == NULL) {
+		/* This personality cannot handle reshaping... */
+		mddev->pers = NULL;
+		module_put(pers->owner);
+		return -EINVAL;
+	}
+
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
 	mddev->barriers_work = 1;
@@ -3433,11 +3489,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 	mddev->bitmap_offset = 0;
 
+	mddev->reshape_position = MaxSector;
+
 	/*
 	 * Generate a 128 bit UUID
 	 */
 	get_random_bytes(mddev->uuid, 16);
 
+	mddev->new_level = mddev->level;
+	mddev->new_chunk = mddev->chunk_size;
+	mddev->new_layout = mddev->layout;
+	mddev->delta_disks = 0;
+
 	return 0;
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5d88329..b65b8cf 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1789,6 +1789,11 @@ static int run(mddev_t *mddev)
 		       mdname(mddev), mddev->level);
 		goto out;
 	}
+	if (mddev->reshape_position != MaxSector) {
+		printk("raid1: %s: reshape_position set but not supported\n",
+		       mdname(mddev));
+		goto out;
+	}
 	/*
 	 * copy the already verified devices into our private RAID1
 	 * bookkeeping area. [whatever we allocate in run(),
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b29135a..20ae32d6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -22,6 +22,7 @@
 #include <linux/raid/raid5.h>
 #include <linux/highmem.h>
 #include <linux/bitops.h>
+#include <linux/kthread.h>
 #include <asm/atomic.h>
 
 #include <linux/raid/bitmap.h>
@@ -1504,6 +1505,7 @@ static void handle_stripe(struct stripe_head *sh)
 		clear_bit(STRIPE_EXPANDING, &sh->state);
 	} else if (expanded) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
+		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 	}
@@ -1875,6 +1877,26 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		 */
 		int i;
 		int dd_idx;
+
+		if (sector_nr == 0 &&
+		    conf->expand_progress != 0) {
+			/* restarting in the middle, skip the initial sectors */
+			sector_nr = conf->expand_progress;
+			sector_div(sector_nr, conf->raid_disks-1);
+			*skipped = 1;
+			return sector_nr;
+		}
+
+		/* Cannot proceed until we've updated the superblock... */
+		wait_event(conf->wait_for_overlap,
+			   atomic_read(&conf->reshape_stripes)==0);
+		mddev->reshape_position = conf->expand_progress;
+
+		mddev->sb_dirty = 1;
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
+			kthread_should_stop());
+
 		for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
 			int j;
 			int skipped = 0;
@@ -1882,6 +1904,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			sh = get_active_stripe(conf, sector_nr+i,
 					       conf->raid_disks, pd_idx, 0);
 			set_bit(STRIPE_EXPANDING, &sh->state);
+			atomic_inc(&conf->reshape_stripes);
 			/* If any of this stripe is beyond the end of the old
 			 * array, then we need to zero those blocks
 			 */
@@ -2121,10 +2144,61 @@ static int run(mddev_t *mddev)
 		return -EIO;
 	}
 
+	if (mddev->reshape_position != MaxSector) {
+		/* Check that we can continue the reshape.
+		 * Currently only disks can change, it must
+		 * increase, and we must be past the point where
+		 * a stripe over-writes itself
+		 */
+		sector_t here_new, here_old;
+		int old_disks;
+
+		if (mddev->new_level != mddev->level ||
+		    mddev->new_layout != mddev->layout ||
+		    mddev->new_chunk != mddev->chunk_size) {
+			printk(KERN_ERR "raid5: %s: unsupported reshape required - aborting.\n",
+			       mdname(mddev));
+			return -EINVAL;
+		}
+		if (mddev->delta_disks <= 0) {
+			printk(KERN_ERR "raid5: %s: unsupported reshape (reduce disks) required - aborting.\n",
+			       mdname(mddev));
+			return -EINVAL;
+		}
+		old_disks = mddev->raid_disks - mddev->delta_disks;
+		/* reshape_position must be on a new-stripe boundary, and one
+		 * further up in new geometry must map after here in old geometry.
+		 */
+		here_new = mddev->reshape_position;
+		if (sector_div(here_new, (mddev->chunk_size>>9)*(mddev->raid_disks-1))) {
+			printk(KERN_ERR "raid5: reshape_position not on a stripe boundary\n");
+			return -EINVAL;
+		}
+		/* here_new is the stripe we will write to */
+		here_old = mddev->reshape_position;
+		sector_div(here_old, (mddev->chunk_size>>9)*(old_disks-1));
+		/* here_old is the first stripe that we might need to read from */
+		if (here_new >= here_old) {
+			/* Reading from the same stripe as writing to - bad */
+			printk(KERN_ERR "raid5: reshape_position too early for auto-recovery - aborting.\n");
+			return -EINVAL;
+		}
+		printk(KERN_INFO "raid5: reshape will continue\n");
+		/* OK, we should be able to continue; */
+	}
+
+
 	mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL);
 	if ((conf = mddev->private) == NULL)
 		goto abort;
-	conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info),
+	if (mddev->reshape_position == MaxSector) {
+		conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
+	} else {
+		conf->raid_disks = mddev->raid_disks;
+		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
+	}
+
+	conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
 			      GFP_KERNEL);
 	if (!conf->disks)
 		goto abort;
@@ -2148,7 +2222,7 @@ static int run(mddev_t *mddev)
 
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		raid_disk = rdev->raid_disk;
-		if (raid_disk >= mddev->raid_disks
+		if (raid_disk >= conf->raid_disks
 		    || raid_disk < 0)
 			continue;
 		disk = conf->disks + raid_disk;
@@ -2164,7 +2238,6 @@ static int run(mddev_t *mddev)
 		}
 	}
 
-	conf->raid_disks = mddev->raid_disks;
 	/*
 	 * 0 for a fully functional array, 1 for a degraded array.
 	 */
@@ -2174,7 +2247,7 @@ static int run(mddev_t *mddev)
 	conf->level = mddev->level;
 	conf->algorithm = mddev->layout;
 	conf->max_nr_stripes = NR_STRIPES;
-	conf->expand_progress = MaxSector;
+	conf->expand_progress = mddev->reshape_position;
 
 	/* device size must be a multiple of chunk size */
 	mddev->size &= ~(mddev->chunk_size/1024 -1);
@@ -2247,6 +2320,20 @@ static int run(mddev_t *mddev)
 
 	print_raid5_conf(conf);
 
+	if (conf->expand_progress != MaxSector) {
+		printk("...ok start reshape thread\n");
+		atomic_set(&conf->reshape_stripes, 0);
+		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
+							"%s_reshape");
+		/* FIXME if md_register_thread fails?? */
+		md_wakeup_thread(mddev->sync_thread);
+
+	}
+
 	/* read-ahead size must cover two whole stripes, which is
 	 * 2 * (n-1) * chunksize where 'n' is the number of raid devices
 	 */
@@ -2262,8 +2349,8 @@ static int run(mddev_t *mddev)
 
 	mddev->queue->unplug_fn = raid5_unplug_device;
 	mddev->queue->issue_flush_fn = raid5_issue_flush;
+	mddev->array_size =  mddev->size * (conf->previous_raid_disks - 1);
 
-	mddev->array_size =  mddev->size * (mddev->raid_disks - 1);
 	return 0;
 abort:
 	if (conf) {
@@ -2436,7 +2523,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	/*
 	 * find the disk ...
 	 */
-	for (disk=0; disk < mddev->raid_disks; disk++)
+	for (disk=0; disk < conf->raid_disks; disk++)
 		if ((p=conf->disks + disk)->rdev == NULL) {
 			clear_bit(In_sync, &rdev->flags);
 			rdev->raid_disk = disk;
@@ -2518,9 +2605,10 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 	if (err)
 		return err;
 
+	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
 	conf->previous_raid_disks = conf->raid_disks;
-	mddev->raid_disks = conf->raid_disks = raid_disks;
+	conf->raid_disks = raid_disks;
 	conf->expand_progress = 0;
 	spin_unlock_irq(&conf->device_lock);
 
@@ -2542,6 +2630,14 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 		}
 
 	mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices;
+	mddev->new_chunk = mddev->chunk_size;
+	mddev->new_layout = mddev->layout;
+	mddev->new_level = mddev->level;
+	mddev->raid_disks = raid_disks;
+	mddev->delta_disks = raid_disks - conf->previous_raid_disks;
+	mddev->reshape_position = 0;
+	mddev->sb_dirty = 1;
+
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@ -2552,6 +2648,7 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+		mddev->delta_disks = 0;
 		conf->expand_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		return -EAGAIN;
@@ -2566,20 +2663,23 @@ static void end_reshape(raid5_conf_t *conf)
 {
 	struct block_device *bdev;
 
-	conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1);
-	set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
-	conf->mddev->changed = 1;
-
-	bdev = bdget_disk(conf->mddev->gendisk, 0);
-	if (bdev) {
-		mutex_lock(&bdev->bd_inode->i_mutex);
-		i_size_write(bdev->bd_inode, conf->mddev->array_size << 10);
-		mutex_unlock(&bdev->bd_inode->i_mutex);
-		bdput(bdev);
+	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+		conf->mddev->array_size = conf->mddev->size * (conf->raid_disks-1);
+		set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
+		conf->mddev->changed = 1;
+
+		bdev = bdget_disk(conf->mddev->gendisk, 0);
+		if (bdev) {
+			mutex_lock(&bdev->bd_inode->i_mutex);
+			i_size_write(bdev->bd_inode, conf->mddev->array_size << 10);
+			mutex_unlock(&bdev->bd_inode->i_mutex);
+			bdput(bdev);
+		}
+		spin_lock_irq(&conf->device_lock);
+		conf->expand_progress = MaxSector;
+		spin_unlock_irq(&conf->device_lock);
+		conf->mddev->reshape_position = MaxSector;
 	}
-	spin_lock_irq(&conf->device_lock);
-	conf->expand_progress = MaxSector;
-	spin_unlock_irq(&conf->device_lock);
 }
 
 static void raid5_quiesce(mddev_t *mddev, int state)
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 9c77cde..66b44e5 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -95,6 +95,8 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
 extern void md_do_sync(mddev_t *mddev);
 extern void md_new_event(mddev_t *mddev);
 
+extern void md_update_sb(mddev_t * mddev);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 #endif 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 4e26ef2..1a6f9f2 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -132,6 +132,14 @@ struct mddev_s
 
 	char				uuid[16];
 
+	/* If the array is being reshaped, we need to record the
+	 * new shape and an indication of where we are up to.
+	 * This is written to the superblock.
+	 * If reshape_position is MaxSector, then no reshape is happening (yet).
+	 */
+	sector_t			reshape_position;
+	int				delta_disks, new_level, new_layout, new_chunk;
+
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
 	sector_t			curr_resync;	/* blocks scheduled */
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index c100fa5..774e1ac 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -102,6 +102,18 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_ERRORS		1
 
 #define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
+
+/*
+ * Notes:
+ * - if an array is being reshaped (restriped) in order to change the
+ *   the number of active devices in the array, 'raid_disks' will be
+ *   the larger of the old and new numbers.  'delta_disks' will
+ *   be the "new - old".  So if +ve, raid_disks is the new value, and
+ *   "raid_disks-delta_disks" is the old.  If -ve, raid_disks is the
+ *   old value and "raid_disks+delta_disks" is the new (smaller) value.
+ */
+
+
 typedef struct mdp_superblock_s {
 	/*
 	 * Constant generic information
@@ -146,7 +158,13 @@ typedef struct mdp_superblock_s {
 	__u32 cp_events_hi;	/* 10 high-order of checkpoint update count   */
 #endif
 	__u32 recovery_cp;	/* 11 recovery checkpoint sector count	      */
-	__u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 12];
+	/* There are only valid for minor_version > 90 */
+	__u64 reshape_position;	/* 12,13 next address in array-space for reshape */
+	__u32 new_level;	/* 14 new level we are reshaping to	      */
+	__u32 delta_disks;	/* 15 change in number of raid_disks	      */
+	__u32 new_layout;	/* 16 new layout			      */
+	__u32 new_chunk;	/* 17 new chunk size (bytes)		      */
+	__u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18];
 
 	/*
 	 * Personality information
@@ -207,7 +225,14 @@ struct mdp_superblock_1 {
 				 * NOTE: signed, so bitmap can be before superblock
 				 * only meaningful of feature_map[0] is set.
 				 */
-	__u8	pad1[128-100];	/* set to 0 when written */
+
+	/* These are only valid with feature bit '4' */
+	__u64	reshape_position;	/* next address in array-space for reshape */
+	__u32	new_level;	/* new level we are reshaping to		*/
+	__u32	delta_disks;	/* change in number of raid_disks		*/
+	__u32	new_layout;	/* new layout					*/
+	__u32	new_chunk;	/* new chunk size (bytes)			*/
+	__u8	pad1[128-124];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
 	__u64	data_offset;	/* sector start of data, often 0 */
@@ -240,8 +265,9 @@ struct mdp_superblock_1 {
 
 /* feature_map bits */
 #define MD_FEATURE_BITMAP_OFFSET	1
+#define	MD_FEATURE_RESHAPE_ACTIVE	4
 
-#define	MD_FEATURE_ALL			1
+#define	MD_FEATURE_ALL			5
 
 #endif 
 
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 55c738d..abcdf0d 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -224,6 +224,7 @@ struct raid5_private_data {
 	struct list_head	bitmap_list; /* stripes delaying awaiting bitmap update */
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 
+	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */
 	/* unfortunately we need two cache names as we temporarily have
 	 * two caches.
 	 */
-- 
cgit v0.10.2


From b578d55fdd80140f657130abd85aebeb345755fb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:12 -0800
Subject: [PATCH] md: Only checkpoint expansion progress occasionally

Instead of checkpointing at each stripe, only checkpoint when a new write
would overwrite uncheckpointed data.  Block any write to the uncheckpointed
area.  Arbitrarily checkpoint at least every 3Meg.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20ae32d6..089a326 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1762,8 +1762,9 @@ static int make_request(request_queue_t *q, struct bio * bi)
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int disks;
-		
+
 	retry:
+		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		if (likely(conf->expand_progress == MaxSector))
 			disks = conf->raid_disks;
 		else {
@@ -1771,6 +1772,13 @@ static int make_request(request_queue_t *q, struct bio * bi)
 			disks = conf->raid_disks;
 			if (logical_sector >= conf->expand_progress)
 				disks = conf->previous_raid_disks;
+			else {
+				if (logical_sector >= conf->expand_lo) {
+					spin_unlock_irq(&conf->device_lock);
+					schedule();
+					goto retry;
+				}
+			}
 			spin_unlock_irq(&conf->device_lock);
 		}
  		new_sector = raid5_compute_sector(logical_sector, disks, disks - 1,
@@ -1779,7 +1787,6 @@ static int make_request(request_queue_t *q, struct bio * bi)
 			(unsigned long long)new_sector, 
 			(unsigned long long)logical_sector);
 
-		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
 		if (sh) {
 			if (unlikely(conf->expand_progress != MaxSector)) {
@@ -1877,6 +1884,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		 */
 		int i;
 		int dd_idx;
+		sector_t writepos, safepos, gap;
 
 		if (sector_nr == 0 &&
 		    conf->expand_progress != 0) {
@@ -1887,15 +1895,36 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			return sector_nr;
 		}
 
-		/* Cannot proceed until we've updated the superblock... */
-		wait_event(conf->wait_for_overlap,
-			   atomic_read(&conf->reshape_stripes)==0);
-		mddev->reshape_position = conf->expand_progress;
-
-		mddev->sb_dirty = 1;
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
-			kthread_should_stop());
+		/* we update the metadata when there is more than 3Meg
+		 * in the block range (that is rather arbitrary, should
+		 * probably be time based) or when the data about to be
+		 * copied would over-write the source of the data at
+		 * the front of the range.
+		 * i.e. one new_stripe forward from expand_progress new_maps
+		 * to after where expand_lo old_maps to
+		 */
+		writepos = conf->expand_progress +
+			conf->chunk_size/512*(conf->raid_disks-1);
+		sector_div(writepos, conf->raid_disks-1);
+		safepos = conf->expand_lo;
+		sector_div(safepos, conf->previous_raid_disks-1);
+		gap = conf->expand_progress - conf->expand_lo;
+
+		if (writepos >= safepos ||
+		    gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
+			/* Cannot proceed until we've updated the superblock... */
+			wait_event(conf->wait_for_overlap,
+				   atomic_read(&conf->reshape_stripes)==0);
+			mddev->reshape_position = conf->expand_progress;
+			mddev->sb_dirty = 1;
+			md_wakeup_thread(mddev->thread);
+			wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
+				   kthread_should_stop());
+			spin_lock_irq(&conf->device_lock);
+			conf->expand_lo = mddev->reshape_position;
+			spin_unlock_irq(&conf->device_lock);
+			wake_up(&conf->wait_for_overlap);
+		}
 
 		for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
 			int j;
@@ -2322,6 +2351,7 @@ static int run(mddev_t *mddev)
 
 	if (conf->expand_progress != MaxSector) {
 		printk("...ok start reshape thread\n");
+		conf->expand_lo = conf->expand_progress;
 		atomic_set(&conf->reshape_stripes, 0);
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -2610,6 +2640,7 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 	conf->previous_raid_disks = conf->raid_disks;
 	conf->raid_disks = raid_disks;
 	conf->expand_progress = 0;
+	conf->expand_lo = 0;
 	spin_unlock_irq(&conf->device_lock);
 
 	/* Add some new drives, as many as will fit.
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index abcdf0d..914af66 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -217,6 +217,9 @@ struct raid5_private_data {
 
 	/* used during an expand */
 	sector_t		expand_progress;	/* MaxSector when no expand happening */
+	sector_t		expand_lo; /* from here up to expand_progress it out-of-bounds
+					    * as we haven't flushed the metadata yet
+					    */
 	int			previous_raid_disks;
 
 	struct list_head	handle_list; /* stripes needing handling */
-- 
cgit v0.10.2


From 63c70c4f3a30e77e6f445bd16eff7934a031ebd3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:13 -0800
Subject: [PATCH] md: Split reshape handler in check_reshape and start_reshape

check_reshape checks validity and does things that can be done instantly -
like adding devices to raid1.  start_reshape initiates a restriping process to
convert the whole array.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b9dfdfc..1a63767 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2594,7 +2594,7 @@ static int do_md_run(mddev_t * mddev)
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
 	if (mddev->reshape_position != MaxSector &&
-	    pers->reshape == NULL) {
+	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
 		mddev->pers = NULL;
 		module_put(pers->owner);
@@ -3556,14 +3556,16 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
 {
 	int rv;
 	/* change the number of raid disks */
-	if (mddev->pers->reshape == NULL)
+	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
 	if (raid_disks <= 0 ||
 	    raid_disks >= mddev->max_disks)
 		return -EINVAL;
-	if (mddev->sync_thread)
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	rv = mddev->pers->reshape(mddev, raid_disks);
+	mddev->delta_disks = raid_disks - mddev->raid_disks;
+
+	rv = mddev->pers->check_reshape(mddev);
 	return rv;
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b65b8cf..04418e1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1976,7 +1976,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
 	return 0;
 }
 
-static int raid1_reshape(mddev_t *mddev, int raid_disks)
+static int raid1_reshape(mddev_t *mddev)
 {
 	/* We need to:
 	 * 1/ resize the r1bio_pool
@@ -1993,10 +1993,22 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	struct pool_info *newpoolinfo;
 	mirror_info_t *newmirrors;
 	conf_t *conf = mddev_to_conf(mddev);
-	int cnt;
+	int cnt, raid_disks;
 
 	int d, d2;
 
+	/* Cannot change chunk_size, layout, or level */
+	if (mddev->chunk_size != mddev->new_chunk ||
+	    mddev->layout != mddev->new_layout ||
+	    mddev->level != mddev->new_level) {
+		mddev->new_chunk = mddev->chunk_size;
+		mddev->new_layout = mddev->layout;
+		mddev->new_level = mddev->level;
+		return -EINVAL;
+	}
+
+	raid_disks = mddev->raid_disks + mddev->delta_disks;
+
 	if (raid_disks < conf->raid_disks) {
 		cnt=0;
 		for (d= 0; d < conf->raid_disks; d++)
@@ -2043,6 +2055,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 
 	mddev->degraded += (raid_disks - conf->raid_disks);
 	conf->raid_disks = mddev->raid_disks = raid_disks;
+	mddev->delta_disks = 0;
 
 	conf->last_used = 0; /* just make sure it is in-range */
 	lower_barrier(conf);
@@ -2084,7 +2097,7 @@ static struct mdk_personality raid1_personality =
 	.spare_active	= raid1_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid1_resize,
-	.reshape	= raid1_reshape,
+	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
 };
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 089a326..355dafb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2590,21 +2590,15 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
 }
 
 #ifdef CONFIG_MD_RAID5_RESHAPE
-static int raid5_reshape(mddev_t *mddev, int raid_disks)
+static int raid5_check_reshape(mddev_t *mddev)
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	int err;
-	mdk_rdev_t *rdev;
-	struct list_head *rtmp;
-	int spares = 0;
-	int added_devices = 0;
 
-	if (mddev->degraded ||
-	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		return -EBUSY;
-	if (conf->raid_disks > raid_disks)
-		return -EINVAL; /* Cannot shrink array yet */
-	if (conf->raid_disks == raid_disks)
+	if (mddev->delta_disks < 0 ||
+	    mddev->new_level != mddev->level)
+		return -EINVAL; /* Cannot shrink array or change level yet */
+	if (mddev->delta_disks == 0)
 		return 0; /* nothing to do */
 
 	/* Can only proceed if there are plenty of stripe_heads.
@@ -2615,30 +2609,48 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 	 * If the chunk size is greater, user-space should request more
 	 * stripe_heads first.
 	 */
-	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
+	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
+	    (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
 		printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
 		       (mddev->chunk_size / STRIPE_SIZE)*4);
 		return -ENOSPC;
 	}
 
+	err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
+	if (err)
+		return err;
+
+	/* looks like we might be able to manage this */
+	return 0;
+}
+
+static int raid5_start_reshape(mddev_t *mddev)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	mdk_rdev_t *rdev;
+	struct list_head *rtmp;
+	int spares = 0;
+	int added_devices = 0;
+
+	if (mddev->degraded ||
+	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+		return -EBUSY;
+
 	ITERATE_RDEV(mddev, rdev, rtmp)
 		if (rdev->raid_disk < 0 &&
 		    !test_bit(Faulty, &rdev->flags))
 			spares++;
-	if (conf->raid_disks + spares < raid_disks-1)
+
+	if (spares < mddev->delta_disks-1)
 		/* Not enough devices even to make a degraded array
 		 * of that size
 		 */
 		return -EINVAL;
 
-	err = resize_stripes(conf, raid_disks);
-	if (err)
-		return err;
-
 	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
 	conf->previous_raid_disks = conf->raid_disks;
-	conf->raid_disks = raid_disks;
+	conf->raid_disks += mddev->delta_disks;
 	conf->expand_progress = 0;
 	conf->expand_lo = 0;
 	spin_unlock_irq(&conf->device_lock);
@@ -2660,12 +2672,8 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 				break;
 		}
 
-	mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices;
-	mddev->new_chunk = mddev->chunk_size;
-	mddev->new_layout = mddev->layout;
-	mddev->new_level = mddev->level;
-	mddev->raid_disks = raid_disks;
-	mddev->delta_disks = raid_disks - conf->previous_raid_disks;
+	mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
+	mddev->raid_disks = conf->raid_disks;
 	mddev->reshape_position = 0;
 	mddev->sb_dirty = 1;
 
@@ -2679,7 +2687,6 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks)
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
-		mddev->delta_disks = 0;
 		conf->expand_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		return -EAGAIN;
@@ -2752,7 +2759,8 @@ static struct mdk_personality raid5_personality =
 	.sync_request	= sync_request,
 	.resize		= raid5_resize,
 #ifdef CONFIG_MD_RAID5_RESHAPE
-	.reshape	= raid5_reshape,
+	.check_reshape	= raid5_check_reshape,
+	.start_reshape  = raid5_start_reshape,
 #endif
 	.quiesce	= raid5_quiesce,
 };
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 1a6f9f2..002ee631 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -261,7 +261,8 @@ struct mdk_personality
 	int (*spare_active) (mddev_t *mddev);
 	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
 	int (*resize) (mddev_t *mddev, sector_t sectors);
-	int (*reshape) (mddev_t *mddev, int raid_disks);
+	int (*check_reshape) (mddev_t *mddev);
+	int (*start_reshape) (mddev_t *mddev);
 	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
 	/* quiesce moves between quiescence states
 	 * 0 - fully active
-- 
cgit v0.10.2


From 16484bf59634e25d1299761e5ed8bacf22bc6368 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:13 -0800
Subject: [PATCH] md: Make 'reshape' a possible sync_action action

This allows reshape to be triggerred via sysfs (which is the only way to start
it happening).

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1a63767..a79dd33 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2247,7 +2247,14 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 		return -EBUSY;
 	else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	else {
+	else if (cmd_match(page, "reshape")) {
+		int err;
+		if (mddev->pers->start_reshape == NULL)
+			return -EINVAL;
+		err = mddev->pers->start_reshape(mddev);
+		if (err)
+			return err;
+	} else {
 		if (cmd_match(page, "check"))
 			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 		else if (cmd_match(page, "repair"))
-- 
cgit v0.10.2


From e464eafdb4400c6d6576ba3840d8bd40340f8a96 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:14 -0800
Subject: [PATCH] md: Support suspending of IO to regions of an md array

This allows user-space to access data safely.  This is needed for raid5
reshape as user-space needs to take a backup of the first few stripes before
allowing reshape to commence.

It will also be useful in cluster-aware raid1 configurations so that all
cluster members can leave a section of the array untouched while a
resync/recovery happens.

A 'start' and 'end' of the suspended range are written to 2 sysfs attributes.
Note that only one range can be suspended at a time.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index a79dd33..92fd010 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page)
 static struct md_sysfs_entry
 md_sync_completed = __ATTR_RO(sync_completed);
 
+static ssize_t
+suspend_lo_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
+}
+
+static ssize_t
+suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long long new = simple_strtoull(buf, &e, 10);
+
+	if (mddev->pers->quiesce == NULL)
+		return -EINVAL;
+	if (buf == e || (*e && *e != '\n'))
+		return -EINVAL;
+	if (new >= mddev->suspend_hi ||
+	    (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
+		mddev->suspend_lo = new;
+		mddev->pers->quiesce(mddev, 2);
+		return len;
+	} else
+		return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_lo =
+__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
+
+
+static ssize_t
+suspend_hi_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
+}
+
+static ssize_t
+suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long long new = simple_strtoull(buf, &e, 10);
+
+	if (mddev->pers->quiesce == NULL)
+		return -EINVAL;
+	if (buf == e || (*e && *e != '\n'))
+		return -EINVAL;
+	if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
+	    (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
+		mddev->suspend_hi = new;
+		mddev->pers->quiesce(mddev, 1);
+		mddev->pers->quiesce(mddev, 0);
+		return len;
+	} else
+		return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_hi =
+__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
+
+
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
 	&md_raid_disks.attr,
@@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
 	&md_sync_completed.attr,
+	&md_suspend_lo.attr,
+	&md_suspend_hi.attr,
 	NULL,
 };
 static struct attribute_group md_redundancy_group = {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 355dafb..bb16ac2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi)
 					goto retry;
 				}
 			}
+			/* FIXME what if we get a false positive because these
+			 * are being updated.
+			 */
+			if (logical_sector >= mddev->suspend_lo &&
+			    logical_sector < mddev->suspend_hi) {
+				release_stripe(sh);
+				schedule();
+				goto retry;
+			}
 
 			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
 			    !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
@@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 
 	switch(state) {
+	case 2: /* resume for a suspend */
+		wake_up(&conf->wait_for_overlap);
+		break;
+
 	case 1: /* stop all writes */
 		spin_lock_irq(&conf->device_lock);
 		conf->quiesce = 1;
@@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 		spin_lock_irq(&conf->device_lock);
 		conf->quiesce = 0;
 		wake_up(&conf->wait_for_stripe);
+		wake_up(&conf->wait_for_overlap);
 		spin_unlock_irq(&conf->device_lock);
 		break;
 	}
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 002ee631..c0d3097 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -151,6 +151,10 @@ struct mddev_s
 	sector_t			resync_mismatches; /* count of sectors where
 							    * parity/replica mismatch found
 							    */
+
+	/* allow user-space to request suspension of IO to regions of the array */
+	sector_t			suspend_lo;
+	sector_t			suspend_hi;
 	/* if zero, use the system-wide default */
 	int				sync_speed_min;
 	int				sync_speed_max;
-- 
cgit v0.10.2


From df8e7f7639bab3b2cc536a1d30d5593d65251778 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:15 -0800
Subject: [PATCH] md: Improve comments about locking situation in raid5
 make_request

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bb16ac2..8eded08 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1768,6 +1768,14 @@ static int make_request(request_queue_t *q, struct bio * bi)
 		if (likely(conf->expand_progress == MaxSector))
 			disks = conf->raid_disks;
 		else {
+			/* spinlock is needed as expand_progress may be
+			 * 64bit on a 32bit platform, and so it might be
+			 * possible to see a half-updated value
+			 * Ofcourse expand_progress could change after
+			 * the lock is dropped, so once we get a reference
+			 * to the stripe that we think it is, we will have
+			 * to check again.
+			 */
 			spin_lock_irq(&conf->device_lock);
 			disks = conf->raid_disks;
 			if (logical_sector >= conf->expand_progress)
@@ -1791,7 +1799,12 @@ static int make_request(request_queue_t *q, struct bio * bi)
 		if (sh) {
 			if (unlikely(conf->expand_progress != MaxSector)) {
 				/* expansion might have moved on while waiting for a
-				 * stripe, so we much do the range check again.
+				 * stripe, so we must do the range check again.
+				 * Expansion could still move past after this
+				 * test, but as we are holding a reference to
+				 * 'sh', we know that if that happens,
+				 *  STRIPE_EXPANDING will get set and the expansion
+				 * won't proceed until we finish with the stripe.
 				 */
 				int must_retry = 0;
 				spin_lock_irq(&conf->device_lock);
-- 
cgit v0.10.2


From b3b46be38aef5c46a4e144f1bcb8d0cc6bf3ff97 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:16 -0800
Subject: [PATCH] md: Remove some stray semi-colons after functions called in
 macro..

wait_event_lock_irq puts a ';' after its usage of the 4th arg, so we don't
need to.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8eded08..59c8772 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -262,7 +262,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
 						    conf->device_lock,
-						    unplug_slaves(conf->mddev);
+						    unplug_slaves(conf->mddev)
 					);
 				conf->inactive_blocked = 0;
 			} else
@@ -407,7 +407,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
 				    conf->device_lock,
-				    unplug_slaves(conf->mddev);
+				    unplug_slaves(conf->mddev)
 			);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
-- 
cgit v0.10.2


From f6344757a92e5466bf8c23a74ee8f972ff35cb05 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:17 -0800
Subject: [PATCH] md: Remove bi_end_io call out from under a spinlock

raid5 overloads bi_phys_segments to count the number of blocks that the
request was broken in to so that it knows when the bio is completely handled.

Accessing this must always be done under a spinlock.  In one case we also call
bi_end_io under that spinlock, which probably isn't ideal as bi_end_io could
be expensive (even though it isn't allowed to sleep).

So we reducde the range of the spinlock to just accessing bi_phys_segments.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 59c8772..dae740a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1743,6 +1743,7 @@ static int make_request(request_queue_t *q, struct bio * bi)
 	sector_t logical_sector, last_sector;
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
+	int remaining;
 
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, bi->bi_size, -EOPNOTSUPP);
@@ -1852,7 +1853,9 @@ static int make_request(request_queue_t *q, struct bio * bi)
 			
 	}
 	spin_lock_irq(&conf->device_lock);
-	if (--bi->bi_phys_segments == 0) {
+	remaining = --bi->bi_phys_segments;
+	spin_unlock_irq(&conf->device_lock);
+	if (remaining == 0) {
 		int bytes = bi->bi_size;
 
 		if ( bio_data_dir(bi) == WRITE )
@@ -1860,7 +1863,6 @@ static int make_request(request_queue_t *q, struct bio * bi)
 		bi->bi_size = 0;
 		bi->bi_end_io(bi, bytes, 0);
 	}
-	spin_unlock_irq(&conf->device_lock);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 8ddeeae51f2f197b4fafcba117ee8191b49d843e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:18 -0800
Subject: [PATCH] md: Fix md grow/size code to correctly find the maximum
 available space

An md array can be asked to change the amount of each device that it is using,
and in particular can be asked to use the maximum available space.  This
currently only works if the first device is not larger than the rest.  As
'size' gets changed and so 'fit' becomes wrong.  So check if a 'fit' is
required early and don't corrupt it.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 92fd010..147efcb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3575,6 +3575,7 @@ static int update_size(mddev_t *mddev, unsigned long size)
 	mdk_rdev_t * rdev;
 	int rv;
 	struct list_head *tmp;
+	int fit = (size == 0);
 
 	if (mddev->pers->resize == NULL)
 		return -EINVAL;
@@ -3592,7 +3593,6 @@ static int update_size(mddev_t *mddev, unsigned long size)
 		return -EBUSY;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		sector_t avail;
-		int fit = (size == 0);
 		if (rdev->sb_offset > rdev->data_offset)
 			avail = (rdev->sb_offset*2) - rdev->data_offset;
 		else
-- 
cgit v0.10.2


From 2f889129de148b0ba2e1fbc9e9d33a4ef4c5f2cb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:19 -0800
Subject: [PATCH] md: Restore 'remaining' count when retrying an write
 operation

When retrying a write due to barrier failure, we don't reset 'remaining', so
it goes negative and never hits 0 again.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 04418e1..3cb0872 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1402,6 +1402,9 @@ static void raid1d(mddev_t *mddev)
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
 			clear_bit(R1BIO_Barrier, &r1_bio->state);
 			for (i=0; i < conf->raid_disks; i++)
+				if (r1_bio->bios[i])
+					atomic_inc(&r1_bio->remaining);
+			for (i=0; i < conf->raid_disks; i++)
 				if (r1_bio->bios[i]) {
 					struct bio_vec *bvec;
 					int j;
-- 
cgit v0.10.2


From 48c9c27b8bcd2a328a06151e2d5c1170db0b701b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 27 Mar 2006 01:18:20 -0800
Subject: [PATCH] sem2mutex: drivers/md

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 76610a6..8f56a54 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -14,6 +14,7 @@
 #include <linux/ctype.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/mutex.h>
 #include <asm/atomic.h>
 
 #define MAX_DEPTH 16
@@ -770,14 +771,14 @@ int dm_table_complete(struct dm_table *t)
 	return r;
 }
 
-static DECLARE_MUTEX(_event_lock);
+static DEFINE_MUTEX(_event_lock);
 void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context)
 {
-	down(&_event_lock);
+	mutex_lock(&_event_lock);
 	t->event_fn = fn;
 	t->event_context = context;
-	up(&_event_lock);
+	mutex_unlock(&_event_lock);
 }
 
 void dm_table_event(struct dm_table *t)
@@ -788,10 +789,10 @@ void dm_table_event(struct dm_table *t)
 	 */
 	BUG_ON(in_interrupt());
 
-	down(&_event_lock);
+	mutex_lock(&_event_lock);
 	if (t->event_fn)
 		t->event_fn(t->event_context);
-	up(&_event_lock);
+	mutex_unlock(&_event_lock);
 }
 
 sector_t dm_table_get_size(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 973e63d..4d710b7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -10,6 +10,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
@@ -743,14 +744,14 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
-static DECLARE_MUTEX(_minor_lock);
+static DEFINE_MUTEX(_minor_lock);
 static DEFINE_IDR(_minor_idr);
 
 static void free_minor(unsigned int minor)
 {
-	down(&_minor_lock);
+	mutex_lock(&_minor_lock);
 	idr_remove(&_minor_idr, minor);
-	up(&_minor_lock);
+	mutex_unlock(&_minor_lock);
 }
 
 /*
@@ -763,7 +764,7 @@ static int specific_minor(struct mapped_device *md, unsigned int minor)
 	if (minor >= (1 << MINORBITS))
 		return -EINVAL;
 
-	down(&_minor_lock);
+	mutex_lock(&_minor_lock);
 
 	if (idr_find(&_minor_idr, minor)) {
 		r = -EBUSY;
@@ -788,7 +789,7 @@ static int specific_minor(struct mapped_device *md, unsigned int minor)
 	}
 
 out:
-	up(&_minor_lock);
+	mutex_unlock(&_minor_lock);
 	return r;
 }
 
@@ -797,7 +798,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor)
 	int r;
 	unsigned int m;
 
-	down(&_minor_lock);
+	mutex_lock(&_minor_lock);
 
 	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
 	if (!r) {
@@ -819,7 +820,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor)
 	*minor = m;
 
 out:
-	up(&_minor_lock);
+	mutex_unlock(&_minor_lock);
 	return r;
 }
 
@@ -1014,13 +1015,13 @@ static struct mapped_device *dm_find_md(dev_t dev)
 	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
 		return NULL;
 
-	down(&_minor_lock);
+	mutex_lock(&_minor_lock);
 
 	md = idr_find(&_minor_idr, minor);
 	if (!md || (dm_disk(md)->first_minor != minor))
 		md = NULL;
 
-	up(&_minor_lock);
+	mutex_unlock(&_minor_lock);
 
 	return md;
 }
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index ed71f3f..72480a4 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 
 #include "kcopyd.h"
 
@@ -581,21 +582,21 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Unit setup
  *---------------------------------------------------------------*/
-static DECLARE_MUTEX(_client_lock);
+static DEFINE_MUTEX(_client_lock);
 static LIST_HEAD(_clients);
 
 static void client_add(struct kcopyd_client *kc)
 {
-	down(&_client_lock);
+	mutex_lock(&_client_lock);
 	list_add(&kc->list, &_clients);
-	up(&_client_lock);
+	mutex_unlock(&_client_lock);
 }
 
 static void client_del(struct kcopyd_client *kc)
 {
-	down(&_client_lock);
+	mutex_lock(&_client_lock);
 	list_del(&kc->list);
-	up(&_client_lock);
+	mutex_unlock(&_client_lock);
 }
 
 static DEFINE_MUTEX(kcopyd_init_lock);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 147efcb..c9c9c09 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -43,6 +43,7 @@
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/suspend.h>
 #include <linux/poll.h>
+#include <linux/mutex.h>
 
 #include <linux/init.h>
 
@@ -2500,7 +2501,7 @@ int mdp_major = 0;
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
 {
-	static DECLARE_MUTEX(disks_sem);
+	static DEFINE_MUTEX(disks_mutex);
 	mddev_t *mddev = mddev_find(dev);
 	struct gendisk *disk;
 	int partitioned = (MAJOR(dev) != MD_MAJOR);
@@ -2510,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	if (!mddev)
 		return NULL;
 
-	down(&disks_sem);
+	mutex_lock(&disks_mutex);
 	if (mddev->gendisk) {
-		up(&disks_sem);
+		mutex_unlock(&disks_mutex);
 		mddev_put(mddev);
 		return NULL;
 	}
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
-		up(&disks_sem);
+		mutex_unlock(&disks_mutex);
 		mddev_put(mddev);
 		return NULL;
 	}
@@ -2536,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	disk->queue = mddev->queue;
 	add_disk(disk);
 	mddev->gendisk = disk;
-	up(&disks_sem);
+	mutex_unlock(&disks_mutex);
 	mddev->kobj.parent = &disk->kobj;
 	mddev->kobj.k_name = NULL;
 	snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
-- 
cgit v0.10.2


From df5b89b323b922f56650b4b4d7c41899b937cf19 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 27 Mar 2006 01:18:20 -0800
Subject: [PATCH] md: Convert reconfig_sem to reconfig_mutex

... being careful that mutex_trylock is inverted wrt down_trylock

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c9c9c09..039e071 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -255,7 +255,7 @@ static mddev_t * mddev_find(dev_t unit)
 	else
 		new->md_minor = MINOR(unit) >> MdpMinorShift;
 
-	init_MUTEX(&new->reconfig_sem);
+	mutex_init(&new->reconfig_mutex);
 	INIT_LIST_HEAD(&new->disks);
 	INIT_LIST_HEAD(&new->all_mddevs);
 	init_timer(&new->safemode_timer);
@@ -277,22 +277,22 @@ static mddev_t * mddev_find(dev_t unit)
 
 static inline int mddev_lock(mddev_t * mddev)
 {
-	return down_interruptible(&mddev->reconfig_sem);
+	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
 static inline void mddev_lock_uninterruptible(mddev_t * mddev)
 {
-	down(&mddev->reconfig_sem);
+	mutex_lock(&mddev->reconfig_mutex);
 }
 
 static inline int mddev_trylock(mddev_t * mddev)
 {
-	return down_trylock(&mddev->reconfig_sem);
+	return mutex_trylock(&mddev->reconfig_mutex);
 }
 
 static inline void mddev_unlock(mddev_t * mddev)
 {
-	up(&mddev->reconfig_sem);
+	mutex_unlock(&mddev->reconfig_mutex);
 
 	md_wakeup_thread(mddev->thread);
 }
@@ -4893,7 +4893,7 @@ void md_check_recovery(mddev_t *mddev)
 		))
 		return;
 
-	if (mddev_trylock(mddev)==0) {
+	if (mddev_trylock(mddev)) {
 		int spares =0;
 
 		spin_lock_irq(&mddev->write_lock);
@@ -5029,7 +5029,7 @@ static int md_notify_reboot(struct notifier_block *this,
 		printk(KERN_INFO "md: stopping all md devices.\n");
 
 		ITERATE_MDDEV(mddev,tmp)
-			if (mddev_trylock(mddev)==0)
+			if (mddev_trylock(mddev))
 				do_md_stop (mddev, 1);
 		/*
 		 * certain more exotic SCSI devices are known to be
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c0d3097..e2df61f 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -185,7 +185,7 @@ struct mddev_s
 	unsigned long			recovery;
 
 	int				in_sync;	/* know to not need resync */
-	struct semaphore		reconfig_sem;
+	struct mutex			reconfig_mutex;
 	atomic_t			active;
 
 	int				changed;	/* true if we might need to reread partition info */
-- 
cgit v0.10.2


From eef5167e5045fa8265b3e72cac9dbc4bc7dd82a6 Mon Sep 17 00:00:00 2001
From: "shin, jacob" <jacob.shin@amd.com>
Date: Mon, 27 Mar 2006 09:57:20 -0600
Subject: [CPUFREQ] hotplug cpu fix for powernow-k8

Andi's previous fix to initialise powernow_data on all siblings
will not work properly with CPU Hotplug.

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 1e70823..712a26b 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1095,10 +1095,15 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
 
 static unsigned int powernowk8_get (unsigned int cpu)
 {
-	struct powernow_k8_data *data = powernow_data[cpu];
+	struct powernow_k8_data *data;
 	cpumask_t oldmask = current->cpus_allowed;
 	unsigned int khz = 0;
 
+	data = powernow_data[first_cpu(cpu_core_map[cpu])];
+
+	if (!data)
+		return -EINVAL;
+
 	set_cpus_allowed(current, cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
-- 
cgit v0.10.2


From 64840e2722aeb789574e336d231bbc6436d51b34 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 25 Mar 2006 01:51:23 -0800
Subject: [CPUFREQ] powernow: remove private for_each_cpu_mask()

It is unneeded and wrong.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 00ea899c..79a7c5c 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -182,10 +182,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
 
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
 
-#ifndef for_each_cpu_mask
-#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++)
-#endif
-
 #ifdef CONFIG_SMP
 static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
 {
-- 
cgit v0.10.2


From e2d74ac0664c89757bde8fb18c98cd7bf53da61c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 28 Mar 2006 08:59:01 +0200
Subject: [PATCH] [BLOCK] cfq-iosched: change cfq io context linking from list
 to tree

On setups with many disks, we spend a considerable amount of time
looking up the process-disk mapping on each queue of io. Testing with
a NULL based block driver, this costs 40-50% reduction in throughput
for 1000 disks.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index bde40a6..bb43a16 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1190,19 +1190,19 @@ cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
 	return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
 }
 
-static void cfq_free_io_context(struct cfq_io_context *cic)
+static void cfq_free_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
-	struct list_head *entry, *next;
-	int freed = 1;
+	struct rb_node *n;
+	int freed = 0;
 
-	list_for_each_safe(entry, next, &cic->list) {
-		__cic = list_entry(entry, struct cfq_io_context, list);
+	while ((n = rb_first(&ioc->cic_root)) != NULL) {
+		__cic = rb_entry(n, struct cfq_io_context, rb_node);
+		rb_erase(&__cic->rb_node, &ioc->cic_root);
 		kmem_cache_free(cfq_ioc_pool, __cic);
 		freed++;
 	}
 
-	kmem_cache_free(cfq_ioc_pool, cic);
 	if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone)
 		complete(ioc_gone);
 }
@@ -1210,8 +1210,7 @@ static void cfq_free_io_context(struct cfq_io_context *cic)
 static void cfq_trim(struct io_context *ioc)
 {
 	ioc->set_ioprio = NULL;
-	if (ioc->cic)
-		cfq_free_io_context(ioc->cic);
+	cfq_free_io_context(ioc);
 }
 
 /*
@@ -1250,26 +1249,26 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
 	spin_unlock(q->queue_lock);
 }
 
-static void cfq_exit_io_context(struct cfq_io_context *cic)
+static void cfq_exit_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
-	struct list_head *entry;
 	unsigned long flags;
-
-	local_irq_save(flags);
+	struct rb_node *n;
 
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	read_lock(&cfq_exit_lock);
-	list_for_each(entry, &cic->list) {
-		__cic = list_entry(entry, struct cfq_io_context, list);
+	read_lock_irqsave(&cfq_exit_lock, flags);
+
+	n = rb_first(&ioc->cic_root);
+	while (n != NULL) {
+		__cic = rb_entry(n, struct cfq_io_context, rb_node);
+
 		cfq_exit_single_io_context(__cic);
+		n = rb_next(n);
 	}
 
-	cfq_exit_single_io_context(cic);
-	read_unlock(&cfq_exit_lock);
-	local_irq_restore(flags);
+	read_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
@@ -1278,10 +1277,10 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
 	if (cic) {
-		INIT_LIST_HEAD(&cic->list);
+		RB_CLEAR(&cic->rb_node);
+		cic->key = NULL;
 		cic->cfqq[ASYNC] = NULL;
 		cic->cfqq[SYNC] = NULL;
-		cic->key = NULL;
 		cic->last_end_request = jiffies;
 		cic->ttime_total = 0;
 		cic->ttime_samples = 0;
@@ -1373,15 +1372,17 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 {
 	struct cfq_io_context *cic;
+	struct rb_node *n;
 
 	write_lock(&cfq_exit_lock);
 
-	cic = ioc->cic;
-
-	changed_ioprio(cic);
-
-	list_for_each_entry(cic, &cic->list, list)
+	n = rb_first(&ioc->cic_root);
+	while (n != NULL) {
+		cic = rb_entry(n, struct cfq_io_context, rb_node);
+ 
 		changed_ioprio(cic);
+		n = rb_next(n);
+	}
 
 	write_unlock(&cfq_exit_lock);
 
@@ -1445,14 +1446,67 @@ out:
 	return cfqq;
 }
 
+static struct cfq_io_context *
+cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc)
+{
+	struct rb_node *n = ioc->cic_root.rb_node;
+	struct cfq_io_context *cic;
+	void *key = cfqd;
+
+	while (n) {
+		cic = rb_entry(n, struct cfq_io_context, rb_node);
+
+		if (key < cic->key)
+			n = n->rb_left;
+		else if (key > cic->key)
+			n = n->rb_right;
+		else
+			return cic;
+	}
+
+	return NULL;
+}
+
+static inline void
+cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
+	     struct cfq_io_context *cic)
+{
+	struct rb_node **p = &ioc->cic_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_io_context *__cic;
+
+	read_lock(&cfq_exit_lock);
+
+	cic->ioc = ioc;
+	cic->key = cfqd;
+
+	ioc->set_ioprio = cfq_ioc_set_ioprio;
+
+	while (*p) {
+		parent = *p;
+		__cic = rb_entry(parent, struct cfq_io_context, rb_node);
+
+		if (cic->key < __cic->key)
+			p = &(*p)->rb_left;
+		else if (cic->key > __cic->key)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&cic->rb_node, parent, p);
+	rb_insert_color(&cic->rb_node, &ioc->cic_root);
+	list_add(&cic->queue_list, &cfqd->cic_list);
+	read_unlock(&cfq_exit_lock);
+}
+
 /*
  * Setup general io context and cfq io context. There can be several cfq
  * io contexts per general io context, if this process is doing io to more
- * than one device managed by cfq. Note that caller is holding a reference to
- * cfqq, so we don't need to worry about it disappearing
+ * than one device managed by cfq.
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
@@ -1463,88 +1517,15 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
 	if (!ioc)
 		return NULL;
 
-restart:
-	if ((cic = ioc->cic) == NULL) {
-		cic = cfq_alloc_io_context(cfqd, gfp_mask);
-
-		if (cic == NULL)
-			goto err;
-
-		/*
-		 * manually increment generic io_context usage count, it
-		 * cannot go away since we are already holding one ref to it
-		 */
-		cic->ioc = ioc;
-		cic->key = cfqd;
-		read_lock(&cfq_exit_lock);
-		ioc->set_ioprio = cfq_ioc_set_ioprio;
-		ioc->cic = cic;
-		list_add(&cic->queue_list, &cfqd->cic_list);
-		read_unlock(&cfq_exit_lock);
-	} else {
-		struct cfq_io_context *__cic;
-
-		/*
-		 * the first cic on the list is actually the head itself
-		 */
-		if (cic->key == cfqd)
-			goto out;
-
-		if (unlikely(!cic->key)) {
-			read_lock(&cfq_exit_lock);
-			if (list_empty(&cic->list))
-				ioc->cic = NULL;
-			else
-				ioc->cic = list_entry(cic->list.next,
-						      struct cfq_io_context,
-						      list);
-			read_unlock(&cfq_exit_lock);
-			kmem_cache_free(cfq_ioc_pool, cic);
-			atomic_dec(&ioc_count);
-			goto restart;
-		}
-
-		/*
-		 * cic exists, check if we already are there. linear search
-		 * should be ok here, the list will usually not be more than
-		 * 1 or a few entries long
-		 */
-		list_for_each_entry(__cic, &cic->list, list) {
-			/*
-			 * this process is already holding a reference to
-			 * this queue, so no need to get one more
-			 */
-			if (__cic->key == cfqd) {
-				cic = __cic;
-				goto out;
-			}
-			if (unlikely(!__cic->key)) {
-				read_lock(&cfq_exit_lock);
-				list_del(&__cic->list);
-				read_unlock(&cfq_exit_lock);
-				kmem_cache_free(cfq_ioc_pool, __cic);
-				atomic_dec(&ioc_count);
-				goto restart;
-			}
-		}
+	cic = cfq_cic_rb_lookup(cfqd, ioc);
+	if (cic)
+		goto out;
 
-		/*
-		 * nope, process doesn't have a cic assoicated with this
-		 * cfqq yet. get a new one and add to list
-		 */
-		__cic = cfq_alloc_io_context(cfqd, gfp_mask);
-		if (__cic == NULL)
-			goto err;
-
-		__cic->ioc = ioc;
-		__cic->key = cfqd;
-		read_lock(&cfq_exit_lock);
-		list_add(&__cic->list, &cic->list);
-		list_add(&__cic->queue_list, &cfqd->cic_list);
-		read_unlock(&cfq_exit_lock);
-		cic = __cic;
-	}
+	cic = cfq_alloc_io_context(cfqd, gfp_mask);
+	if (cic == NULL)
+		goto err;
 
+	cfq_cic_link(cfqd, ioc, cic);
 out:
 	return cic;
 err:
@@ -1965,7 +1946,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	cic = cfq_get_io_context(cfqd, key, gfp_mask);
+	cic = cfq_get_io_context(cfqd, gfp_mask);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
@@ -2133,11 +2114,14 @@ static void cfq_exit_queue(elevator_t *e)
 	request_queue_t *q = cfqd->queue;
 
 	cfq_shutdown_timer_wq(cfqd);
+
 	write_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
+
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
-	while(!list_empty(&cfqd->cic_list)) {
+
+	while (!list_empty(&cfqd->cic_list)) {
 		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
 							struct cfq_io_context,
 							queue_list);
@@ -2152,6 +2136,7 @@ static void cfq_exit_queue(elevator_t *e)
 		cic->key = NULL;
 		list_del_init(&cic->queue_list);
 	}
+
 	spin_unlock_irq(q->queue_lock);
 	write_unlock(&cfq_exit_lock);
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 82469db..cb60876 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3539,11 +3539,15 @@ void put_io_context(struct io_context *ioc)
 	BUG_ON(atomic_read(&ioc->refcount) == 0);
 
 	if (atomic_dec_and_test(&ioc->refcount)) {
+		struct cfq_io_context *cic;
+
 		rcu_read_lock();
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
-		if (ioc->cic && ioc->cic->dtor)
-			ioc->cic->dtor(ioc->cic);
+		if (ioc->cic_root.rb_node != NULL) {
+			cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
+			cic->dtor(ioc);
+		}
 		rcu_read_unlock();
 
 		kmem_cache_free(iocontext_cachep, ioc);
@@ -3556,6 +3560,7 @@ void exit_io_context(void)
 {
 	unsigned long flags;
 	struct io_context *ioc;
+	struct cfq_io_context *cic;
 
 	local_irq_save(flags);
 	task_lock(current);
@@ -3567,9 +3572,11 @@ void exit_io_context(void)
 
 	if (ioc->aic && ioc->aic->exit)
 		ioc->aic->exit(ioc->aic);
-	if (ioc->cic && ioc->cic->exit)
-		ioc->cic->exit(ioc->cic);
-
+	if (ioc->cic_root.rb_node != NULL) {
+		cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
+		cic->exit(ioc);
+	}
+ 
 	put_io_context(ioc);
 }
 
@@ -3598,7 +3605,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
-		ret->cic = NULL;
+		ret->cic_root.rb_node = NULL;
 		tsk->io_context = ret;
 	}
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c179966..ed0ffa6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,13 +55,11 @@ struct as_io_context {
 
 struct cfq_queue;
 struct cfq_io_context {
-	/*
-	 * circular list of cfq_io_contexts belonging to a process io context
-	 */
-	struct list_head list;
-	struct cfq_queue *cfqq[2];
+	struct rb_node rb_node;
 	void *key;
 
+	struct cfq_queue *cfqq[2];
+
 	struct io_context *ioc;
 
 	unsigned long last_end_request;
@@ -72,8 +70,8 @@ struct cfq_io_context {
 
 	struct list_head queue_list;
 
-	void (*dtor)(struct cfq_io_context *);
-	void (*exit)(struct cfq_io_context *);
+	void (*dtor)(struct io_context *); /* destructor */
+	void (*exit)(struct io_context *); /* called on task exit */
 };
 
 /*
@@ -94,7 +92,7 @@ struct io_context {
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
 	struct as_io_context *aic;
-	struct cfq_io_context *cic;
+	struct rb_root cic_root;
 };
 
 void put_io_context(struct io_context *ioc);
-- 
cgit v0.10.2


From e8a99053ea82a4b4375049886cf1db64d7dcd755 Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@lisas.de>
Date: Tue, 28 Mar 2006 08:59:49 +0200
Subject: [PATCH] cfq-iosched: small cfq_choose_req() optimization

this is a small optimization to cfq_choose_req() in the CFQ I/O scheduler
(this function is a semi-often invoked candidate in an oprofile log):
by using a bit mask variable, we can use a simple switch() to check
the various cases instead of having to query two variables for each check.
Benefit: 251 vs. 285 bytes footprint of cfq_choose_req().
Also, common case 0 (no request wrapping) is now checked first in code.

Signed-off-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index bb43a16..15152e2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -346,14 +346,16 @@ static int cfq_queue_empty(request_queue_t *q)
 /*
  * Lifted from AS - choose which of crq1 and crq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
- * behind the head are penalized and only allowed to a certain extent.
+ * behind the head is penalized and only allowed to a certain extent.
  */
 static struct cfq_rq *
 cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 {
 	sector_t last, s1, s2, d1 = 0, d2 = 0;
-	int r1_wrap = 0, r2_wrap = 0;	/* requests are behind the disk head */
 	unsigned long back_max;
+#define CFQ_RQ1_WRAP	0x01 /* request 1 wraps */
+#define CFQ_RQ2_WRAP	0x02 /* request 2 wraps */
+	unsigned wrap = 0; /* bit mask: requests behind the disk head? */
 
 	if (crq1 == NULL || crq1 == crq2)
 		return crq2;
@@ -385,35 +387,47 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	else if (s1 + back_max >= last)
 		d1 = (last - s1) * cfqd->cfq_back_penalty;
 	else
-		r1_wrap = 1;
+		wrap |= CFQ_RQ1_WRAP;
 
 	if (s2 >= last)
 		d2 = s2 - last;
 	else if (s2 + back_max >= last)
 		d2 = (last - s2) * cfqd->cfq_back_penalty;
 	else
-		r2_wrap = 1;
+		wrap |= CFQ_RQ2_WRAP;
 
 	/* Found required data */
-	if (!r1_wrap && r2_wrap)
-		return crq1;
-	else if (!r2_wrap && r1_wrap)
-		return crq2;
-	else if (r1_wrap && r2_wrap) {
-		/* both behind the head */
-		if (s1 <= s2)
+
+	/*
+	 * By doing switch() on the bit mask "wrap" we avoid having to
+	 * check two variables for all permutations: --> faster!
+	 */
+	switch (wrap) {
+	case 0: /* common case for CFQ: crq1 and crq2 not wrapped */
+		if (d1 < d2)
 			return crq1;
-		else
+		else if (d2 < d1)
 			return crq2;
-	}
+		else {
+			if (s1 >= s2)
+				return crq1;
+			else
+				return crq2;
+		}
 
-	/* Both requests in front of the head */
-	if (d1 < d2)
+	case CFQ_RQ2_WRAP:
 		return crq1;
-	else if (d2 < d1)
+	case CFQ_RQ1_WRAP:
 		return crq2;
-	else {
-		if (s1 >= s2)
+	case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */
+	default:
+		/*
+		 * Since both rqs are wrapped,
+		 * start with the one that's further behind head
+		 * (--> only *one* back seek required),
+		 * since back seek takes more time than forward.
+		 */
+		if (s1 <= s2)
 			return crq1;
 		else
 			return crq2;
-- 
cgit v0.10.2


From 7143dd4b0127141a4f773e819d1d1f4ab82bb517 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 28 Mar 2006 09:00:28 +0200
Subject: [PATCH] ll_rw_blk: fix 80-col offender in put_io_context()

This makes akpm more happy.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index cb60876..7b23743 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3545,7 +3545,9 @@ void put_io_context(struct io_context *ioc)
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
 		if (ioc->cic_root.rb_node != NULL) {
-			cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
+			struct rb_node *n = rb_first(&ioc->cic_root);
+
+			cic = rb_entry(n, struct cfq_io_context, rb_node);
 			cic->dtor(ioc);
 		}
 		rcu_read_unlock();
-- 
cgit v0.10.2


From 6e57a3a89785692bd8d012d80f5ee210ab8e0b68 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 28 Mar 2006 01:00:08 -0800
Subject: [SPARC64]: Implement futex_atomic_cmpxchg_inatomic().

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index cd340a2..dee4020 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -84,9 +84,27 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
-	return -ENOSYS;
+	__asm__ __volatile__(
+	"\n1:	lduwa	[%2] %%asi, %0\n"
+	"2:	casa	[%2] %%asi, %0, %1\n"
+	"3:\n"
+	"	.section .fixup,#alloc,#execinstr\n"
+	"	.align	4\n"
+	"4:	ba	3b\n"
+	"	 mov	%3, %0\n"
+	"	.previous\n"
+	"	.section __ex_table,\"a\"\n"
+	"	.align	4\n"
+	"	.word	1b, 4b\n"
+	"	.word	2b, 4b\n"
+	"	.previous\n"
+	: "=&r" (oldval)
+	: "r" (newval), "r" (uaddr), "i" (-EFAULT)
+	: "memory");
+
+	return oldval;
 }
 
 #endif /* !(_SPARC64_FUTEX_H) */
-- 
cgit v0.10.2


From a081568d7016061ed848696984e3acf1ba0b3054 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Tue, 28 Mar 2006 10:24:33 +0100
Subject: [ARM] Fix decompressor serial IO to give CRLF not LFCR

As per the corresponding change to the serial drivers, arrange
for ARM decompressors to give CRLF.  Move the common putstr code
into misc.c such that machines only need to supply "putc" and
"flush" functions.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 5ab9458..28626ec 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -20,24 +20,32 @@ unsigned int __machine_arch_type;
 
 #include <linux/string.h>
 
-#include <asm/arch/uncompress.h>
-
 #ifdef STANDALONE_DEBUG
 #define putstr printf
-#endif
+#else
 
-#ifdef CONFIG_DEBUG_ICEDCC
-#define putstr icedcc_putstr
-#define putc icedcc_putc
+static void putstr(const char *ptr);
 
+#include <linux/compiler.h>
+#include <asm/arch/uncompress.h>
+
+#ifdef CONFIG_DEBUG_ICEDCC
 extern void icedcc_putc(int ch);
+#define putc(ch)	icedcc_putc(ch)
+#define flush()	do { } while (0)
+#endif
 
-static void
-icedcc_putstr(const char *ptr)
+static void putstr(const char *ptr)
 {
-	for (; *ptr != '\0'; ptr++) {
-		icedcc_putc(*ptr);
+	char c;
+
+	while ((c = *ptr++) != '\0') {
+		if (c == '\n')
+			putc('\r');
+		putc(c);
 	}
+
+	flush();
 }
 
 #endif
diff --git a/include/asm-arm/arch-aaec2000/uncompress.h b/include/asm-arm/arch-aaec2000/uncompress.h
index fff0c94..300f4bf 100644
--- a/include/asm-arm/arch-aaec2000/uncompress.h
+++ b/include/asm-arm/arch-aaec2000/uncompress.h
@@ -15,7 +15,7 @@
 
 #define UART(x)         (*(volatile unsigned long *)(serial_port + (x)))
 
-static void putstr( const char *s )
+static void putc(int c)
 {
 	unsigned long serial_port;
         do {
@@ -28,17 +28,16 @@ static void putstr( const char *s )
 		return;
 	} while (0);
 
-	for (; *s; s++) {
-		/* wait for space in the UART's transmitter */
-		while ((UART(UART_SR) & UART_SR_TxFF));
-		/* send the character out. */
-		UART(UART_DR) = *s;
-		/* if a LF, also do CR... */
-		if (*s == 10) {
-			while ((UART(UART_SR) & UART_SR_TxFF));
-			UART(UART_DR) = 13;
-		}
-	}
+	/* wait for space in the UART's transmitter */
+	while ((UART(UART_SR) & UART_SR_TxFF))
+		barrier();
+
+	/* send the character out. */
+	UART(UART_DR) = c;
+}
+
+static inline void flush(void)
+{
 }
 
 #define arch_decomp_setup()
diff --git a/include/asm-arm/arch-at91rm9200/uncompress.h b/include/asm-arm/arch-at91rm9200/uncompress.h
index b30dd55..7b38497 100644
--- a/include/asm-arm/arch-at91rm9200/uncompress.h
+++ b/include/asm-arm/arch-at91rm9200/uncompress.h
@@ -31,21 +31,22 @@
  *
  * This does not append a newline
  */
-static void putstr(const char *s)
+static void putc(int c)
+{
+	void __iomem *sys = (void __iomem *) AT91_BASE_SYS;	/* physical address */
+
+	while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXRDY))
+		barrier();
+	__raw_writel(c, sys + AT91_DBGU_THR);
+}
+
+static inline void flush(void)
 {
 	void __iomem *sys = (void __iomem *) AT91_BASE_SYS;	/* physical address */
 
-	while (*s) {
-		while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXRDY)) { barrier(); }
-		__raw_writel(*s, sys + AT91_DBGU_THR);
-		if (*s == '\n')	{
-			while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXRDY)) { barrier(); }
-			__raw_writel('\r', sys + AT91_DBGU_THR);
-		}
-		s++;
-	}
 	/* wait for transmission to complete */
-	while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXEMPTY)) { barrier(); }
+	while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXEMPTY))
+		barrier();
 }
 
 #define arch_decomp_setup()
diff --git a/include/asm-arm/arch-cl7500/uncompress.h b/include/asm-arm/arch-cl7500/uncompress.h
index 68601b3..c437e0c 100644
--- a/include/asm-arm/arch-cl7500/uncompress.h
+++ b/include/asm-arm/arch-cl7500/uncompress.h
@@ -3,27 +3,19 @@
  *
  * Copyright (C) 1999, 2000 Nexus Electronics Ltd.
  */
-
 #define BASE 0x03010000
 #define SERBASE (BASE + (0x2f8 << 2))
 
-static __inline__ void putc(char c)
+static inline void putc(char c)
 {
-	while (!(*((volatile unsigned int *)(SERBASE + 0x14)) & 0x20));
+	while (!(*((volatile unsigned int *)(SERBASE + 0x14)) & 0x20))
+		barrier();
+
 	*((volatile unsigned int *)(SERBASE)) = c;
 }
 
-/*
- * This does not append a newline
- */
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 static __inline__ void arch_decomp_setup(void)
diff --git a/include/asm-arm/arch-clps711x/uncompress.h b/include/asm-arm/arch-clps711x/uncompress.h
index 9fc4bcf..07157b7 100644
--- a/include/asm-arm/arch-clps711x/uncompress.h
+++ b/include/asm-arm/arch-clps711x/uncompress.h
@@ -25,7 +25,6 @@
 #undef CLPS7111_BASE
 #define CLPS7111_BASE CLPS7111_PHYS_BASE
 
-#define barrier()		__asm__ __volatile__("": : :"memory")
 #define __raw_readl(p)		(*(unsigned long *)(p))
 #define __raw_writel(v,p)	(*(unsigned long *)(p) = (v))
 
@@ -40,21 +39,15 @@
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static inline void putc(int c)
 {
-	char c;
-
-	while ((c = *s++) != '\0') {
-		while (clps_readl(SYSFLGx) & SYSFLG_UTXFF)
-			barrier();
-		clps_writel(c, UARTDRx);
+	while (clps_readl(SYSFLGx) & SYSFLG_UTXFF)
+		barrier();
+	clps_writel(c, UARTDRx);
+}
 
-		if (c == '\n') {
-			while (clps_readl(SYSFLGx) & SYSFLG_UTXFF)
-				barrier();
-			clps_writel('\r', UARTDRx);
-		}
-	}
+static inline void flush(void)
+{
 	while (clps_readl(SYSFLGx) & SYSFLG_UBUSY)
 		barrier();
 }
diff --git a/include/asm-arm/arch-ebsa110/uncompress.h b/include/asm-arm/arch-ebsa110/uncompress.h
index eee9558..66b19c7 100644
--- a/include/asm-arm/arch-ebsa110/uncompress.h
+++ b/include/asm-arm/arch-ebsa110/uncompress.h
@@ -8,33 +8,34 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/serial_reg.h>
+
+#define SERIAL_BASE	((unsigned char *)0xfe000be0)
+
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static inline void putc(int c)
+{
+	unsigned char v, *base = SERIAL_BASE;
+
+	do {
+		v = base[UART_LSR << 2];
+		barrier();
+	} while (!(v & UART_LSR_THRE));
+
+	base[UART_TX << 2] = c;
+}
+
+static inline void flush(void)
 {
-	unsigned long tmp1, tmp2;
-	__asm__ __volatile__(
-	"ldrb	%0, [%2], #1\n"
-"	teq	%0, #0\n"
-"	beq	3f\n"
-"1:	strb	%0, [%3]\n"
-"2:	ldrb	%1, [%3, #0x14]\n"
-"	and	%1, %1, #0x60\n"
-"	teq	%1, #0x60\n"
-"	bne	2b\n"
-"	teq	%0, #'\n'\n"
-"	moveq	%0, #'\r'\n"
-"	beq	1b\n"
-"	ldrb	%0, [%2], #1\n"
-"	teq	%0, #0\n"
-"	bne	1b\n"
-"3:	ldrb	%1, [%3, #0x14]\n"
-"	and	%1, %1, #0x60\n"
-"	teq	%1, #0x60\n"
-"	bne	3b"
-	: "=&r" (tmp1), "=&r" (tmp2)
-	: "r" (s), "r" (0xf0000be0) : "cc");
+	unsigned char v, *base = SERIAL_BASE;
+
+	do {
+		v = base[UART_LSR << 2];
+		barrier();
+	} while ((v & (UART_LSR_TEMT|UART_LSR_THRE)) !=
+		 (UART_LSR_TEMT|UART_LSR_THRE));
 }
 
 /*
diff --git a/include/asm-arm/arch-ebsa285/uncompress.h b/include/asm-arm/arch-ebsa285/uncompress.h
index c2fd84e..86142c8 100644
--- a/include/asm-arm/arch-ebsa285/uncompress.h
+++ b/include/asm-arm/arch-ebsa285/uncompress.h
@@ -15,10 +15,11 @@
 #define DC21285_BASE ((volatile unsigned int *)0x42000160)
 #define SER0_BASE    ((volatile unsigned char *)0x7c0003f8)
 
-static __inline__ void putc(char c)
+static inline void putc(char c)
 {
 	if (machine_is_netwinder()) {
-		while ((SER0_BASE[5] & 0x60) != 0x60);
+		while ((SER0_BASE[5] & 0x60) != 0x60)
+			barrier();
 		SER0_BASE[0] = c;
 	} else {
 		while (DC21285_BASE[6] & 8);
@@ -26,17 +27,8 @@ static __inline__ void putc(char c)
 	}
 }
 
-/*
- * This does not append a newline
- */
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 /*
diff --git a/include/asm-arm/arch-ep93xx/uncompress.h b/include/asm-arm/arch-ep93xx/uncompress.h
index 2171082..c15274c 100644
--- a/include/asm-arm/arch-ep93xx/uncompress.h
+++ b/include/asm-arm/arch-ep93xx/uncompress.h
@@ -36,7 +36,7 @@ static void __raw_writel(unsigned int value, unsigned int ptr)
 #define PHYS_UART1_FLAG		0x808c0018
 #define UART1_FLAG_TXFF		0x20
 
-static __inline__ void putc(char c)
+static inline void putc(int c)
 {
 	int i;
 
@@ -49,14 +49,8 @@ static __inline__ void putc(char c)
 	__raw_writeb(c, PHYS_UART1_DATA);
 }
 
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 
diff --git a/include/asm-arm/arch-h720x/uncompress.h b/include/asm-arm/arch-h720x/uncompress.h
index 9535764..18c69e0 100644
--- a/include/asm-arm/arch-h720x/uncompress.h
+++ b/include/asm-arm/arch-h720x/uncompress.h
@@ -12,22 +12,20 @@
 #define LSR 	0x14
 #define TEMPTY 	0x40
 
-static void putstr(const char *s)
+static inline void putc(int c)
 {
-	char c;
 	volatile unsigned char *p = (volatile unsigned char *)(IO_PHYS+0x20000);
 
-	while ( (c = *s++) != '\0') {
-		/* wait until transmit buffer is empty */
-		while((p[LSR] & TEMPTY) == 0x0);
-		/* write next character */
-		*p = c;
-
-		if(c == '\n') {
-			while((p[LSR] & TEMPTY) == 0x0);
-			*p = '\r';
-		}
-	}
+	/* wait until transmit buffer is empty */
+	while((p[LSR] & TEMPTY) == 0x0)
+		barrier();
+
+	/* write next character */
+	*p = c;
+}
+
+static inline void flush(void)
+{
 }
 
 /*
diff --git a/include/asm-arm/arch-imx/uncompress.h b/include/asm-arm/arch-imx/uncompress.h
index 096077f..da333f6 100644
--- a/include/asm-arm/arch-imx/uncompress.h
+++ b/include/asm-arm/arch-imx/uncompress.h
@@ -39,8 +39,7 @@
  *
  * This does not append a newline
  */
-static void
-putstr(const char *s)
+static void putc(int c)
 {
 	unsigned long serial_port;
 
@@ -54,20 +53,14 @@ putstr(const char *s)
 		return;
 	} while(0);
 
-	while (*s) {
-		while ( !(UART(USR2) & USR2_TXFE) )
-			barrier();
+	while (!(UART(USR2) & USR2_TXFE))
+		barrier();
 
-		UART(TXR) = *s;
-
-		if (*s == '\n') {
-			while ( !(UART(USR2) & USR2_TXFE) )
-				barrier();
+	UART(TXR) = c;
+}
 
-			UART(TXR) = '\r';
-		}
-		s++;
-	}
+static inline void flush(void)
+{
 }
 
 /*
diff --git a/include/asm-arm/arch-integrator/uncompress.h b/include/asm-arm/arch-integrator/uncompress.h
index 3957402..f61825c 100644
--- a/include/asm-arm/arch-integrator/uncompress.h
+++ b/include/asm-arm/arch-integrator/uncompress.h
@@ -28,21 +28,18 @@
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static void putc(int c)
 {
-	while (*s) {
-		while (AMBA_UART_FR & (1 << 5));
+	while (AMBA_UART_FR & (1 << 5))
+		barrier();
 
-		AMBA_UART_DR = *s;
-
-		if (*s == '\n') {
-			while (AMBA_UART_FR & (1 << 5));
+	AMBA_UART_DR = c;
+}
 
-			AMBA_UART_DR = '\r';
-		}
-		s++;
-	}
-	while (AMBA_UART_FR & (1 << 3));
+static inline void flush(void)
+{
+	while (AMBA_UART_FR & (1 << 3))
+		barrier();
 }
 
 /*
diff --git a/include/asm-arm/arch-iop3xx/uncompress.h b/include/asm-arm/arch-iop3xx/uncompress.h
index 82b8876..c98eb62 100644
--- a/include/asm-arm/arch-iop3xx/uncompress.h
+++ b/include/asm-arm/arch-iop3xx/uncompress.h
@@ -19,23 +19,15 @@ static volatile UTYPE uart_base;
 
 #define TX_DONE (UART_LSR_TEMT|UART_LSR_THRE)
 
-static __inline__ void putc(char c)
+static inline void putc(char c)
 {
-	while ((uart_base[UART_LSR] & TX_DONE) != TX_DONE);
+	while ((uart_base[UART_LSR] & TX_DONE) != TX_DONE)
+		barrier();
 	*uart_base = c;
 }
 
-/*
- * This does not append a newline
- */
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 static __inline__ void __arch_decomp_setup(unsigned long arch_id)
diff --git a/include/asm-arm/arch-ixp2000/uncompress.h b/include/asm-arm/arch-ixp2000/uncompress.h
index 3d3d5b2..f66b408 100644
--- a/include/asm-arm/arch-ixp2000/uncompress.h
+++ b/include/asm-arm/arch-ixp2000/uncompress.h
@@ -29,23 +29,18 @@
 #define UARTSR          PHYS(0x14)      /* Status reg */
 
 
-static __inline__ void putc(char c)
+static inline void putc(int c)
 {
 	int j = 0x1000;
 
-	while (--j && !(*UARTSR & UART_LSR_THRE)); 
+	while (--j && !(*UARTSR & UART_LSR_THRE))
+		barrier();
+
 	*UARTDR = c;
 }
 
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s)
-	{
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 #define arch_decomp_setup()
diff --git a/include/asm-arm/arch-ixp4xx/uncompress.h b/include/asm-arm/arch-ixp4xx/uncompress.h
index 960c358..09ae6c9 100644
--- a/include/asm-arm/arch-ixp4xx/uncompress.h
+++ b/include/asm-arm/arch-ixp4xx/uncompress.h
@@ -21,26 +21,18 @@
 
 static volatile u32* uart_base;
 
-static __inline__ void putc(char c)
+static inline void putc(int c)
 {
 	/* Check THRE and TEMT bits before we transmit the character.
 	 */
-	while ((uart_base[UART_LSR] & TX_DONE) != TX_DONE); 
+	while ((uart_base[UART_LSR] & TX_DONE) != TX_DONE)
+		barrier();
+
 	*uart_base = c;
 }
 
-/*
- * This does not append a newline
- */
-static void putstr(const char *s)
+static void flush(void)
 {
-	while (*s)
-	{
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 static __inline__ void __arch_decomp_setup(unsigned long arch_id)
diff --git a/include/asm-arm/arch-l7200/uncompress.h b/include/asm-arm/arch-l7200/uncompress.h
index 1caa2b5..9fcd40a 100644
--- a/include/asm-arm/arch-l7200/uncompress.h
+++ b/include/asm-arm/arch-l7200/uncompress.h
@@ -16,22 +16,17 @@
 #define __raw_writeb(v,p)	(*(volatile unsigned char *)(p) = (v))
 #define __raw_readb(p)		(*(volatile unsigned char *)(p))
 
-static __inline__ void putc(char c)
+static inline void putc(int c)
 {
 	while(__raw_readb(IO_UART + 0x18) & 0x20 ||
-		__raw_readb(IO_UART + 0x18) & 0x08);
+	      __raw_readb(IO_UART + 0x18) & 0x08)
+		barrier();
+
 	__raw_writeb(c, IO_UART + 0x00);
 }
 
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		if (*s == 10) {			/* If a LF, add CR */
-			putc(10);
-			putc(13);
-		}
-		putc(*(s++));
-	}
 }
 
 static __inline__ void arch_decomp_setup(void)
diff --git a/include/asm-arm/arch-lh7a40x/uncompress.h b/include/asm-arm/arch-lh7a40x/uncompress.h
index ec8ab67..f805334 100644
--- a/include/asm-arm/arch-lh7a40x/uncompress.h
+++ b/include/asm-arm/arch-lh7a40x/uncompress.h
@@ -22,20 +22,15 @@
 #define UART_STATUS (*(volatile unsigned long*) (UART2_PHYS + UART_R_STATUS))
 #define UART_DATA   (*(volatile unsigned long*) (UART2_PHYS + UART_R_DATA))
 
-static __inline__ void putc (char ch)
+static inline void putc(int ch)
 {
 	while (UART_STATUS & nTxRdy)
-		;
+		barrier();
 	UART_DATA = ch;
 }
 
-static void putstr (const char* sz)
+static inline void flush(void)
 {
-	for (; *sz; ++sz) {
-		putc (*sz);
-		if (*sz == '\n')
-			putc ('\r');
-	}
 }
 
 	/* NULL functions; we don't presently need them */
diff --git a/include/asm-arm/arch-omap/uncompress.h b/include/asm-arm/arch-omap/uncompress.h
index c718264..ca2c8be 100644
--- a/include/asm-arm/arch-omap/uncompress.h
+++ b/include/asm-arm/arch-omap/uncompress.h
@@ -30,8 +30,7 @@ unsigned int system_rev;
 #define check_port(base, shift) ((base[UART_OMAP_MDR1 << shift] & 7) == 0)
 #define omap_get_id() ((*(volatile unsigned int *)(0xfffed404)) >> 12) & ID_MASK
 
-static void
-putstr(const char *s)
+static void putc(int c)
 {
 	volatile u8 * uart = 0;
 	int shift = 2;
@@ -69,16 +68,13 @@ putstr(const char *s)
 	/*
 	 * Now, xmit each character
 	 */
-	while (*s) {
-		while (!(uart[UART_LSR << shift] & UART_LSR_THRE))
-			barrier();
-		uart[UART_TX << shift] = *s;
-		if (*s++ == '\n') {
-			while (!(uart[UART_LSR << shift] & UART_LSR_THRE))
-				barrier();
-			uart[UART_TX << shift] = '\r';
-		}
-	}
+	while (!(uart[UART_LSR << shift] & UART_LSR_THRE))
+		barrier();
+	uart[UART_TX << shift] = c;
+}
+
+static inline void flush(void)
+{
 }
 
 /*
diff --git a/include/asm-arm/arch-pxa/uncompress.h b/include/asm-arm/arch-pxa/uncompress.h
index fe38090..178aa2e 100644
--- a/include/asm-arm/arch-pxa/uncompress.h
+++ b/include/asm-arm/arch-pxa/uncompress.h
@@ -17,23 +17,18 @@
 #define UART		FFUART
 
 
-static __inline__ void putc(char c)
+static inline void putc(char c)
 {
-	while (!(UART[5] & 0x20));
+	while (!(UART[5] & 0x20))
+		barrier();
 	UART[0] = c;
 }
 
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 /*
diff --git a/include/asm-arm/arch-realview/uncompress.h b/include/asm-arm/arch-realview/uncompress.h
index b5e4d36..f05631d 100644
--- a/include/asm-arm/arch-realview/uncompress.h
+++ b/include/asm-arm/arch-realview/uncompress.h
@@ -27,22 +27,16 @@
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static inline void putc(int c)
 {
-	while (*s) {
-		while (AMBA_UART_FR & (1 << 5))
-			barrier();
-
-		AMBA_UART_DR = *s;
+	while (AMBA_UART_FR & (1 << 5))
+		barrier();
 
-		if (*s == '\n') {
-			while (AMBA_UART_FR & (1 << 5))
-				barrier();
+	AMBA_UART_DR = c;
+}
 
-			AMBA_UART_DR = '\r';
-		}
-		s++;
-	}
+static inline void flush(void)
+{
 	while (AMBA_UART_FR & (1 << 3))
 		barrier();
 }
diff --git a/include/asm-arm/arch-rpc/uncompress.h b/include/asm-arm/arch-rpc/uncompress.h
index 43035fe..06231ed 100644
--- a/include/asm-arm/arch-rpc/uncompress.h
+++ b/include/asm-arm/arch-rpc/uncompress.h
@@ -67,31 +67,28 @@ extern __attribute__((pure)) struct param_struct *params(void);
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static void putc(int c)
 {
 	extern void ll_write_char(char *, char c, char white);
 	int x,y;
-	unsigned char c;
 	char *ptr;
 
 	x = params->video_x;
 	y = params->video_y;
 
-	while ( ( c = *(unsigned char *)s++ ) != '\0' ) {
-		if ( c == '\n' ) {
+	if (c == '\n') {
+		if (++y >= video_num_lines)
+			y--;
+	} else if (c == '\r') {
+		x = 0;
+	} else {
+		ptr = VIDMEM + ((y*video_num_columns*params->bytes_per_char_v+x)*bytes_per_char_h);
+		ll_write_char(ptr, c, white);
+		if (++x >= video_num_columns) {
 			x = 0;
 			if ( ++y >= video_num_lines ) {
 				y--;
 			}
-		} else {
-			ptr = VIDMEM + ((y*video_num_columns*params->bytes_per_char_v+x)*bytes_per_char_h);
-			ll_write_char(ptr, c, white);
-			if ( ++x >= video_num_columns ) {
-				x = 0;
-				if ( ++y >= video_num_lines ) {
-					y--;
-				}
-			}
 		}
 	}
 
@@ -99,6 +96,10 @@ static void putstr(const char *s)
 	params->video_y = y;
 }
 
+static inline void flush(void)
+{
+}
+
 static void error(char *x);
 
 /*
diff --git a/include/asm-arm/arch-s3c2410/uncompress.h b/include/asm-arm/arch-s3c2410/uncompress.h
index 4367ec0..a6f6a0e 100644
--- a/include/asm-arm/arch-s3c2410/uncompress.h
+++ b/include/asm-arm/arch-s3c2410/uncompress.h
@@ -67,8 +67,7 @@ uart_rd(unsigned int reg)
  * waiting for tx to happen...
 */
 
-static void
-putc(char ch)
+static void putc(int ch)
 {
 	int cpuid = S3C2410_GSTATUS1_2410;
 
@@ -77,9 +76,6 @@ putc(char ch)
 	cpuid &= S3C2410_GSTATUS1_IDMASK;
 #endif
 
-	if (ch == '\n')
-		putc('\r');    /* expand newline to \r\n */
-
 	if (uart_rd(S3C2410_UFCON) & S3C2410_UFCON_FIFOMODE) {
 		int level;
 
@@ -101,19 +97,16 @@ putc(char ch)
 	} else {
 		/* not using fifos */
 
-		while ((uart_rd(S3C2410_UTRSTAT) & S3C2410_UTRSTAT_TXE) != S3C2410_UTRSTAT_TXE);
+		while ((uart_rd(S3C2410_UTRSTAT) & S3C2410_UTRSTAT_TXE) != S3C2410_UTRSTAT_TXE)
+			barrier();
 	}
 
 	/* write byte to transmission register */
 	uart_wr(S3C2410_UTXH, ch);
 }
 
-static void
-putstr(const char *ptr)
+static inline void flush(void)
 {
-	for (; *ptr != '\0'; ptr++) {
-		putc(*ptr);
-	}
 }
 
 #define __raw_writel(d,ad) do { *((volatile unsigned int *)(ad)) = (d); } while(0)
diff --git a/include/asm-arm/arch-sa1100/uncompress.h b/include/asm-arm/arch-sa1100/uncompress.h
index 4345350..2601a77 100644
--- a/include/asm-arm/arch-sa1100/uncompress.h
+++ b/include/asm-arm/arch-sa1100/uncompress.h
@@ -17,7 +17,7 @@
 
 #define UART(x)		(*(volatile unsigned long *)(serial_port + (x)))
 
-static void putstr( const char *s )
+static void putc(int c)
 {
 	unsigned long serial_port;
 
@@ -31,19 +31,16 @@ static void putstr( const char *s )
 		return;
 	} while (0);
 
-	for (; *s; s++) {
-		/* wait for space in the UART's transmitter */
-		while (!(UART(UTSR1) & UTSR1_TNF));
+	/* wait for space in the UART's transmitter */
+	while (!(UART(UTSR1) & UTSR1_TNF))
+		barrier();
 
-		/* send the character out. */
-		UART(UTDR) = *s;
+	/* send the character out. */
+	UART(UTDR) = c;
+}
 
-		/* if a LF, also do CR... */
-		if (*s == 10) {
-			while (!(UART(UTSR1) & UTSR1_TNF));
-			UART(UTDR) = 13;
-		}
-	}
+static inline void flush(void)
+{
 }
 
 /*
diff --git a/include/asm-arm/arch-shark/uncompress.h b/include/asm-arm/arch-shark/uncompress.h
index 910a8e0..7eca653 100644
--- a/include/asm-arm/arch-shark/uncompress.h
+++ b/include/asm-arm/arch-shark/uncompress.h
@@ -9,7 +9,7 @@
 
 #define SERIAL_BASE ((volatile unsigned char *)0x400003f8)
 
-static __inline__ void putc(char c)
+static inline void putc(int c)
 {
 	int t;
 
@@ -18,17 +18,8 @@ static __inline__ void putc(char c)
 	while (t--);
 }
 
-/*
- * This does not append a newline
- */
-static void putstr(const char *s)
+static inline void flush(void)
 {
-	while (*s) {
-		putc(*s);
-		if (*s == '\n')
-			putc('\r');
-		s++;
-	}
 }
 
 #ifdef DEBUG
diff --git a/include/asm-arm/arch-versatile/uncompress.h b/include/asm-arm/arch-versatile/uncompress.h
index 2f57499..7215133 100644
--- a/include/asm-arm/arch-versatile/uncompress.h
+++ b/include/asm-arm/arch-versatile/uncompress.h
@@ -25,22 +25,16 @@
 /*
  * This does not append a newline
  */
-static void putstr(const char *s)
+static inline void putc(int c)
 {
-	while (*s) {
-		while (AMBA_UART_FR & (1 << 5))
-			barrier();
-
-		AMBA_UART_DR = *s;
+	while (AMBA_UART_FR & (1 << 5))
+		barrier();
 
-		if (*s == '\n') {
-			while (AMBA_UART_FR & (1 << 5))
-				barrier();
+	AMBA_UART_DR = c;
+}
 
-			AMBA_UART_DR = '\r';
-		}
-		s++;
-	}
+static inline void flush(void)
+{
 	while (AMBA_UART_FR & (1 << 3))
 		barrier();
 }
-- 
cgit v0.10.2


From de4533a04eb4f66dbef71f59a9c118256b886823 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Tue, 28 Mar 2006 10:34:05 +0100
Subject: [ARM] Move ice-dcc code into misc.c

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 35ffe0f..2adc152 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -50,10 +50,6 @@ ifeq ($(CONFIG_ARCH_AT91RM9200),y)
 OBJS		+= head-at91rm9200.o
 endif
 
-ifeq ($(CONFIG_DEBUG_ICEDCC),y)
-OBJS            += ice-dcc.o
-endif
-
 ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
 OBJS		+= big-endian.o
 endif
diff --git a/arch/arm/boot/compressed/ice-dcc.S b/arch/arm/boot/compressed/ice-dcc.S
deleted file mode 100644
index 104377a..0000000
--- a/arch/arm/boot/compressed/ice-dcc.S
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-	.text
-
-	.global	icedcc_putc
-
-icedcc_putc:
-	mov	r2, #0x4000000
-1:
-	subs	r2, r2, #1
-	movlt	pc, r14
-	mrc	p14, 0, r1, c0, c0, 0
-	tst	r1, #2
-	bne	1b
-
-	mcr	p14, 0, r0, c1, c0, 0
-	mov	pc, r14
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 28626ec..0af3772 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -30,7 +30,20 @@ static void putstr(const char *ptr);
 #include <asm/arch/uncompress.h>
 
 #ifdef CONFIG_DEBUG_ICEDCC
-extern void icedcc_putc(int ch);
+static void icedcc_putc(int ch)
+{
+	int status, i = 0x4000000;
+
+	do {
+		if (--i < 0)
+			return;
+
+		asm("mrc p14, 0, %0, c0, c0, 0" : "=r" (status));
+	} while (status & 2);
+
+	asm("mcr p15, 0, %0, c1, c0, 0" : : "r" (ch));
+}
+
 #define putc(ch)	icedcc_putc(ch)
 #define flush()	do { } while (0)
 #endif
-- 
cgit v0.10.2


From 206dc69b31ca05baac68c75b8ed2ba7dd857d273 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 28 Mar 2006 13:03:44 +0200
Subject: [BLOCK] cfq-iosched: seek and async performance fixes

Detect whether a given process is seeky and if so disable (mostly) the
idle window if it is. We still allow just a little idle time, just enough
to allow that process to submit a new request. That is needed to maintain
fairness across priority groups.

In some cases, we could setup several async queues. This is not optimal
from a performance POV, since we want all async io in one queue to perform
good sorting on it. It also impacted sync queues, as async io got too much
slice time.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 15152e2..67d446d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,18 +26,12 @@ static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */
 static const int cfq_slice_sync = HZ / 10;
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 100;
+static int cfq_slice_idle = HZ / 70;
 
 #define CFQ_IDLE_GRACE		(HZ / 10)
 #define CFQ_SLICE_SCALE		(5)
 
 #define CFQ_KEY_ASYNC		(0)
-#define CFQ_KEY_ANY		(0xffff)
-
-/*
- * disable queueing at the driver/hardware level
- */
-static const int cfq_max_depth = 2;
 
 static DEFINE_RWLOCK(cfq_exit_lock);
 
@@ -102,6 +96,8 @@ static struct completion *ioc_gone;
 #define cfq_cfqq_sync(cfqq)		\
 	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
 
+#define sample_valid(samples)	((samples) > 80)
+
 /*
  * Per block device queue structure
  */
@@ -170,7 +166,6 @@ struct cfq_data {
 	unsigned int cfq_slice[2];
 	unsigned int cfq_slice_async_rq;
 	unsigned int cfq_slice_idle;
-	unsigned int cfq_max_depth;
 
 	struct list_head cic_list;
 };
@@ -343,6 +338,14 @@ static int cfq_queue_empty(request_queue_t *q)
 	return !cfqd->busy_queues;
 }
 
+static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
+{
+	if (rw == READ || process_sync(task))
+		return task->pid;
+
+	return CFQ_KEY_ASYNC;
+}
+
 /*
  * Lifted from AS - choose which of crq1 and crq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
@@ -626,15 +629,20 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 	cfq_add_crq_rb(crq);
 }
 
-static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
-
+static struct request *
+cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 {
-	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
+	struct task_struct *tsk = current;
+	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
+	struct cfq_queue *cfqq;
 	struct rb_node *n;
+	sector_t sector;
 
+	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
 	if (!cfqq)
 		goto out;
 
+	sector = bio->bi_sector + bio_sectors(bio);
 	n = cfqq->sort_list.rb_node;
 	while (n) {
 		struct cfq_rq *crq = rb_entry_crq(n);
@@ -688,7 +696,7 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 		goto out;
 	}
 
-	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
+	__rq = cfq_find_rq_fmerge(cfqd, bio);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_FRONT_MERGE;
 		goto out;
@@ -891,6 +899,7 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 {
+	struct cfq_io_context *cic;
 	unsigned long sl;
 
 	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
@@ -906,13 +915,23 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	/*
 	 * task has exited, don't wait
 	 */
-	if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
+	cic = cfqd->active_cic;
+	if (!cic || !cic->ioc->task)
 		return 0;
 
 	cfq_mark_cfqq_must_dispatch(cfqq);
 	cfq_mark_cfqq_wait_request(cfqq);
 
 	sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
+
+	/*
+	 * we don't want to idle for seeks, but we do want to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks. so allow a little bit of time for him to submit a new rq
+	 */
+	if (sample_valid(cic->seek_samples) && cic->seek_mean > 131072)
+		sl = 2;
+
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 	return 1;
 }
@@ -1129,13 +1148,6 @@ cfq_dispatch_requests(request_queue_t *q, int force)
 	if (cfqq) {
 		int max_dispatch;
 
-		/*
-		 * if idle window is disabled, allow queue buildup
-		 */
-		if (!cfq_cfqq_idle_window(cfqq) &&
-		    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-			return 0;
-
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
@@ -1185,13 +1197,13 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
 		    const int hashval)
 {
 	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
-	struct hlist_node *entry, *next;
+	struct hlist_node *entry;
+	struct cfq_queue *__cfqq;
 
-	hlist_for_each_safe(entry, next, hash_list) {
-		struct cfq_queue *__cfqq = list_entry_qhash(entry);
+	hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
 		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
 
-		if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
+		if (__cfqq->key == key && (__p == prio || !prio))
 			return __cfqq;
 	}
 
@@ -1572,7 +1584,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
 }
 
-#define sample_valid(samples)	((samples) > 80)
+static void
+cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
+		       struct cfq_rq *crq)
+{
+	sector_t sdist;
+	u64 total;
+
+	if (cic->last_request_pos < crq->request->sector)
+		sdist = crq->request->sector - cic->last_request_pos;
+	else
+		sdist = cic->last_request_pos - crq->request->sector;
+
+	/*
+	 * Don't allow the seek distance to get too large from the
+	 * odd fragment, pagein, etc
+	 */
+	if (cic->seek_samples <= 60) /* second&third seek */
+		sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
+	else
+		sdist = min(sdist, (cic->seek_mean * 4)	+ 2*1024*64);
+
+	cic->seek_samples = (7*cic->seek_samples + 256) / 8;
+	cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
+	total = cic->seek_total + (cic->seek_samples/2);
+	do_div(total, cic->seek_samples);
+	cic->seek_mean = (sector_t)total;
+}
 
 /*
  * Disable idle window if the process thinks too long or seeks so much that
@@ -1685,9 +1723,11 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	cic = crq->io_context;
 
 	cfq_update_io_thinktime(cfqd, cic);
+	cfq_update_io_seektime(cfqd, cic, crq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
 	cic->last_queue = jiffies;
+	cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
 
 	if (cfqq == cfqd->active_queue) {
 		/*
@@ -1820,14 +1860,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		cfq_resort_rr_list(cfqq, 0);
 }
 
-static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
-{
-	if (rw == READ || process_sync(task))
-		return task->pid;
-
-	return CFQ_KEY_ASYNC;
-}
-
 static inline int
 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		struct task_struct *task, int rw)
@@ -2226,7 +2258,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
-	cfqd->cfq_max_depth = cfq_max_depth;
 
 	return 0;
 out_crqpool:
@@ -2309,7 +2340,6 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
-SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2338,7 +2368,6 @@ STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2355,7 +2384,6 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
-	CFQ_ATTR(max_depth),
 	__ATTR_NULL
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed0ffa6..d0cac8b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -63,11 +63,17 @@ struct cfq_io_context {
 	struct io_context *ioc;
 
 	unsigned long last_end_request;
-	unsigned long last_queue;
+	sector_t last_request_pos;
+ 	unsigned long last_queue;
+
 	unsigned long ttime_total;
 	unsigned long ttime_samples;
 	unsigned long ttime_mean;
 
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+
 	struct list_head queue_list;
 
 	void (*dtor)(struct io_context *); /* destructor */
-- 
cgit v0.10.2


From 63732c2f37093d63102d53e70866cf87bf0c0479 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:55:58 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on x86

Reading the CMOS clock on x86 and some other arches currently takes up to one
second because it synchronizes with the CMOS second tick-over.  This delay
shows up at boot time as well a resume time.

This is the currently the most substantial boot time delay for machines that
are working towards instant-on capability.  Also, a quick back of the envelope
calculation (.5sec * 2M users * 1 boot a day * 10 years) suggests it has cost
Linux users in the neighborhood of a million man-hours.

An earlier thread on this topic is here:

http://groups.google.com/group/linux.kernel/browse_frm/thread/8a24255215ff6151/2aa97e66a977653d?hl=en&lr=&ie=UTF-8&rnum=1&prev=/groups%3Fhl%3Den%26lr%3D%26ie%3DUTF-8%26selm%3D1To2R-2S7-11%40gated-at.bofh.it#2aa97e66a977653d

..from which the consensus seems to be that it's no longer desirable.

In my view, there are basically four cases to consider:

1) networked, need precise walltime: use NTP
2) networked, don't need precise walltime: use NTP anyway
3) not networked, don't need sub-second precision walltime: don't care
4) not networked, need sub-second precision walltime:
   get a network or a radio time source because RTC isn't good enough anyway

So this patch series simply removes the synchronization in favor of a simple
seqlock-like approach using the seconds value.

Note that for purposes of timer accuracy on wakeup, this patch will cause us
to fire timers up to one second late.  But as the current timer resume code
will already sync once (or more!), it's no worse for short timers.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Andi Kleen <ak@muc.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/mach-default/mach_time.h b/include/asm-i386/mach-default/mach_time.h
index b749aa4..ff03cf0 100644
--- a/include/asm-i386/mach-default/mach_time.h
+++ b/include/asm-i386/mach-default/mach_time.h
@@ -82,21 +82,8 @@ static inline int mach_set_rtc_mmss(unsigned long nowtime)
 static inline unsigned long mach_get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int i;
 
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-	/* read RTC exactly on falling edge of update flag */
-	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
-		if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
-			break;
-	for (i = 0 ; i < 1000000 ; i++)	/* must try at least 2.228 ms */
-		if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
-			break;
-	do { /* Isn't this overkill ? UIP above should guarantee consistency */
+	do {
 		sec = CMOS_READ(RTC_SECONDS);
 		min = CMOS_READ(RTC_MINUTES);
 		hour = CMOS_READ(RTC_HOURS);
@@ -104,6 +91,7 @@ static inline unsigned long mach_get_cmos_time(void)
 		mon = CMOS_READ(RTC_MONTH);
 		year = CMOS_READ(RTC_YEAR);
 	} while (sec != CMOS_READ(RTC_SECONDS));
+
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	  {
 	    BCD_TO_BIN(sec);
-- 
cgit v0.10.2


From 641f71f5f6ed251959ef8f88b1d0edc6ef7a4632 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:01 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on x86_64

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7f58fa6..473b514 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -504,42 +504,25 @@ unsigned long long sched_clock(void)
 
 static unsigned long get_cmos_time(void)
 {
-	unsigned int timeout = 1000000, year, mon, day, hour, min, sec;
-	unsigned char uip = 0, this = 0;
+	unsigned int year, mon, day, hour, min, sec;
 	unsigned long flags;
 	unsigned extyear = 0;
 
-/*
- * The Linux interpretation of the CMOS clock register contents: When the
- * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the
- * second which has precisely just started. Waiting for this can take up to 1
- * second, we timeout approximately after 2.4 seconds on a machine with
- * standard 8.3 MHz ISA bus.
- */
-
 	spin_lock_irqsave(&rtc_lock, flags);
 
-	while (timeout && (!uip || this)) {
-		uip |= this;
-		this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
-		timeout--;
-	}
-
-	/*
-	 * Here we are safe to assume the registers won't change for a whole
-	 * second, so we just go ahead and read them.
- 	 */
-	sec = CMOS_READ(RTC_SECONDS);
-	min = CMOS_READ(RTC_MINUTES);
-	hour = CMOS_READ(RTC_HOURS);
-	day = CMOS_READ(RTC_DAY_OF_MONTH);
-	mon = CMOS_READ(RTC_MONTH);
-	year = CMOS_READ(RTC_YEAR);
-
+	do {
+		sec = CMOS_READ(RTC_SECONDS);
+		min = CMOS_READ(RTC_MINUTES);
+		hour = CMOS_READ(RTC_HOURS);
+		day = CMOS_READ(RTC_DAY_OF_MONTH);
+		mon = CMOS_READ(RTC_MONTH);
+		year = CMOS_READ(RTC_YEAR);
 #ifdef CONFIG_ACPI
-	if (acpi_fadt.revision >= FADT2_REVISION_ID && acpi_fadt.century)
-		extyear = CMOS_READ(acpi_fadt.century);
+		if (acpi_fadt.revision >= FADT2_REVISION_ID &&
+					acpi_fadt.century)
+			extyear = CMOS_READ(acpi_fadt.century);
 #endif
+	} while (sec != CMOS_READ(RTC_SECONDS));
 
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-- 
cgit v0.10.2


From 3dedf53bb16e27743e5d1f49dbfecf3e5897befa Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:01 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on Sparc64

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 7d61f1b..e55b5c6 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -641,23 +641,8 @@ static void __init set_system_time(void)
 		mon = MSTK_REG_MONTH(mregs);
 		year = MSTK_CVT_YEAR( MSTK_REG_YEAR(mregs) );
 	} else {
-		int i;
-
 		/* Dallas 12887 RTC chip. */
 
-		/* Stolen from arch/i386/kernel/time.c, see there for
-		 * credits and descriptive comments.
-		 */
-		for (i = 0; i < 1000000; i++) {
-			if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
-				break;
-			udelay(10);
-		}
-		for (i = 0; i < 1000000; i++) {
-			if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
-				break;
-			udelay(10);
-		}
 		do {
 			sec  = CMOS_READ(RTC_SECONDS);
 			min  = CMOS_READ(RTC_MINUTES);
@@ -666,6 +651,7 @@ static void __init set_system_time(void)
 			mon  = CMOS_READ(RTC_MONTH);
 			year = CMOS_READ(RTC_YEAR);
 		} while (sec != CMOS_READ(RTC_SECONDS));
+
 		if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 			BCD_TO_BIN(sec);
 			BCD_TO_BIN(min);
-- 
cgit v0.10.2


From 59937bcdb2bd759d15c792d9798a00da03bf1f48 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:02 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on PPC CHRP (arch/ppc)

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/ppc/platforms/chrp_time.c b/arch/ppc/platforms/chrp_time.c
index c862777..e103a36 100644
--- a/arch/ppc/platforms/chrp_time.c
+++ b/arch/ppc/platforms/chrp_time.c
@@ -119,33 +119,15 @@ int chrp_set_rtc_time(unsigned long nowtime)
 unsigned long chrp_get_rtc_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int uip, i;
 
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-
-	/* Since the UIP flag is set for about 2.2 ms and the clock
-	 * is typically written with a precision of 1 jiffy, trying
-	 * to obtain a precision better than a few milliseconds is
-	 * an illusion. Only consistency is interesting, this also
-	 * allows to use the routine for /dev/rtc without a potential
-	 * 1 second kernel busy loop triggered by any reader of /dev/rtc.
-	 */
-
-	for ( i = 0; i<1000000; i++) {
-		uip = chrp_cmos_clock_read(RTC_FREQ_SELECT);
+	do {
 		sec = chrp_cmos_clock_read(RTC_SECONDS);
 		min = chrp_cmos_clock_read(RTC_MINUTES);
 		hour = chrp_cmos_clock_read(RTC_HOURS);
 		day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH);
 		mon = chrp_cmos_clock_read(RTC_MONTH);
 		year = chrp_cmos_clock_read(RTC_YEAR);
-		uip |= chrp_cmos_clock_read(RTC_FREQ_SELECT);
-		if ((uip & RTC_UIP)==0) break;
-	}
+	} while (sec != chrp_cmos_clock_read(RTC_SECONDS));
 
 	if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	  {
-- 
cgit v0.10.2


From 4dc12ec7e26f287e33065e803bc0aede4c1dbdf3 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:03 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on CHRP (arch/powerpc)

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
index 12c6f68..7d78890 100644
--- a/arch/powerpc/platforms/chrp/time.c
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -120,33 +120,15 @@ int chrp_set_rtc_time(struct rtc_time *tmarg)
 void chrp_get_rtc_time(struct rtc_time *tm)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int uip, i;
 
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-
-	/* Since the UIP flag is set for about 2.2 ms and the clock
-	 * is typically written with a precision of 1 jiffy, trying
-	 * to obtain a precision better than a few milliseconds is
-	 * an illusion. Only consistency is interesting, this also
-	 * allows to use the routine for /dev/rtc without a potential
-	 * 1 second kernel busy loop triggered by any reader of /dev/rtc.
-	 */
-
-	for ( i = 0; i<1000000; i++) {
-		uip = chrp_cmos_clock_read(RTC_FREQ_SELECT);
+	do {
 		sec = chrp_cmos_clock_read(RTC_SECONDS);
 		min = chrp_cmos_clock_read(RTC_MINUTES);
 		hour = chrp_cmos_clock_read(RTC_HOURS);
 		day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH);
 		mon = chrp_cmos_clock_read(RTC_MONTH);
 		year = chrp_cmos_clock_read(RTC_YEAR);
-		uip |= chrp_cmos_clock_read(RTC_FREQ_SELECT);
-		if ((uip & RTC_UIP)==0) break;
-	}
+	} while (sec != chrp_cmos_clock_read(RTC_SECONDS));
 
 	if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 		BCD_TO_BIN(sec);
-- 
cgit v0.10.2


From 6f0d7bd6a14dcf5b258cfd809143c50e4897902b Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:04 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on PPC Maple

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index 5e6981d..b9a2b3d 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -60,34 +60,14 @@ static void maple_clock_write(unsigned long val, int addr)
 
 void maple_get_rtc_time(struct rtc_time *tm)
 {
-	int uip, i;
-
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-
-	/* Since the UIP flag is set for about 2.2 ms and the clock
-	 * is typically written with a precision of 1 jiffy, trying
-	 * to obtain a precision better than a few milliseconds is
-	 * an illusion. Only consistency is interesting, this also
-	 * allows to use the routine for /dev/rtc without a potential
-	 * 1 second kernel busy loop triggered by any reader of /dev/rtc.
-	 */
-
-	for (i = 0; i<1000000; i++) {
-		uip = maple_clock_read(RTC_FREQ_SELECT);
+	do {
 		tm->tm_sec = maple_clock_read(RTC_SECONDS);
 		tm->tm_min = maple_clock_read(RTC_MINUTES);
 		tm->tm_hour = maple_clock_read(RTC_HOURS);
 		tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH);
 		tm->tm_mon = maple_clock_read(RTC_MONTH);
 		tm->tm_year = maple_clock_read(RTC_YEAR);
-		uip |= maple_clock_read(RTC_FREQ_SELECT);
-		if ((uip & RTC_UIP)==0)
-			break;
-	}
+	} while (tm->tm_sec != maple_clock_read(RTC_SECONDS));
 
 	if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY)
 	    || RTC_ALWAYS_BCD) {
-- 
cgit v0.10.2


From 4af6ec46c696a57ba9b1c1abffcdca14d6ab5410 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:05 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on ARM

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/arm/mach-footbridge/time.c b/arch/arm/mach-footbridge/time.c
index 2c64a0b..5d02e95 100644
--- a/arch/arm/mach-footbridge/time.c
+++ b/arch/arm/mach-footbridge/time.c
@@ -34,27 +34,12 @@ static int rtc_base;
 static unsigned long __init get_isa_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int i;
 
 	// check to see if the RTC makes sense.....
 	if ((CMOS_READ(RTC_VALID) & RTC_VRT) == 0)
 		return mktime(1970, 1, 1, 0, 0, 0);
 
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-	/* read RTC exactly on falling edge of update flag */
-	for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
-		if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
-			break;
-
-	for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
-		if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
-			break;
-
-	do { /* Isn't this overkill ? UIP above should guarantee consistency */
+	do {
 		sec  = CMOS_READ(RTC_SECONDS);
 		min  = CMOS_READ(RTC_MINUTES);
 		hour = CMOS_READ(RTC_HOURS);
-- 
cgit v0.10.2


From da2468b6a81884a696fd6c7ab66dcc62d7233d32 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:05 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on MIPS MC146818

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-mips/mc146818-time.h b/include/asm-mips/mc146818-time.h
index 4721486..41ac8d3 100644
--- a/include/asm-mips/mc146818-time.h
+++ b/include/asm-mips/mc146818-time.h
@@ -86,43 +86,14 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 	return retval;
 }
 
-/*
- * Returns true if a clock update is in progress
- */
-static inline unsigned char rtc_is_updating(void)
-{
-	unsigned char uip;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return uip;
-}
-
 static inline unsigned long mc146818_get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int i;
 	unsigned long flags;
 
-	/*
-	 * The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-
-	/* read RTC exactly on falling edge of update flag */
-	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
-		if (rtc_is_updating())
-			break;
-	for (i = 0 ; i < 1000000 ; i++)	/* must try at least 2.228 ms */
-		if (!rtc_is_updating())
-			break;
-
 	spin_lock_irqsave(&rtc_lock, flags);
-	do { /* Isn't this overkill ? UIP above should guarantee consistency */
+
+	do {
 		sec = CMOS_READ(RTC_SECONDS);
 		min = CMOS_READ(RTC_MINUTES);
 		hour = CMOS_READ(RTC_HOURS);
-- 
cgit v0.10.2


From ddcabb4fc7f2f4a70f35aa72827888a4c549d355 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:06 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on MIPS-based DEC

Move real_year inside the read loop and move the spinlock up as well

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index f17d337..74cb055 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -36,41 +36,13 @@
 #include <asm/dec/ioasic_addrs.h>
 #include <asm/dec/machtype.h>
 
-
-/*
- * Returns true if a clock update is in progress
- */
-static inline unsigned char dec_rtc_is_updating(void)
-{
-	unsigned char uip;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return uip;
-}
-
 static unsigned long dec_rtc_get_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec, real_year;
-	int i;
 	unsigned long flags;
 
-	/* The Linux interpretation of the DS1287 clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-	/* read RTC exactly on falling edge of update flag */
-	for (i = 0; i < 1000000; i++)	/* may take up to 1 second... */
-		if (dec_rtc_is_updating())
-			break;
-	for (i = 0; i < 1000000; i++)	/* must try at least 2.228 ms */
-		if (!dec_rtc_is_updating())
-			break;
 	spin_lock_irqsave(&rtc_lock, flags);
-	/* Isn't this overkill?  UIP above should guarantee consistency */
+
 	do {
 		sec = CMOS_READ(RTC_SECONDS);
 		min = CMOS_READ(RTC_MINUTES);
@@ -78,7 +50,16 @@ static unsigned long dec_rtc_get_time(void)
 		day = CMOS_READ(RTC_DAY_OF_MONTH);
 		mon = CMOS_READ(RTC_MONTH);
 		year = CMOS_READ(RTC_YEAR);
+		/*
+		 * The PROM will reset the year to either '72 or '73.
+		 * Therefore we store the real year separately, in one
+		 * of unused BBU RAM locations.
+		 */
+		real_year = CMOS_READ(RTC_DEC_YEAR);
 	} while (sec != CMOS_READ(RTC_SECONDS));
+
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 		sec = BCD2BIN(sec);
 		min = BCD2BIN(min);
@@ -87,13 +68,7 @@ static unsigned long dec_rtc_get_time(void)
 		mon = BCD2BIN(mon);
 		year = BCD2BIN(year);
 	}
-	/*
-	 * The PROM will reset the year to either '72 or '73.
-	 * Therefore we store the real year separately, in one
-	 * of unused BBU RAM locations.
-	 */
-	real_year = CMOS_READ(RTC_DEC_YEAR);
-	spin_unlock_irqrestore(&rtc_lock, flags);
+
 	year += real_year - 72 + 2000;
 
 	return mktime(year, mon, day, hour, min, sec);
-- 
cgit v0.10.2


From 04cfbee0fcb2a2ab31344ac9ace1dd370f3de2bd Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:07 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on SH03

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/sh/boards/sh03/rtc.c b/arch/sh/boards/sh03/rtc.c
index cbeca70..e8bec67 100644
--- a/arch/sh/boards/sh03/rtc.c
+++ b/arch/sh/boards/sh03/rtc.c
@@ -48,13 +48,9 @@ extern spinlock_t rtc_lock;
 unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int i;
 
 	spin_lock(&rtc_lock);
  again:
-	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
-		if (!(ctrl_inb(RTC_CTL) & RTC_BUSY))
-			break;
 	do {
 		sec  = (ctrl_inb(RTC_SEC1) & 0xf) + (ctrl_inb(RTC_SEC10) & 0x7) * 10;
 		min  = (ctrl_inb(RTC_MIN1) & 0xf) + (ctrl_inb(RTC_MIN10) & 0xf) * 10;
-- 
cgit v0.10.2


From 356a9ce118879664ff88b76055b8bba115c1cc5e Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:08 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on SH MPC1211

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/sh/boards/mpc1211/rtc.c b/arch/sh/boards/mpc1211/rtc.c
index 4d100f0..76aab46 100644
--- a/arch/sh/boards/mpc1211/rtc.c
+++ b/arch/sh/boards/mpc1211/rtc.c
@@ -19,26 +19,13 @@
 #define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
 #endif
 
-/* arc/i386/kernel/time.c */
 unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
-	int i;
 
 	spin_lock(&rtc_lock);
-	/* The Linux interpretation of the CMOS clock register contents:
-	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
-	 * RTC registers show the second which has precisely just started.
-	 * Let's hope other operating systems interpret the RTC the same way.
-	 */
-	/* read RTC exactly on falling edge of update flag */
-	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
-		if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
-			break;
-	for (i = 0 ; i < 1000000 ; i++)	/* must try at least 2.228 ms */
-		if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
-			break;
-	do { /* Isn't this overkill ? UIP above should guarantee consistency */
+
+	do {
 		sec = CMOS_READ(RTC_SECONDS);
 		min = CMOS_READ(RTC_MINUTES);
 		hour = CMOS_READ(RTC_HOURS);
@@ -46,6 +33,7 @@ unsigned long get_cmos_time(void)
 		mon = CMOS_READ(RTC_MONTH);
 		year = CMOS_READ(RTC_YEAR);
 	} while (sec != CMOS_READ(RTC_SECONDS));
+
 	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
 	  {
 	    BCD_TO_BIN(sec);
-- 
cgit v0.10.2


From 4c2e6f6a06cdd239ec17e195e7868ce0171ea154 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:09 -0800
Subject: [PATCH] RTC: Remove RTC UIP synchronization on Alpha

The sync may still be needed for CPU clock calibration but we don't sync in
the regular case.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 6b2921b..3859749 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -314,10 +314,11 @@ time_init(void)
 	if (!est_cycle_freq)
 		est_cycle_freq = validate_cc_value(calibrate_cc_with_pit());
 
-	cc1 = rpcc_after_update_in_progress();
+	cc1 = rpcc();
 
 	/* Calibrate CPU clock -- attempt #2.  */
 	if (!est_cycle_freq) {
+		cc1 = rpcc_after_update_in_progress();
 		cc2 = rpcc_after_update_in_progress();
 		est_cycle_freq = validate_cc_value(cc2 - cc1);
 		cc1 = cc2;
-- 
cgit v0.10.2


From 41623b064fbd76de5901da7c0e3cd2136617d787 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:09 -0800
Subject: [PATCH] RTC: Fix up some RTC whitespace and style

Fix up some RTC whitespace and style

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/ppc/platforms/chrp_time.c b/arch/ppc/platforms/chrp_time.c
index e103a36..51e06ad 100644
--- a/arch/ppc/platforms/chrp_time.c
+++ b/arch/ppc/platforms/chrp_time.c
@@ -129,16 +129,18 @@ unsigned long chrp_get_rtc_time(void)
 		year = chrp_cmos_clock_read(RTC_YEAR);
 	} while (sec != chrp_cmos_clock_read(RTC_SECONDS));
 
-	if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-	  {
-	    BCD_TO_BIN(sec);
-	    BCD_TO_BIN(min);
-	    BCD_TO_BIN(hour);
-	    BCD_TO_BIN(day);
-	    BCD_TO_BIN(mon);
-	    BCD_TO_BIN(year);
-	  }
-	if ((year += 1900) < 1970)
+	if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY)
+	    || RTC_ALWAYS_BCD) {
+		BCD_TO_BIN(sec);
+		BCD_TO_BIN(min);
+		BCD_TO_BIN(hour);
+		BCD_TO_BIN(day);
+		BCD_TO_BIN(mon);
+		BCD_TO_BIN(year);
+	}
+
+	year += 1900;
+	if (year < 1970)
 		year += 100;
 	return mktime(year, mon, day, hour, min, sec);
 }
diff --git a/arch/sh/boards/mpc1211/rtc.c b/arch/sh/boards/mpc1211/rtc.c
index 76aab46..8ae2dc1 100644
--- a/arch/sh/boards/mpc1211/rtc.c
+++ b/arch/sh/boards/mpc1211/rtc.c
@@ -34,18 +34,21 @@ unsigned long get_cmos_time(void)
 		year = CMOS_READ(RTC_YEAR);
 	} while (sec != CMOS_READ(RTC_SECONDS));
 
-	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-	  {
-	    BCD_TO_BIN(sec);
-	    BCD_TO_BIN(min);
-	    BCD_TO_BIN(hour);
-	    BCD_TO_BIN(day);
-	    BCD_TO_BIN(mon);
-	    BCD_TO_BIN(year);
-	  }
+	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		BCD_TO_BIN(sec);
+		BCD_TO_BIN(min);
+		BCD_TO_BIN(hour);
+		BCD_TO_BIN(day);
+		BCD_TO_BIN(mon);
+		BCD_TO_BIN(year);
+	}
+
 	spin_unlock(&rtc_lock);
-	if ((year += 1900) < 1970)
+
+	year += 1900;
+	if (year < 1970)
 		year += 100;
+
 	return mktime(year, mon, day, hour, min, sec);
 }
 
diff --git a/include/asm-i386/mach-default/mach_time.h b/include/asm-i386/mach-default/mach_time.h
index ff03cf0..31eb5de 100644
--- a/include/asm-i386/mach-default/mach_time.h
+++ b/include/asm-i386/mach-default/mach_time.h
@@ -92,16 +92,17 @@ static inline unsigned long mach_get_cmos_time(void)
 		year = CMOS_READ(RTC_YEAR);
 	} while (sec != CMOS_READ(RTC_SECONDS));
 
-	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-	  {
-	    BCD_TO_BIN(sec);
-	    BCD_TO_BIN(min);
-	    BCD_TO_BIN(hour);
-	    BCD_TO_BIN(day);
-	    BCD_TO_BIN(mon);
-	    BCD_TO_BIN(year);
-	  }
-	if ((year += 1900) < 1970)
+	if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		BCD_TO_BIN(sec);
+		BCD_TO_BIN(min);
+		BCD_TO_BIN(hour);
+		BCD_TO_BIN(day);
+		BCD_TO_BIN(mon);
+		BCD_TO_BIN(year);
+	}
+
+	year += 1900;
+	if (year < 1970)
 		year += 100;
 
 	return mktime(year, mon, day, hour, min, sec);
-- 
cgit v0.10.2


From 4f3a36a7d0eb420471506fcd46ee46f4b5cd4ebc Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Tue, 28 Mar 2006 01:56:10 -0800
Subject: [PATCH] RTC: Remove some duplicate BCD definitions

Remove some duplicate BCD definitions

Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c
index a69fe30..b0e4c08 100644
--- a/arch/m68k/mvme16x/rtc.c
+++ b/arch/m68k/mvme16x/rtc.c
@@ -17,6 +17,7 @@
 #include <linux/poll.h>
 #include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
 #include <linux/smp_lock.h>
+#include <linux/bcd.h>
 #include <asm/mvme16xhw.h>
 
 #include <asm/io.h>
@@ -31,9 +32,6 @@
  *	ioctls.
  */
 
-#define BCD2BIN(val) (((val)&15) + ((val)>>4)*10)
-#define BIN2BCD(val) ((((val)/10)<<4) + (val)%10)
-
 static const unsigned char days_in_mo[] =
 {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 
diff --git a/arch/mips/momentum/ocelot_3/setup.c b/arch/mips/momentum/ocelot_3/setup.c
index 969612e..370e75d 100644
--- a/arch/mips/momentum/ocelot_3/setup.c
+++ b/arch/mips/momentum/ocelot_3/setup.c
@@ -58,6 +58,7 @@
 #include <linux/bootmem.h>
 #include <linux/mv643xx.h>
 #include <linux/pm.h>
+#include <linux/bcd.h>
 
 #include <asm/time.h>
 #include <asm/page.h>
@@ -131,9 +132,6 @@ void setup_wired_tlb_entries(void)
 	add_wired_entry(ENTRYLO(0xfc000000), ENTRYLO(0xfd000000), (signed)0xfc000000, PM_16M);
 }
 
-#define CONV_BCD_TO_BIN(val)	(((val) & 0xf) + (((val) >> 4) * 10))
-#define CONV_BIN_TO_BCD(val)	(((val) % 10) + (((val) / 10) << 4))
-
 unsigned long m48t37y_get_time(void)
 {
 	unsigned int year, month, day, hour, min, sec;
@@ -143,16 +141,16 @@ unsigned long m48t37y_get_time(void)
 	/* stop the update */
 	rtc_base[0x7ff8] = 0x40;
 
-	year = CONV_BCD_TO_BIN(rtc_base[0x7fff]);
-	year += CONV_BCD_TO_BIN(rtc_base[0x7ff1]) * 100;
+	year = BCD2BIN(rtc_base[0x7fff]);
+	year += BCD2BIN(rtc_base[0x7ff1]) * 100;
 
-	month = CONV_BCD_TO_BIN(rtc_base[0x7ffe]);
+	month = BCD2BIN(rtc_base[0x7ffe]);
 
-	day = CONV_BCD_TO_BIN(rtc_base[0x7ffd]);
+	day = BCD2BIN(rtc_base[0x7ffd]);
 
-	hour = CONV_BCD_TO_BIN(rtc_base[0x7ffb]);
-	min = CONV_BCD_TO_BIN(rtc_base[0x7ffa]);
-	sec = CONV_BCD_TO_BIN(rtc_base[0x7ff9]);
+	hour = BCD2BIN(rtc_base[0x7ffb]);
+	min = BCD2BIN(rtc_base[0x7ffa]);
+	sec = BCD2BIN(rtc_base[0x7ff9]);
 
 	/* start the update */
 	rtc_base[0x7ff8] = 0x00;
@@ -175,22 +173,22 @@ int m48t37y_set_time(unsigned long sec)
 	rtc_base[0x7ff8] = 0x80;
 
 	/* year */
-	rtc_base[0x7fff] = CONV_BIN_TO_BCD(tm.tm_year % 100);
-	rtc_base[0x7ff1] = CONV_BIN_TO_BCD(tm.tm_year / 100);
+	rtc_base[0x7fff] = BIN2BCD(tm.tm_year % 100);
+	rtc_base[0x7ff1] = BIN2BCD(tm.tm_year / 100);
 
 	/* month */
-	rtc_base[0x7ffe] = CONV_BIN_TO_BCD(tm.tm_mon);
+	rtc_base[0x7ffe] = BIN2BCD(tm.tm_mon);
 
 	/* day */
-	rtc_base[0x7ffd] = CONV_BIN_TO_BCD(tm.tm_mday);
+	rtc_base[0x7ffd] = BIN2BCD(tm.tm_mday);
 
 	/* hour/min/sec */
-	rtc_base[0x7ffb] = CONV_BIN_TO_BCD(tm.tm_hour);
-	rtc_base[0x7ffa] = CONV_BIN_TO_BCD(tm.tm_min);
-	rtc_base[0x7ff9] = CONV_BIN_TO_BCD(tm.tm_sec);
+	rtc_base[0x7ffb] = BIN2BCD(tm.tm_hour);
+	rtc_base[0x7ffa] = BIN2BCD(tm.tm_min);
+	rtc_base[0x7ff9] = BIN2BCD(tm.tm_sec);
 
 	/* day of week -- not really used, but let's keep it up-to-date */
-	rtc_base[0x7ffc] = CONV_BIN_TO_BCD(tm.tm_wday + 1);
+	rtc_base[0x7ffc] = BIN2BCD(tm.tm_wday + 1);
 
 	/* disable writing */
 	rtc_base[0x7ff8] = 0x00;
diff --git a/arch/mips/tx4938/common/rtc_rx5c348.c b/arch/mips/tx4938/common/rtc_rx5c348.c
index f74295f..07f782f 100644
--- a/arch/mips/tx4938/common/rtc_rx5c348.c
+++ b/arch/mips/tx4938/common/rtc_rx5c348.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/rtc.h>
 #include <linux/time.h>
+#include <linux/bcd.h>
 #include <asm/time.h>
 #include <asm/tx4938/spi.h>
 
@@ -77,17 +78,6 @@ spi_rtc_io(unsigned char *inbuf, unsigned char *outbuf, unsigned int count)
 			   inbufs, incounts, outbufs, outcounts, 0);
 }
 
-/*
- * Conversion between binary and BCD.
- */
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
 /* RTC-dependent code for time.c */
 
 static int
diff --git a/arch/sh/boards/mpc1211/rtc.c b/arch/sh/boards/mpc1211/rtc.c
index 8ae2dc1..a76c655 100644
--- a/arch/sh/boards/mpc1211/rtc.c
+++ b/arch/sh/boards/mpc1211/rtc.c
@@ -9,16 +9,9 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/time.h>
+#include <linux/bcd.h>
 #include <linux/mc146818rtc.h>
 
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
 unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
diff --git a/arch/sh/boards/sh03/rtc.c b/arch/sh/boards/sh03/rtc.c
index e8bec67..d609863 100644
--- a/arch/sh/boards/sh03/rtc.c
+++ b/arch/sh/boards/sh03/rtc.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/time.h>
+#include <linux/bcd.h>
 #include <asm/io.h>
 #include <linux/rtc.h>
 #include <linux/spinlock.h>
@@ -33,14 +34,6 @@
 #define RTC_BUSY	1
 #define RTC_STOP	2
 
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val)	((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
 extern void (*rtc_get_time)(struct timespec *);
 extern int (*rtc_set_time)(const time_t);
 extern spinlock_t rtc_lock;
diff --git a/arch/sh/kernel/cpu/rtc.c b/arch/sh/kernel/cpu/rtc.c
index f8361f5..4304cf7 100644
--- a/arch/sh/kernel/cpu/rtc.c
+++ b/arch/sh/kernel/cpu/rtc.c
@@ -9,18 +9,10 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/time.h>
-
+#include <linux/bcd.h>
 #include <asm/io.h>
 #include <asm/rtc.h>
 
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
 void sh_rtc_gettimeofday(struct timespec *ts)
 {
 	unsigned int sec128, sec, sec2, min, hr, wk, day, mon, yr, yr100, cf_bit;
diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c
index 0773c9f..6b8f4d2 100644
--- a/arch/sh64/kernel/time.c
+++ b/arch/sh64/kernel/time.c
@@ -30,6 +30,7 @@
 #include <linux/profile.h>
 #include <linux/smp.h>
 #include <linux/module.h>
+#include <linux/bcd.h>
 
 #include <asm/registers.h>	 /* required by inline __asm__ stmt. */
 
@@ -105,14 +106,6 @@
 #define RCR1    	rtc_base+0x38
 #define RCR2    	rtc_base+0x3c
 
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
 #define TICK_SIZE (tick_nsec / 1000)
 
 extern unsigned long wall_jiffies;
-- 
cgit v0.10.2


From f5f5370da4b3128b7dfd944b4fcbb5c7b6887348 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@parisc-linux.org>
Date: Tue, 28 Mar 2006 01:56:11 -0800
Subject: [PATCH] Decrapify asm-generic/local.h

Now that Christoph Lameter's atomic_long_t support is merged in mainline,
might as well convert asm-generic/local.h to use it, so the same code can
be used for both sizes of 32 and 64-bit unsigned longs.

akpm sayeth:

Q:

  Is there any particular reason why these routines weren't simply
  implemented with local_save/restore_flags, if they are only meant to
  guarantee atomicity to the local cpu?  I'm sure on most platforms this
  would be more efficient than using an atomic...

A:

  The whole _point_ of local_t is to avoid local_irq_disable().  It's
  designed to exploit the fact that many CPUs can do incs and decs in a way
  which is atomic wrt local interrupts, but not atomic wrt SMP.

But this patch makes sense, because asm-generic/local.h is just a fallback
implementation for architectures which either cannot perform these
local-irq-atomic operations, or its maintainers haven't yet got around to
implementing them.

We need more work done on local_t in the 2.6.17 timeframe - they're defined as
unsigned long, but some architectures implement them as signed long.

Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h
index 16fc003..de46148 100644
--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -4,28 +4,28 @@
 #include <linux/config.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <asm/atomic.h>
 #include <asm/types.h>
 
 /* An unsigned long type for operations which are atomic for a single
  * CPU.  Usually used in combination with per-cpu variables. */
 
-#if BITS_PER_LONG == 32
 /* Implement in terms of atomics. */
 
 /* Don't use typedef: don't want them to be mixed with atomic_t's. */
 typedef struct
 {
-	atomic_t a;
+	atomic_long_t a;
 } local_t;
 
-#define LOCAL_INIT(i)	{ ATOMIC_INIT(i) }
+#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
-#define local_read(l)	((unsigned long)atomic_read(&(l)->a))
-#define local_set(l,i)	atomic_set((&(l)->a),(i))
-#define local_inc(l)	atomic_inc(&(l)->a)
-#define local_dec(l)	atomic_dec(&(l)->a)
-#define local_add(i,l)	atomic_add((i),(&(l)->a))
-#define local_sub(i,l)	atomic_sub((i),(&(l)->a))
+#define local_read(l)	((unsigned long)atomic_long_read(&(l)->a))
+#define local_set(l,i)	atomic_long_set((&(l)->a),(i))
+#define local_inc(l)	atomic_long_inc(&(l)->a)
+#define local_dec(l)	atomic_long_dec(&(l)->a)
+#define local_add(i,l)	atomic_long_add((i),(&(l)->a))
+#define local_sub(i,l)	atomic_long_sub((i),(&(l)->a))
 
 /* Non-atomic variants, ie. preemption disabled and won't be touched
  * in interrupt, etc.  Some archs can optimize this case well. */
@@ -34,68 +34,6 @@ typedef struct
 #define __local_add(i,l)	local_set((l), local_read(l) + (i))
 #define __local_sub(i,l)	local_set((l), local_read(l) - (i))
 
-#else /* ... can't use atomics. */
-/* Implement in terms of three variables.
-   Another option would be to use local_irq_save/restore. */
-
-typedef struct
-{
-	/* 0 = in hardirq, 1 = in softirq, 2 = usermode. */
-	unsigned long v[3];
-} local_t;
-
-#define _LOCAL_VAR(l)	((l)->v[!in_interrupt() + !in_irq()])
-
-#define LOCAL_INIT(i)	{ { (i), 0, 0 } }
-
-static inline unsigned long local_read(local_t *l)
-{
-	return l->v[0] + l->v[1] + l->v[2];
-}
-
-static inline void local_set(local_t *l, unsigned long v)
-{
-	l->v[0] = v;
-	l->v[1] = l->v[2] = 0;
-}
-
-static inline void local_inc(local_t *l)
-{
-	preempt_disable();
-	_LOCAL_VAR(l)++;
-	preempt_enable();
-}
-
-static inline void local_dec(local_t *l)
-{
-	preempt_disable();
-	_LOCAL_VAR(l)--;
-	preempt_enable();
-}
-
-static inline void local_add(unsigned long v, local_t *l)
-{
-	preempt_disable();
-	_LOCAL_VAR(l) += v;
-	preempt_enable();
-}
-
-static inline void local_sub(unsigned long v, local_t *l)
-{
-	preempt_disable();
-	_LOCAL_VAR(l) -= v;
-	preempt_enable();
-}
-
-/* Non-atomic variants, ie. preemption disabled and won't be touched
- * in interrupt, etc.  Some archs can optimize this case well. */
-#define __local_inc(l)		((l)->v[0]++)
-#define __local_dec(l)		((l)->v[0]--)
-#define __local_add(i,l)	((l)->v[0] += (i))
-#define __local_sub(i,l)	((l)->v[0] -= (i))
-
-#endif /* Non-atomic implementation */
-
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations.  Note they take
  * a variable (eg. mystruct.foo), not an address.
-- 
cgit v0.10.2


From 86a34147d1f1c94e94500e63e83f9fa42548a088 Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Tue, 28 Mar 2006 01:56:14 -0800
Subject: [PATCH] synclink: remove dead code

Remove dead code from synclink driver.  This was used previously when the
write method had a from_user flag, which has been removed.

Signed-off-by: Paul Fulghum <paulkf@microgate.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index d68be61..abb03e5 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -941,17 +941,6 @@ static void* mgsl_get_text_ptr(void)
 	return mgsl_get_text_ptr;
 }
 
-/*
- * tmp_buf is used as a temporary buffer by mgsl_write.  We need to
- * lock it in case the COPY_FROM_USER blocks while swapping in a page,
- * and some other program tries to do a serial write at the same time.
- * Since the lock will only come under contention when the system is
- * swapping and available memory is low, it makes sense to share one
- * buffer across all the serial ioports, since it significantly saves
- * memory if large numbers of serial ports are open.
- */
-static unsigned char *tmp_buf;
-
 static inline int mgsl_paranoia_check(struct mgsl_struct *info,
 					char *name, const char *routine)
 {
@@ -2150,7 +2139,7 @@ static int mgsl_write(struct tty_struct * tty,
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_write"))
 		goto cleanup;
 
-	if (!tty || !info->xmit_buf || !tmp_buf)
+	if (!tty || !info->xmit_buf)
 		goto cleanup;
 
 	if ( info->params.mode == MGSL_MODE_HDLC ||
@@ -3438,7 +3427,6 @@ static int mgsl_open(struct tty_struct *tty, struct file * filp)
 {
 	struct mgsl_struct	*info;
 	int 			retval, line;
-	unsigned long		page;
 	unsigned long flags;
 
 	/* verify range of specified line number */	
@@ -3472,18 +3460,6 @@ static int mgsl_open(struct tty_struct *tty, struct file * filp)
 		goto cleanup;
 	}
 	
-	if (!tmp_buf) {
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page) {
-			retval = -ENOMEM;
-			goto cleanup;
-		}
-		if (tmp_buf)
-			free_page(page);
-		else
-			tmp_buf = (unsigned char *) page;
-	}
-	
 	info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
@@ -4502,11 +4478,6 @@ static void synclink_cleanup(void)
 		kfree(tmp);
 	}
 	
-	if (tmp_buf) {
-		free_page((unsigned long) tmp_buf);
-		tmp_buf = NULL;
-	}
-	
 	if (pci_registered)
 		pci_unregister_driver(&synclink_pci_driver);
 }
-- 
cgit v0.10.2


From 0080b7aae88c75e2a6b38dfcb228b0f239e18e3c Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Tue, 28 Mar 2006 01:56:15 -0800
Subject: [PATCH] synclink_gt add gpio feature

Add driver support for general purpose I/O feature of the Synclink GT
adapters.

Signed-off-by: Paul Fulghum <paulkf@micrgate.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 738ec2f..8818042 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -1,5 +1,5 @@
 /*
- * $Id: synclink_gt.c,v 4.22 2006/01/09 20:16:06 paulkf Exp $
+ * $Id: synclink_gt.c,v 4.25 2006/02/06 21:20:33 paulkf Exp $
  *
  * Device driver for Microgate SyncLink GT serial adapters.
  *
@@ -92,7 +92,7 @@
  * module identification
  */
 static char *driver_name     = "SyncLink GT";
-static char *driver_version  = "$Revision: 4.22 $";
+static char *driver_version  = "$Revision: 4.25 $";
 static char *tty_driver_name = "synclink_gt";
 static char *tty_dev_prefix  = "ttySLG";
 MODULE_LICENSE("GPL");
@@ -188,6 +188,20 @@ static void hdlcdev_exit(struct slgt_info *info);
 #define SLGT_REG_SIZE  256
 
 /*
+ * conditional wait facility
+ */
+struct cond_wait {
+	struct cond_wait *next;
+	wait_queue_head_t q;
+	wait_queue_t wait;
+	unsigned int data;
+};
+static void init_cond_wait(struct cond_wait *w, unsigned int data);
+static void add_cond_wait(struct cond_wait **head, struct cond_wait *w);
+static void remove_cond_wait(struct cond_wait **head, struct cond_wait *w);
+static void flush_cond_wait(struct cond_wait **head);
+
+/*
  * DMA buffer descriptor and access macros
  */
 struct slgt_desc
@@ -269,6 +283,9 @@ struct slgt_info {
 	struct timer_list	tx_timer;
 	struct timer_list	rx_timer;
 
+	unsigned int            gpio_present;
+	struct cond_wait        *gpio_wait_q;
+
 	spinlock_t lock;	/* spinlock for synchronizing with ISR */
 
 	struct work_struct task;
@@ -379,6 +396,11 @@ static MGSL_PARAMS default_params = {
 #define MASK_OVERRUN BIT4
 
 #define GSR   0x00 /* global status */
+#define JCR   0x04 /* JTAG control */
+#define IODR  0x08 /* GPIO direction */
+#define IOER  0x0c /* GPIO interrupt enable */
+#define IOVR  0x10 /* GPIO value */
+#define IOSR  0x14 /* GPIO interrupt status */
 #define TDR   0x80 /* tx data */
 #define RDR   0x80 /* rx data */
 #define TCR   0x82 /* tx control */
@@ -503,6 +525,9 @@ static int  tiocmset(struct tty_struct *tty, struct file *file,
 static void set_break(struct tty_struct *tty, int break_state);
 static int  get_interface(struct slgt_info *info, int __user *if_mode);
 static int  set_interface(struct slgt_info *info, int if_mode);
+static int  set_gpio(struct slgt_info *info, struct gpio_desc __user *gpio);
+static int  get_gpio(struct slgt_info *info, struct gpio_desc __user *gpio);
+static int  wait_gpio(struct slgt_info *info, struct gpio_desc __user *gpio);
 
 /*
  * driver functions
@@ -1112,6 +1137,12 @@ static int ioctl(struct tty_struct *tty, struct file *file,
 		return get_interface(info, argp);
 	case MGSL_IOCSIF:
 		return set_interface(info,(int)arg);
+	case MGSL_IOCSGPIO:
+		return set_gpio(info, argp);
+	case MGSL_IOCGGPIO:
+		return get_gpio(info, argp);
+	case MGSL_IOCWAITGPIO:
+		return wait_gpio(info, argp);
 	case TIOCGICOUNT:
 		spin_lock_irqsave(&info->lock,flags);
 		cnow = info->icount;
@@ -2158,6 +2189,24 @@ static void isr_txeom(struct slgt_info *info, unsigned short status)
 	}
 }
 
+static void isr_gpio(struct slgt_info *info, unsigned int changed, unsigned int state)
+{
+	struct cond_wait *w, *prev;
+
+	/* wake processes waiting for specific transitions */
+	for (w = info->gpio_wait_q, prev = NULL ; w != NULL ; w = w->next) {
+		if (w->data & changed) {
+			w->data = state;
+			wake_up_interruptible(&w->q);
+			if (prev != NULL)
+				prev->next = w->next;
+			else
+				info->gpio_wait_q = w->next;
+		} else
+			prev = w;
+	}
+}
+
 /* interrupt service routine
  *
  * 	irq	interrupt number
@@ -2193,6 +2242,22 @@ static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 		}
 	}
 
+	if (info->gpio_present) {
+		unsigned int state;
+		unsigned int changed;
+		while ((changed = rd_reg32(info, IOSR)) != 0) {
+			DBGISR(("%s iosr=%08x\n", info->device_name, changed));
+			/* read latched state of GPIO signals */
+			state = rd_reg32(info, IOVR);
+			/* clear pending GPIO interrupt bits */
+			wr_reg32(info, IOSR, changed);
+			for (i=0 ; i < info->port_count ; i++) {
+				if (info->port_array[i] != NULL)
+					isr_gpio(info->port_array[i], changed, state);
+			}
+		}
+	}
+
 	for(i=0; i < info->port_count ; i++) {
 		struct slgt_info *port = info->port_array[i];
 
@@ -2276,6 +2341,8 @@ static void shutdown(struct slgt_info *info)
 		set_signals(info);
 	}
 
+	flush_cond_wait(&info->gpio_wait_q);
+
 	spin_unlock_irqrestore(&info->lock,flags);
 
 	if (info->tty)
@@ -2650,6 +2717,175 @@ static int set_interface(struct slgt_info *info, int if_mode)
 	return 0;
 }
 
+/*
+ * set general purpose IO pin state and direction
+ *
+ * user_gpio fields:
+ * state   each bit indicates a pin state
+ * smask   set bit indicates pin state to set
+ * dir     each bit indicates a pin direction (0=input, 1=output)
+ * dmask   set bit indicates pin direction to set
+ */
+static int set_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio)
+{
+ 	unsigned long flags;
+	struct gpio_desc gpio;
+	__u32 data;
+
+	if (!info->gpio_present)
+		return -EINVAL;
+	if (copy_from_user(&gpio, user_gpio, sizeof(gpio)))
+		return -EFAULT;
+	DBGINFO(("%s set_gpio state=%08x smask=%08x dir=%08x dmask=%08x\n",
+		 info->device_name, gpio.state, gpio.smask,
+		 gpio.dir, gpio.dmask));
+
+	spin_lock_irqsave(&info->lock,flags);
+	if (gpio.dmask) {
+		data = rd_reg32(info, IODR);
+		data |= gpio.dmask & gpio.dir;
+		data &= ~(gpio.dmask & ~gpio.dir);
+		wr_reg32(info, IODR, data);
+	}
+	if (gpio.smask) {
+		data = rd_reg32(info, IOVR);
+		data |= gpio.smask & gpio.state;
+		data &= ~(gpio.smask & ~gpio.state);
+		wr_reg32(info, IOVR, data);
+	}
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	return 0;
+}
+
+/*
+ * get general purpose IO pin state and direction
+ */
+static int get_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio)
+{
+	struct gpio_desc gpio;
+	if (!info->gpio_present)
+		return -EINVAL;
+	gpio.state = rd_reg32(info, IOVR);
+	gpio.smask = 0xffffffff;
+	gpio.dir   = rd_reg32(info, IODR);
+	gpio.dmask = 0xffffffff;
+	if (copy_to_user(user_gpio, &gpio, sizeof(gpio)))
+		return -EFAULT;
+	DBGINFO(("%s get_gpio state=%08x dir=%08x\n",
+		 info->device_name, gpio.state, gpio.dir));
+	return 0;
+}
+
+/*
+ * conditional wait facility
+ */
+static void init_cond_wait(struct cond_wait *w, unsigned int data)
+{
+	init_waitqueue_head(&w->q);
+	init_waitqueue_entry(&w->wait, current);
+	w->data = data;
+}
+
+static void add_cond_wait(struct cond_wait **head, struct cond_wait *w)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&w->q, &w->wait);
+	w->next = *head;
+	*head = w;
+}
+
+static void remove_cond_wait(struct cond_wait **head, struct cond_wait *cw)
+{
+	struct cond_wait *w, *prev;
+	remove_wait_queue(&cw->q, &cw->wait);
+	set_current_state(TASK_RUNNING);
+	for (w = *head, prev = NULL ; w != NULL ; prev = w, w = w->next) {
+		if (w == cw) {
+			if (prev != NULL)
+				prev->next = w->next;
+			else
+				*head = w->next;
+			break;
+		}
+	}
+}
+
+static void flush_cond_wait(struct cond_wait **head)
+{
+	while (*head != NULL) {
+		wake_up_interruptible(&(*head)->q);
+		*head = (*head)->next;
+	}
+}
+
+/*
+ * wait for general purpose I/O pin(s) to enter specified state
+ *
+ * user_gpio fields:
+ * state - bit indicates target pin state
+ * smask - set bit indicates watched pin
+ *
+ * The wait ends when at least one watched pin enters the specified
+ * state. When 0 (no error) is returned, user_gpio->state is set to the
+ * state of all GPIO pins when the wait ends.
+ *
+ * Note: Each pin may be a dedicated input, dedicated output, or
+ * configurable input/output. The number and configuration of pins
+ * varies with the specific adapter model. Only input pins (dedicated
+ * or configured) can be monitored with this function.
+ */
+static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *user_gpio)
+{
+ 	unsigned long flags;
+	int rc = 0;
+	struct gpio_desc gpio;
+	struct cond_wait wait;
+	u32 state;
+
+	if (!info->gpio_present)
+		return -EINVAL;
+	if (copy_from_user(&gpio, user_gpio, sizeof(gpio)))
+		return -EFAULT;
+	DBGINFO(("%s wait_gpio() state=%08x smask=%08x\n",
+		 info->device_name, gpio.state, gpio.smask));
+	/* ignore output pins identified by set IODR bit */
+	if ((gpio.smask &= ~rd_reg32(info, IODR)) == 0)
+		return -EINVAL;
+	init_cond_wait(&wait, gpio.smask);
+
+	spin_lock_irqsave(&info->lock, flags);
+	/* enable interrupts for watched pins */
+	wr_reg32(info, IOER, rd_reg32(info, IOER) | gpio.smask);
+	/* get current pin states */
+	state = rd_reg32(info, IOVR);
+
+	if (gpio.smask & ~(state ^ gpio.state)) {
+		/* already in target state */
+		gpio.state = state;
+	} else {
+		/* wait for target state */
+		add_cond_wait(&info->gpio_wait_q, &wait);
+		spin_unlock_irqrestore(&info->lock, flags);
+		schedule();
+		if (signal_pending(current))
+			rc = -ERESTARTSYS;
+		else
+			gpio.state = wait.data;
+		spin_lock_irqsave(&info->lock, flags);
+		remove_cond_wait(&info->gpio_wait_q, &wait);
+	}
+
+	/* disable all GPIO interrupts if no waiting processes */
+	if (info->gpio_wait_q == NULL)
+		wr_reg32(info, IOER, 0);
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	if ((rc == 0) && copy_to_user(user_gpio, &gpio, sizeof(gpio)))
+		rc = -EFAULT;
+	return rc;
+}
+
 static int modem_input_wait(struct slgt_info *info,int arg)
 {
  	unsigned long flags;
@@ -3166,8 +3402,10 @@ static void device_init(int adapter_num, struct pci_dev *pdev)
 		} else {
 			port_array[0]->irq_requested = 1;
 			adapter_test(port_array[0]);
-			for (i=1 ; i < port_count ; i++)
+			for (i=1 ; i < port_count ; i++) {
 				port_array[i]->init_error = port_array[0]->init_error;
+				port_array[i]->gpio_present = port_array[0]->gpio_present;
+			}
 		}
 	}
 }
@@ -4301,7 +4539,7 @@ static int register_test(struct slgt_info *info)
 			break;
 		}
 	}
-
+	info->gpio_present = (rd_reg32(info, JCR) & BIT5) ? 1 : 0;
 	info->init_error = rc ? 0 : DiagStatus_AddressFailure;
 	return rc;
 }
diff --git a/include/linux/synclink.h b/include/linux/synclink.h
index 1b7cd8d..2993302 100644
--- a/include/linux/synclink.h
+++ b/include/linux/synclink.h
@@ -1,7 +1,7 @@
 /*
  * SyncLink Multiprotocol Serial Adapter Driver
  *
- * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $
+ * $Id: synclink.h,v 3.11 2006/02/06 21:20:29 paulkf Exp $
  *
  * Copyright (C) 1998-2000 by Microgate Corporation
  *
@@ -221,6 +221,12 @@ struct mgsl_icount {
 	__u32	rxidle;
 };
 
+struct gpio_desc {
+	__u32 state;
+	__u32 smask;
+	__u32 dir;
+	__u32 dmask;
+};
 
 #define DEBUG_LEVEL_DATA	1
 #define DEBUG_LEVEL_ERROR 	2
@@ -276,5 +282,8 @@ struct mgsl_icount {
 #define MGSL_IOCLOOPTXDONE	_IO(MGSL_MAGIC_IOC,9)
 #define MGSL_IOCSIF		_IO(MGSL_MAGIC_IOC,10)
 #define MGSL_IOCGIF		_IO(MGSL_MAGIC_IOC,11)
+#define MGSL_IOCSGPIO		_IOW(MGSL_MAGIC_IOC,16,struct gpio_desc)
+#define MGSL_IOCGGPIO		_IOR(MGSL_MAGIC_IOC,17,struct gpio_desc)
+#define MGSL_IOCWAITGPIO	_IOWR(MGSL_MAGIC_IOC,18,struct gpio_desc)
 
 #endif /* _SYNCLINK_H_ */
-- 
cgit v0.10.2


From be4676e61f969b624185f14d819adef23d45ffc2 Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Tue, 28 Mar 2006 01:56:16 -0800
Subject: [PATCH] synclink_gt: remove uneeded async code

Remove code in async receive handling that serves no purpose with new tty
receive buffering.  Previously this code tried to free up receive buffer
space, but now does nothing useful while making expensive calls.

Signed-off-by: Paul Fulghum <paulkf@microgate.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 8818042..b4d1f4e 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -1793,10 +1793,6 @@ static void rx_async(struct slgt_info *info)
 		DBGDATA(info, p, count, "rx");
 
 		for(i=0 ; i < count; i+=2, p+=2) {
-			if (tty && chars) {
-				tty_flip_buffer_push(tty);
-				chars = 0;
-			}
 			ch = *p;
 			icount->rx++;
 
-- 
cgit v0.10.2


From a687fb18cbd061de2092632cf77e6b9dc93cf7cd Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:17 -0800
Subject: [PATCH] let BLK_DEV_RAM_COUNT depend on BLK_DEV_RAM

It's purely cosmetic, but with the patch there's no longer a
BLK_DEV_RAM_COUNT setting in the .config if BLK_DEV_RAM=n.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 875ae76..ae0949b 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -383,8 +383,9 @@ config BLK_DEV_RAM
 	  thus say N here.
 
 config BLK_DEV_RAM_COUNT
-	int "Default number of RAM disks" if BLK_DEV_RAM
+	int "Default number of RAM disks"
 	default "16"
+	depends on BLK_DEV_RAM
 	help
 	  The default value is 16 RAM disks. Change this if you know what
 	  are doing. If you boot from a filesystem that needs to be extracted
-- 
cgit v0.10.2


From 8637980bab3f09157eef20cc65d2eb7393c770fd Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:18 -0800
Subject: [PATCH] paride: register_chrdev fix

If the user specified `major=0' (odd thing to do), pg.c will use dynamic
allocation.  We need to pick up that major for subsequent unregister_chrdev().

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 6f5df0f..79b8682 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -643,7 +643,8 @@ static ssize_t pg_read(struct file *filp, char __user *buf, size_t count, loff_t
 
 static int __init pg_init(void)
 {
-	int unit, err = 0;
+	int unit;
+	int err;
 
 	if (disable){
 		err = -1;
@@ -657,16 +658,17 @@ static int __init pg_init(void)
 		goto out;
 	}
 
-	if (register_chrdev(major, name, &pg_fops)) {
+	err = register_chrdev(major, name, &pg_fops);
+	if (err < 0) {
 		printk("pg_init: unable to get major number %d\n", major);
 		for (unit = 0; unit < PG_UNITS; unit++) {
 			struct pg *dev = &devices[unit];
 			if (dev->present)
 				pi_release(dev->pi);
 		}
-		err = -1;
 		goto out;
 	}
+	major = err;	/* In case the user specified `major=0' (dynamic) */
 	pg_class = class_create(THIS_MODULE, "pg");
 	if (IS_ERR(pg_class)) {
 		err = PTR_ERR(pg_class);
-- 
cgit v0.10.2


From 829d5f68ec59ff7c0fdd472132680df8e4b64f3e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:18 -0800
Subject: [PATCH] paride-pt: register_chrdev fix

If the user specified `major=0' (odd thing to do), pt.c will use dynamic
allocation.  We need to pick up that major for subsequent unregister_chrdev().

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 715ae5d..d2013d3 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -943,7 +943,8 @@ static ssize_t pt_write(struct file *filp, const char __user *buf, size_t count,
 
 static int __init pt_init(void)
 {
-	int unit, err = 0;
+	int unit;
+	int err;
 
 	if (disable) {
 		err = -1;
@@ -955,14 +956,15 @@ static int __init pt_init(void)
 		goto out;
 	}
 
-	if (register_chrdev(major, name, &pt_fops)) {
+	err = register_chrdev(major, name, &pt_fops);
+	if (err < 0) {
 		printk("pt_init: unable to get major number %d\n", major);
 		for (unit = 0; unit < PT_UNITS; unit++)
 			if (pt[unit].present)
 				pi_release(pt[unit].pi);
-		err = -1;
 		goto out;
 	}
+	major = err;
 	pt_class = class_create(THIS_MODULE, "pt");
 	if (IS_ERR(pt_class)) {
 		err = PTR_ERR(pt_class);
-- 
cgit v0.10.2


From 6d9eac34104654aa129e365b8d48bbb8c957f186 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:19 -0800
Subject: [PATCH] capi: register_chrdev() fix

If the user specified `major=0' (odd thing to do), capi.c will use dynamic
allocation.  We need to pick up that major for subsequent unregister_chrdev().

Acked-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 623adbb..9b493f0 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1485,6 +1485,7 @@ static int __init capi_init(void)
 {
 	char *p;
 	char *compileinfo;
+	int major_ret;
 
 	if ((p = strchr(revision, ':')) != 0 && p[1]) {
 		strlcpy(rev, p + 2, sizeof(rev));
@@ -1493,11 +1494,12 @@ static int __init capi_init(void)
 	} else
 		strcpy(rev, "1.0");
 
-	if (register_chrdev(capi_major, "capi20", &capi_fops)) {
+	major_ret = register_chrdev(capi_major, "capi20", &capi_fops);
+	if (major_ret < 0) {
 		printk(KERN_ERR "capi20: unable to get major %d\n", capi_major);
-		return -EIO;
+		return major_ret;
 	}
-
+	capi_major = major_ret;
 	capi_class = class_create(THIS_MODULE, "capi");
 	if (IS_ERR(capi_class)) {
 		unregister_chrdev(capi_major, "capi20");
-- 
cgit v0.10.2


From f83ca9fe3ee390755f18b4a7780c25ce593b484a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:20 -0800
Subject: [PATCH] symversion warning fix

gcc-4.2:

kernel/module.c: In function '__find_symbol':
kernel/module.c:158: warning: the address of '__start___kcrctab', will always evaluate as 'true'
kernel/module.c:165: warning: the address of '__start___kcrctab_gpl', will always evaluate as 'true'
kernel/module.c:182: warning: the address of '__start___kcrctab_gpl_future', will always evaluate as 'true'

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/module.c b/kernel/module.c
index 4fafd58..bd088a7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -127,7 +127,7 @@ extern const unsigned long __start___kcrctab_gpl_future[];
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
 #else
-#define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL)
+#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
 #endif
 
 /* lookup symbol in given range of kernel_symbols */
-- 
cgit v0.10.2


From 273577165cd206d2d6689ee4b18aa13de1ec4bde Mon Sep 17 00:00:00 2001
From: Brian Rogan <bcr6@cornell.edu>
Date: Tue, 28 Mar 2006 01:56:20 -0800
Subject: [PATCH] Add oprofile_add_ext_sample

On ppc64 we look at a profiling register to work out the sample address and
if it was in userspace or kernel.

The backtrace interface oprofile_add_sample does not allow this.  Create
oprofile_add_ext_sample and make oprofile_add_sample use it too.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Philippe Elie <phil.el@wanadoo.fr>
Cc: John Levon <levon@movementarian.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 330d386..fc4bc9b 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -217,11 +217,10 @@ static void oprofile_end_trace(struct oprofile_cpu_buffer * cpu_buf)
 	cpu_buf->tracing = 0;
 }
 
-void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
+void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+				unsigned long event, int is_kernel)
 {
 	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
-	unsigned long pc = profile_pc(regs);
-	int is_kernel = !user_mode(regs);
 
 	if (!backtrace_depth) {
 		log_sample(cpu_buf, pc, is_kernel, event);
@@ -238,6 +237,14 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 	oprofile_end_trace(cpu_buf);
 }
 
+void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
+{
+	int is_kernel = !user_mode(regs);
+	unsigned long pc = profile_pc(regs);
+
+	oprofile_add_ext_sample(pc, regs, event, is_kernel);
+}
+
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
 	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 559c4c3..b5b3197 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -61,6 +61,16 @@ void oprofile_arch_exit(void);
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 
+/**
+ * Add an extended sample.  Use this when the PC is not from the regs, and
+ * we cannot determine if we're in kernel mode from the regs.
+ *
+ * This function does perform a backtrace.
+ *
+ */
+void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+				unsigned long event, int is_kernel);
+
 /* Use this instead when the PC value is not from the regs. Doesn't
  * backtrace. */
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);
-- 
cgit v0.10.2


From 7a1e524a5fe6c5bf9dd4488e946fa835fda8c3d9 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:22 -0800
Subject: [PATCH] alpha: make poll flags the same as other architectures

Renumber the recently-added POLLREMOVE and POLLRDHUP to line up with the other
architectures.

Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-alpha/poll.h b/include/asm-alpha/poll.h
index 9570718..76f8935 100644
--- a/include/asm-alpha/poll.h
+++ b/include/asm-alpha/poll.h
@@ -12,8 +12,8 @@
 #define POLLWRNORM	(1 << 8)
 #define POLLWRBAND	(1 << 9)
 #define POLLMSG		(1 << 10)
-#define POLLREMOVE	(1 << 11)
-#define POLLRDHUP       (1 << 12)
+#define POLLREMOVE	(1 << 12)
+#define POLLRDHUP       (1 << 13)
 
 
 struct pollfd {
-- 
cgit v0.10.2


From 7b7a317cf863559b49b548a8b97b2f4bdf1e149e Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Tue, 28 Mar 2006 01:56:23 -0800
Subject: [PATCH] mqueue comment typo fix

(akpm: I don't do comment typos patches.  This one snuck through by accident)

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 532ec3b..41ecbd4 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -762,7 +762,7 @@ out_unlock:
  * The receiver accepts the message and returns without grabbing the queue
  * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
  * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c fore more details.
+ * ipc/sem.c for more details.
  *
  * The same algorithm is used for senders.
  */
-- 
cgit v0.10.2


From b9e20a920092eb3840424f85c78852c0433df00d Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Tue, 28 Mar 2006 01:56:24 -0800
Subject: [PATCH] Change dash2underscore() return value to char

Since dash2underscore() just operates and returns chars, I guess its safe
to change the return value to a char.  With my .config, this reduces its
size by 5 bytes.

   text    data     bss     dec     hex filename
   4155     152       0    4307    10d3 params.o.orig
   4150     152       0    4302    10ce params.o

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/params.c b/kernel/params.c
index 9de637a..af43ecd 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -31,7 +31,7 @@
 #define DEBUGP(fmt, a...)
 #endif
 
-static inline int dash2underscore(char c)
+static inline char dash2underscore(char c)
 {
 	if (c == '-')
 		return '_';
-- 
cgit v0.10.2


From 3b71797eff4352b4295919efc52de84f84d33d94 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:25 -0800
Subject: [PATCH] drivers/block/paride/pd.c: fix an off-by-one error

The Coverity checker found this off-by-one error.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 62d2464..2403721 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -151,6 +151,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 #include <linux/cdrom.h>	/* for the eject ioctl */
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
+#include <linux/kernel.h>
 #include <asm/uaccess.h>
 #include <linux/sched.h>
 #include <linux/workqueue.h>
@@ -275,7 +276,7 @@ static void pd_print_error(struct pd_unit *disk, char *msg, int status)
 	int i;
 
 	printk("%s: %s: status = 0x%x =", disk->name, msg, status);
-	for (i = 0; i < 18; i++)
+	for (i = 0; i < ARRAY_SIZE(pd_errs); i++)
 		if (status & (1 << i))
 			printk(" %s", pd_errs[i]);
 	printk("\n");
-- 
cgit v0.10.2


From a28af471b8946de052a0eb0c080d5457be93f168 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:26 -0800
Subject: [PATCH] fs/fat/: proper prototypes for two functions

Add proper prototypes for fat_cache_init() and fat_cache_destroy() in
msdos_fs.h.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 404bfc9..c1ce284 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1435,9 +1435,6 @@ out_fail:
 
 EXPORT_SYMBOL_GPL(fat_fill_super);
 
-int __init fat_cache_init(void);
-void fat_cache_destroy(void);
-
 static int __init init_fat_fs(void)
 {
 	int err;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 779e6a5..53cee15 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -420,6 +420,9 @@ extern int date_dos2unix(unsigned short time, unsigned short date);
 extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
 extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
 
+int fat_cache_init(void);
+void fat_cache_destroy(void);
+
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v0.10.2


From 3ca1da4a82b169417b738a35783008c35da26466 Mon Sep 17 00:00:00 2001
From: Philip Gladstone <pgladstone@cisco.com>
Date: Tue, 28 Mar 2006 01:56:27 -0800
Subject: [PATCH] Philip Gladstone has moved

I noticed that my email address is four jobs ago.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/CREDITS b/CREDITS
index 35850d8..0bf31ea 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1127,8 +1127,10 @@ S: Carnegie, Pennsylvania 15106-4304
 S: USA
 
 N: Philip Gladstone
-E: philip@raptor.com
+E: philip@gladstonefamily.net
 D: Kernel / timekeeping stuff
+S: Carlisle, MA 01741
+S: USA
   
 N: Jan-Benedict Glaw
 E: jbglaw@lug-owl.de
-- 
cgit v0.10.2


From e51236092d2f7e40e87e88804b5b42e5f8025415 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:27 -0800
Subject: [PATCH] remove relayfs_fs.h

This is obsolete.

Cc: Tom Zanussi <zanussi@us.ibm.com>
Cc: Jens Axboe <axboe@suse.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h
deleted file mode 100644
index 7342e66..0000000
--- a/include/linux/relayfs_fs.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * linux/include/linux/relayfs_fs.h
- *
- * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
- * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com)
- *
- * RelayFS definitions and declarations
- */
-
-#ifndef _LINUX_RELAYFS_FS_H
-#define _LINUX_RELAYFS_FS_H
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/list.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include <linux/kref.h>
-
-/*
- * Tracks changes to rchan/rchan_buf structs
- */
-#define RELAYFS_CHANNEL_VERSION		6
-
-/*
- * Per-cpu relay channel buffer
- */
-struct rchan_buf
-{
-	void *start;			/* start of channel buffer */
-	void *data;			/* start of current sub-buffer */
-	size_t offset;			/* current offset into sub-buffer */
-	size_t subbufs_produced;	/* count of sub-buffers produced */
-	size_t subbufs_consumed;	/* count of sub-buffers consumed */
-	struct rchan *chan;		/* associated channel */
-	wait_queue_head_t read_wait;	/* reader wait queue */
-	struct work_struct wake_readers; /* reader wake-up work struct */
-	struct dentry *dentry;		/* channel file dentry */
-	struct kref kref;		/* channel buffer refcount */
-	struct page **page_array;	/* array of current buffer pages */
-	unsigned int page_count;	/* number of current buffer pages */
-	unsigned int finalized;		/* buffer has been finalized */
-	size_t *padding;		/* padding counts per sub-buffer */
-	size_t prev_padding;		/* temporary variable */
-	size_t bytes_consumed;		/* bytes consumed in cur read subbuf */
-	unsigned int cpu;		/* this buf's cpu */
-} ____cacheline_aligned;
-
-/*
- * Relay channel data structure
- */
-struct rchan
-{
-	u32 version;			/* the version of this struct */
-	size_t subbuf_size;		/* sub-buffer size */
-	size_t n_subbufs;		/* number of sub-buffers per buffer */
-	size_t alloc_size;		/* total buffer size allocated */
-	struct rchan_callbacks *cb;	/* client callbacks */
-	struct kref kref;		/* channel refcount */
-	void *private_data;		/* for user-defined data */
-	size_t last_toobig;		/* tried to log event > subbuf size */
-	struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
-};
-
-/*
- * Relay channel client callbacks
- */
-struct rchan_callbacks
-{
-	/*
-	 * subbuf_start - called on buffer-switch to a new sub-buffer
-	 * @buf: the channel buffer containing the new sub-buffer
-	 * @subbuf: the start of the new sub-buffer
-	 * @prev_subbuf: the start of the previous sub-buffer
-	 * @prev_padding: unused space at the end of previous sub-buffer
-	 *
-	 * The client should return 1 to continue logging, 0 to stop
-	 * logging.
-	 *
-	 * NOTE: subbuf_start will also be invoked when the buffer is
-	 *       created, so that the first sub-buffer can be initialized
-	 *       if necessary.  In this case, prev_subbuf will be NULL.
-	 *
-	 * NOTE: the client can reserve bytes at the beginning of the new
-	 *       sub-buffer by calling subbuf_start_reserve() in this callback.
-	 */
-	int (*subbuf_start) (struct rchan_buf *buf,
-			     void *subbuf,
-			     void *prev_subbuf,
-			     size_t prev_padding);
-
-	/*
-	 * buf_mapped - relayfs buffer mmap notification
-	 * @buf: the channel buffer
-	 * @filp: relayfs file pointer
-	 *
-	 * Called when a relayfs file is successfully mmapped
-	 */
-        void (*buf_mapped)(struct rchan_buf *buf,
-			   struct file *filp);
-
-	/*
-	 * buf_unmapped - relayfs buffer unmap notification
-	 * @buf: the channel buffer
-	 * @filp: relayfs file pointer
-	 *
-	 * Called when a relayfs file is successfully unmapped
-	 */
-        void (*buf_unmapped)(struct rchan_buf *buf,
-			     struct file *filp);
-	/*
-	 * create_buf_file - create file to represent a relayfs channel buffer
-	 * @filename: the name of the file to create
-	 * @parent: the parent of the file to create
-	 * @mode: the mode of the file to create
-	 * @buf: the channel buffer
-	 * @is_global: outparam - set non-zero if the buffer should be global
-	 *
-	 * Called during relay_open(), once for each per-cpu buffer,
-	 * to allow the client to create a file to be used to
-	 * represent the corresponding channel buffer.  If the file is
-	 * created outside of relayfs, the parent must also exist in
-	 * that filesystem.
-	 *
-	 * The callback should return the dentry of the file created
-	 * to represent the relay buffer.
-	 *
-	 * Setting the is_global outparam to a non-zero value will
-	 * cause relay_open() to create a single global buffer rather
-	 * than the default set of per-cpu buffers.
-	 *
-	 * See Documentation/filesystems/relayfs.txt for more info.
-	 */
-	struct dentry *(*create_buf_file)(const char *filename,
-					  struct dentry *parent,
-					  int mode,
-					  struct rchan_buf *buf,
-					  int *is_global);
-
-	/*
-	 * remove_buf_file - remove file representing a relayfs channel buffer
-	 * @dentry: the dentry of the file to remove
-	 *
-	 * Called during relay_close(), once for each per-cpu buffer,
-	 * to allow the client to remove a file used to represent a
-	 * channel buffer.
-	 *
-	 * The callback should return 0 if successful, negative if not.
-	 */
-	int (*remove_buf_file)(struct dentry *dentry);
-};
-
-/*
- * relayfs kernel API, fs/relayfs/relay.c
- */
-
-struct rchan *relay_open(const char *base_filename,
-			 struct dentry *parent,
-			 size_t subbuf_size,
-			 size_t n_subbufs,
-			 struct rchan_callbacks *cb);
-extern void relay_close(struct rchan *chan);
-extern void relay_flush(struct rchan *chan);
-extern void relay_subbufs_consumed(struct rchan *chan,
-				   unsigned int cpu,
-				   size_t consumed);
-extern void relay_reset(struct rchan *chan);
-extern int relay_buf_full(struct rchan_buf *buf);
-
-extern size_t relay_switch_subbuf(struct rchan_buf *buf,
-				  size_t length);
-extern struct dentry *relayfs_create_dir(const char *name,
-					 struct dentry *parent);
-extern int relayfs_remove_dir(struct dentry *dentry);
-extern struct dentry *relayfs_create_file(const char *name,
-					  struct dentry *parent,
-					  int mode,
-					  struct file_operations *fops,
-					  void *data);
-extern int relayfs_remove_file(struct dentry *dentry);
-
-/**
- *	relay_write - write data into the channel
- *	@chan: relay channel
- *	@data: data to be written
- *	@length: number of bytes to write
- *
- *	Writes data into the current cpu's channel buffer.
- *
- *	Protects the buffer by disabling interrupts.  Use this
- *	if you might be logging from interrupt context.  Try
- *	__relay_write() if you know you	won't be logging from
- *	interrupt context.
- */
-static inline void relay_write(struct rchan *chan,
-			       const void *data,
-			       size_t length)
-{
-	unsigned long flags;
-	struct rchan_buf *buf;
-
-	local_irq_save(flags);
-	buf = chan->buf[smp_processor_id()];
-	if (unlikely(buf->offset + length > chan->subbuf_size))
-		length = relay_switch_subbuf(buf, length);
-	memcpy(buf->data + buf->offset, data, length);
-	buf->offset += length;
-	local_irq_restore(flags);
-}
-
-/**
- *	__relay_write - write data into the channel
- *	@chan: relay channel
- *	@data: data to be written
- *	@length: number of bytes to write
- *
- *	Writes data into the current cpu's channel buffer.
- *
- *	Protects the buffer by disabling preemption.  Use
- *	relay_write() if you might be logging from interrupt
- *	context.
- */
-static inline void __relay_write(struct rchan *chan,
-				 const void *data,
-				 size_t length)
-{
-	struct rchan_buf *buf;
-
-	buf = chan->buf[get_cpu()];
-	if (unlikely(buf->offset + length > buf->chan->subbuf_size))
-		length = relay_switch_subbuf(buf, length);
-	memcpy(buf->data + buf->offset, data, length);
-	buf->offset += length;
-	put_cpu();
-}
-
-/**
- *	relay_reserve - reserve slot in channel buffer
- *	@chan: relay channel
- *	@length: number of bytes to reserve
- *
- *	Returns pointer to reserved slot, NULL if full.
- *
- *	Reserves a slot in the current cpu's channel buffer.
- *	Does not protect the buffer at all - caller must provide
- *	appropriate synchronization.
- */
-static inline void *relay_reserve(struct rchan *chan, size_t length)
-{
-	void *reserved;
-	struct rchan_buf *buf = chan->buf[smp_processor_id()];
-
-	if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
-		length = relay_switch_subbuf(buf, length);
-		if (!length)
-			return NULL;
-	}
-	reserved = buf->data + buf->offset;
-	buf->offset += length;
-
-	return reserved;
-}
-
-/**
- *	subbuf_start_reserve - reserve bytes at the start of a sub-buffer
- *	@buf: relay channel buffer
- *	@length: number of bytes to reserve
- *
- *	Helper function used to reserve bytes at the beginning of
- *	a sub-buffer in the subbuf_start() callback.
- */
-static inline void subbuf_start_reserve(struct rchan_buf *buf,
-					size_t length)
-{
-	BUG_ON(length >= buf->chan->subbuf_size - 1);
-	buf->offset = length;
-}
-
-/*
- * exported relay file operations, fs/relayfs/inode.c
- */
-extern struct file_operations relay_file_operations;
-
-#endif /* _LINUX_RELAYFS_FS_H */
-
-- 
cgit v0.10.2


From fa8f399a2bc36c1329672400857757e0982babf3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@mail.ru>
Date: Tue, 28 Mar 2006 01:56:28 -0800
Subject: [PATCH] drivers/block/acsi_slm.c: size_t can't be < 0

A size_t can't be < 0.

(akpm: and rw_verify_area() already did that check)

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/acsi_slm.c b/drivers/block/acsi_slm.c
index a5c1c8e..4cb9c13 100644
--- a/drivers/block/acsi_slm.c
+++ b/drivers/block/acsi_slm.c
@@ -369,8 +369,6 @@ static ssize_t slm_read( struct file *file, char *buf, size_t count,
 	int length;
 	int end;
 
-	if (count < 0)
-		return( -EINVAL );
 	if (!(page = __get_free_page( GFP_KERNEL )))
 		return( -ENOMEM );
 	
-- 
cgit v0.10.2


From f5b95ff010d0a4e40f877277f8f0e62fcd83b5a8 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:29 -0800
Subject: [PATCH] autofs4: proper prototype for autofs4_dentry_release()

Add a proper prototype for autofs4_dentry_release() to autofs_i.h.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 617fd7b..c70204a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -230,3 +230,6 @@ static inline int __simple_empty(struct dentry *dentry)
 out:
 	return ret;
 }
+
+void autofs4_dentry_release(struct dentry *);
+
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 4eddee4e..fde78b1 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -292,7 +292,6 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
 	return ino;
 }
 
-void autofs4_dentry_release(struct dentry *);
 static struct dentry_operations autofs4_sb_dentry_operations = {
 	.d_release      = autofs4_dentry_release,
 };
-- 
cgit v0.10.2


From 9edc91df07a227dbde9f98ee1097f554130993dc Mon Sep 17 00:00:00 2001
From: Rene Herman <rene.herman@keyaccess.nl>
Date: Tue, 28 Mar 2006 01:56:30 -0800
Subject: [PATCH] ide: AMD756 no host side cable detection

>From http://marc.theaimsgroup.com/?l=linux-kernel&m=110304128900342&w=2

AMD756 doesn't support host side cable detection. Do disk side only and
don't advice obsolete options.

Acked-by: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 21965e5..b22ee54 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -347,10 +347,8 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
 			break;
 
 		case AMD_UDMA_66:
-			pci_read_config_dword(dev, AMD_UDMA_TIMING, &u);
-			for (i = 24; i >= 0; i -= 8)
-				if ((u >> i) & 4)
-					amd_80w |= (1 << (1 - (i >> 4)));
+			/* no host side cable detection */
+			amd_80w = 0x03;
 			break;
 	}
 
@@ -386,8 +384,6 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
 	if (amd_clock < 20000 || amd_clock > 50000) {
 		printk(KERN_WARNING "%s: User given PCI clock speed impossible (%d), using 33 MHz instead.\n",
 			amd_chipset->name, amd_clock);
-		printk(KERN_WARNING "%s: Use ide0=ata66 if you want to assume 80-wire cable\n",
-			amd_chipset->name);
 		amd_clock = 33333;
 	}
 
-- 
cgit v0.10.2


From d266ab88938e49aa95f1965ee020df1b1d4c5761 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Tue, 28 Mar 2006 01:56:31 -0800
Subject: [PATCH] Small fixes backported to old IDE SiS driver

Some quick backport bits from the libata PATA work to fix things found in
the sis driver.  The piix driver needs some fixes too but those are way to
large and need someone working on old IDE with time to do them.

This patch fixes the case where random bits get loaded into SIS timing
registers according to the description of the correct behaviour from
Vojtech Pavlik.  It also adds the SiS5517 ATA16 chipset which is not
currently supported by the driver.  Thanks to Conrad Harriss for loaning me
the machine with the 5517 chipset.

Signed-off-by: Alan Cox <alan@redhat.com>
Acked-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 75a2253..8e9d877 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -112,6 +112,7 @@ static const struct {
 
 	{ "SiS5596",	PCI_DEVICE_ID_SI_5596,	ATA_16   },
 	{ "SiS5571",	PCI_DEVICE_ID_SI_5571,	ATA_16   },
+	{ "SiS5517",	PCI_DEVICE_ID_SI_5517,	ATA_16   },
 	{ "SiS551x",	PCI_DEVICE_ID_SI_5511,	ATA_16   },
 };
 
@@ -524,6 +525,7 @@ static void config_art_rwp_pio (ide_drive_t *drive, u8 pio)
 			case 3:		test1 = 0x30|0x03; break;
 			case 2:		test1 = 0x40|0x04; break;
 			case 1:		test1 = 0x60|0x07; break;
+			case 0:		test1 = 0x00; break;
 			default:	break;
 		}
 		pci_write_config_byte(dev, drive_pci, test1);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 02f6cf2..e2ab2ac 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -642,6 +642,7 @@
 #define PCI_DEVICE_ID_SI_965		0x0965
 #define PCI_DEVICE_ID_SI_5511		0x5511
 #define PCI_DEVICE_ID_SI_5513		0x5513
+#define PCI_DEVICE_ID_SI_5517		0x5517
 #define PCI_DEVICE_ID_SI_5518		0x5518
 #define PCI_DEVICE_ID_SI_5571		0x5571
 #define PCI_DEVICE_ID_SI_5581		0x5581
-- 
cgit v0.10.2


From b02389e98a7b64ad5cd4823740defa8821f30bbd Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:32 -0800
Subject: [PATCH] ide_generic_all_on() warning fix

drivers/ide/pci/generic.c:45: warning: `ide_generic_all_on' defined but not used

Cc: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 6e3ab0c..f82e821 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -41,14 +41,15 @@
 
 static int ide_generic_all;		/* Set to claim all devices */
 
+#ifndef MODULE
 static int __init ide_generic_all_on(char *unused)
 {
 	ide_generic_all = 1;
 	printk(KERN_INFO "IDE generic will claim all unknown PCI IDE storage controllers.\n");
 	return 1;
 }
-
 __setup("all-generic-ide", ide_generic_all_on);
+#endif
 
 static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 {
-- 
cgit v0.10.2


From 70674f95c0a2ea694d5c39f4e514f538a09be36f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 28 Mar 2006 01:56:33 -0800
Subject: [PATCH] Optimize select/poll by putting small data sets on the stack

Optimize select and poll by a using stack space for small fd sets

This brings back an old optimization from Linux 2.0.  Using the stack is
faster than kmalloc.  On a Intel P4 system it speeds up a select of a
single pty fd by about 13% (~4000 cycles -> ~3500)

It also saves memory because a daemon hanging in select or poll will
usually save one or two less pages.  This can add up - e.g.  if you have 10
daemons blocking in poll/select you save 40KB of memory.

I did a patch for this long ago, but it was never applied.  This version is
a reimplementation of the old patch that tries to be less intrusive.  I
only did the minimal changes needed for the stack allocation.

The cut off point before external memory is allocated is currently at
832bytes.  The system calls always allocate this much memory on the stack.

These 832 bytes are divided into 256 bytes frontend data (for the select
bitmaps of the pollfds) and the rest of the space for the wait queues used
by the low level drivers.  There are some extreme cases where this won't
work out for select and it falls back to allocating memory too early -
especially with very sparse large select bitmaps - but the majority of
processes who only have a small number of file descriptors should be ok.
[TBD: 832/256 might not be the best split for select or poll]

I suspect more optimizations might be possible, but they would be more
complicated.  One way would be to cache the select/poll context over
multiple system calls because typically the input values should be similar.
 Problem is when to flush the file descriptors out though.

Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/select.c b/fs/select.c
index 1815a57..d8b4f07 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -29,12 +29,6 @@
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
-struct poll_table_entry {
-	struct file * filp;
-	wait_queue_t wait;
-	wait_queue_head_t * wait_address;
-};
-
 struct poll_table_page {
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
@@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *pwq)
 	init_poll_funcptr(&pwq->pt, __pollwait);
 	pwq->error = 0;
 	pwq->table = NULL;
+	pwq->inline_index = 0;
 }
 
 EXPORT_SYMBOL(poll_initwait);
 
+static void free_poll_entry(struct poll_table_entry *entry)
+{
+	remove_wait_queue(entry->wait_address,&entry->wait);
+	fput(entry->filp);
+}
+
 void poll_freewait(struct poll_wqueues *pwq)
 {
 	struct poll_table_page * p = pwq->table;
+	int i;
+	for (i = 0; i < pwq->inline_index; i++)
+		free_poll_entry(pwq->inline_entries + i);
 	while (p) {
 		struct poll_table_entry * entry;
 		struct poll_table_page *old;
@@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *pwq)
 		entry = p->entry;
 		do {
 			entry--;
-			remove_wait_queue(entry->wait_address,&entry->wait);
-			fput(entry->filp);
+			free_poll_entry(entry);
 		} while (entry > p->entries);
 		old = p;
 		p = p->next;
@@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *pwq)
 
 EXPORT_SYMBOL(poll_freewait);
 
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-		       poll_table *_p)
+static struct poll_table_entry *poll_get_entry(poll_table *_p)
 {
 	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
 	struct poll_table_page *table = p->table;
 
+	if (p->inline_index < N_INLINE_POLL_ENTRIES)
+		return p->inline_entries + p->inline_index++;
+
 	if (!table || POLL_TABLE_FULL(table)) {
 		struct poll_table_page *new_table;
 
@@ -102,7 +107,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 		if (!new_table) {
 			p->error = -ENOMEM;
 			__set_current_state(TASK_RUNNING);
-			return;
+			return NULL;
 		}
 		new_table->entry = new_table->entries;
 		new_table->next = table;
@@ -110,16 +115,21 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 		table = new_table;
 	}
 
-	/* Add a new entry */
-	{
-		struct poll_table_entry * entry = table->entry;
-		table->entry = entry+1;
-	 	get_file(filp);
-	 	entry->filp = filp;
-		entry->wait_address = wait_address;
-		init_waitqueue_entry(&entry->wait, current);
-		add_wait_queue(wait_address,&entry->wait);
-	}
+	return table->entry++;
+}
+
+/* Add a new entry */
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+				poll_table *p)
+{
+	struct poll_table_entry *entry = poll_get_entry(p);
+	if (!entry)
+		return;
+	get_file(filp);
+	entry->filp = filp;
+	entry->wait_address = wait_address;
+	init_waitqueue_entry(&entry->wait, current);
+	add_wait_queue(wait_address,&entry->wait);
 }
 
 #define FDS_IN(fds, n)		(fds->in + n)
@@ -284,16 +294,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 	return retval;
 }
 
-static void *select_bits_alloc(int size)
-{
-	return kmalloc(6 * size, GFP_KERNEL);
-}
-
-static void select_bits_free(void *bits, int size)
-{
-	kfree(bits);
-}
-
 /*
  * We can actually return ERESTARTSYS instead of EINTR, but I'd
  * like to be certain this leads to no problems. So I return
@@ -312,6 +312,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	char *bits;
 	int ret, size, max_fdset;
 	struct fdtable *fdt;
+	/* Allocate small arguments on the stack to save memory and be faster */
+	char stack_fds[SELECT_STACK_ALLOC];
 
 	ret = -EINVAL;
 	if (n < 0)
@@ -332,7 +334,10 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	 */
 	ret = -ENOMEM;
 	size = FDS_BYTES(n);
-	bits = select_bits_alloc(size);
+	if (6*size < SELECT_STACK_ALLOC)
+		bits = stack_fds;
+	else
+		bits = kmalloc(6 * size, GFP_KERNEL);
 	if (!bits)
 		goto out_nofds;
 	fds.in      = (unsigned long *)  bits;
@@ -367,7 +372,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 		ret = -EFAULT;
 
 out:
-	select_bits_free(bits, size);
+	if (bits != stack_fds)
+		kfree(bits);
 out_nofds:
 	return ret;
 }
@@ -619,6 +625,9 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 	return count;
 }
 
+#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
+			sizeof(struct pollfd))
+
 int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 {
 	struct poll_wqueues table;
@@ -628,6 +637,9 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
  	struct poll_list *walk;
 	struct fdtable *fdt;
 	int max_fdset;
+	/* Allocate small arguments on the stack to save memory and be faster */
+	char stack_pps[POLL_STACK_ALLOC];
+	struct poll_list *stack_pp = NULL;
 
 	/* Do a sanity check on nfds ... */
 	rcu_read_lock();
@@ -645,14 +657,23 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 	err = -ENOMEM;
 	while(i!=0) {
 		struct poll_list *pp;
-		pp = kmalloc(sizeof(struct poll_list)+
-				sizeof(struct pollfd)*
-				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
-					GFP_KERNEL);
-		if(pp==NULL)
-			goto out_fds;
+		int num, size;
+		if (stack_pp == NULL)
+			num = N_STACK_PPS;
+		else
+			num = POLLFD_PER_PAGE;
+		if (num > i)
+			num = i;
+		size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;
+		if (!stack_pp)
+			stack_pp = pp = (struct poll_list *)stack_pps;
+		else {
+			pp = kmalloc(size, GFP_KERNEL);
+			if (!pp)
+				goto out_fds;
+		}
 		pp->next=NULL;
-		pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
+		pp->len = num;
 		if (head == NULL)
 			head = pp;
 		else
@@ -660,7 +681,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 
 		walk = pp;
 		if (copy_from_user(pp->entries, ufds + nfds-i, 
-				sizeof(struct pollfd)*pp->len)) {
+				sizeof(struct pollfd)*num)) {
 			err = -EFAULT;
 			goto out_fds;
 		}
@@ -689,7 +710,8 @@ out_fds:
 	walk = head;
 	while(walk!=NULL) {
 		struct poll_list *pp = walk->next;
-		kfree(walk);
+		if (walk != stack_pp)
+			kfree(walk);
 		walk = pp;
 	}
 	poll_freewait(&table);
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 8e8f609..51e1b56 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -11,6 +11,15 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 
+/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
+   additional memory. */
+#define MAX_STACK_ALLOC 832
+#define FRONTEND_STACK_ALLOC	256
+#define SELECT_STACK_ALLOC	FRONTEND_STACK_ALLOC
+#define POLL_STACK_ALLOC	FRONTEND_STACK_ALLOC
+#define WQUEUES_STACK_ALLOC	(MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC)
+#define N_INLINE_POLL_ENTRIES	(WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry))
+
 struct poll_table_struct;
 
 /* 
@@ -33,6 +42,12 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 	pt->qproc = qproc;
 }
 
+struct poll_table_entry {
+	struct file * filp;
+	wait_queue_t wait;
+	wait_queue_head_t * wait_address;
+};
+
 /*
  * Structures and helpers for sys_poll/sys_poll
  */
@@ -40,6 +55,8 @@ struct poll_wqueues {
 	poll_table pt;
 	struct poll_table_page * table;
 	int error;
+	int inline_index;
+	struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
 };
 
 extern void poll_initwait(struct poll_wqueues *pwq);
-- 
cgit v0.10.2


From e4a1f129f9e43a5e5d28fe6d1b214246a398cdce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 28 Mar 2006 01:56:34 -0800
Subject: [PATCH] use fget_light() in select/poll

Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/select.c b/fs/select.c
index d8b4f07..05cd199 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -231,17 +231,18 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 			}
 
 			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
+				int fput_needed;
 				if (i >= n)
 					break;
 				if (!(bit & all_bits))
 					continue;
-				file = fget(i);
+				file = fget_light(i, &fput_needed);
 				if (file) {
 					f_op = file->f_op;
 					mask = DEFAULT_POLLMASK;
 					if (f_op && f_op->poll)
 						mask = (*f_op->poll)(file, retval ? NULL : wait);
-					fput(file);
+					fput_light(file, fput_needed);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
 						retval++;
@@ -557,14 +558,15 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage,
 		fdp = fdpage+i;
 		fd = fdp->fd;
 		if (fd >= 0) {
-			struct file * file = fget(fd);
+			int fput_needed;
+			struct file * file = fget_light(fd, &fput_needed);
 			mask = POLLNVAL;
 			if (file != NULL) {
 				mask = DEFAULT_POLLMASK;
 				if (file->f_op && file->f_op->poll)
 					mask = file->f_op->poll(file, *pwait);
 				mask &= fdp->events | POLLERR | POLLHUP;
-				fput(file);
+				fput_light(file, fput_needed);
 			}
 			if (mask) {
 				*pwait = NULL;
-- 
cgit v0.10.2


From 68c3431ae22912be580c68d3955ef46515582943 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Tue, 28 Mar 2006 01:56:35 -0800
Subject: [PATCH] Fold select_bits_alloc/free into caller code.

Remove an unnecessary level of indirection in allocating and freeing select
bits, as per the select_bits_alloc() and select_bits_free() functions.
Both select.c and compat.c are updated.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/compat.c b/fs/compat.c
index ef5a077..7f8e26e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1639,15 +1639,6 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  * This is a virtual copy of sys_select from fs/select.c and probably
  * should be compared to it from time to time
  */
-static void *select_bits_alloc(int size)
-{
-	return kmalloc(6 * size, GFP_KERNEL);
-}
-
-static void select_bits_free(void *bits, int size)
-{
-	kfree(bits);
-}
 
 /*
  * We can actually return ERESTARTSYS instead of EINTR, but I'd
@@ -1686,7 +1677,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	 */
 	ret = -ENOMEM;
 	size = FDS_BYTES(n);
-	bits = select_bits_alloc(size);
+	bits = kmalloc(6 * size, GFP_KERNEL);
 	if (!bits)
 		goto out_nofds;
 	fds.in      = (unsigned long *)  bits;
@@ -1720,7 +1711,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	compat_set_fd_set(n, exp, fds.res_ex);
 
 out:
-	select_bits_free(bits, size);
+	kfree(bits);
 out_nofds:
 	return ret;
 }
-- 
cgit v0.10.2


From 631d6747e1d877a4baa924cb373b8b9511a53e5e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:36 -0800
Subject: [PATCH] for_each_possible_cpu: defines for_each_possible_cpu

for_each_cpu() is a for-loop over cpu_possible_map.  for_each_online_cpu is
for-loop cpu over cpu_online_map.  .....for_each_cpu() is not sufficiently
explicit and can lead to mistakes.

This patch adds for_each_possible_cpu() in preparation for the removal of
for_each_cpu().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 99e6115..9cbb781 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -67,7 +67,7 @@
  *
  * int any_online_cpu(mask)		First online cpu in mask
  *
- * for_each_cpu(cpu)			for-loop cpu over cpu_possible_map
+ * for_each_possible_cpu(cpu)		for-loop cpu over cpu_possible_map
  * for_each_online_cpu(cpu)		for-loop cpu over cpu_online_map
  * for_each_present_cpu(cpu)		for-loop cpu over cpu_present_map
  *
@@ -405,7 +405,8 @@ int __any_online_cpu(const cpumask_t *mask);
 #define any_online_cpu(mask)		0
 #endif
 
-#define for_each_cpu(cpu)	  for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_cpu(cpu)  for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_possible_cpu(cpu)  for_each_cpu_mask((cpu), cpu_possible_map)
 #define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
 #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
 
-- 
cgit v0.10.2


From 0a945022778f100115d0cb6234eb28fc1b15ccaf Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:37 -0800
Subject: [PATCH] for_each_possible_cpu: fixes for generic part

replaces for_each_cpu with for_each_possible_cpu().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 82469db..5a19e2e 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3514,7 +3514,7 @@ int __init blk_dev_init(void)
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
 			sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
 
 	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
diff --git a/fs/file.c b/fs/file.c
index bbc7433..55f4e70 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -373,6 +373,6 @@ static void __devinit fdtable_defer_list_init(int cpu)
 void __init files_defer_init(void)
 {
 	int i;
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		fdtable_defer_list_init(i);
 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 1e9ea37..1edce0c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -534,7 +534,7 @@ static int show_stat(struct seq_file *p, void *v)
 	if (wall_to_monotonic.tv_nsec)
 		--jif;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		int j;
 
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 78cf455..c0caf43 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -19,7 +19,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #define percpu_modcopy(pcpudst, src, size)			\
 do {								\
 	unsigned int __i;					\
-	for_each_cpu(__i)					\
+	for_each_possible_cpu(__i)				\
 		memcpy((pcpudst)+__per_cpu_offset[__i],		\
 		       (src), (size));				\
 } while (0)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 3c1b029..10a27f2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -152,14 +152,14 @@ struct disk_attribute {
 ({									\
 	typeof(gendiskp->dkstats->field) res = 0;			\
 	int i;								\
-	for_each_cpu(i)							\
+	for_each_possible_cpu(i)					\
 		res += per_cpu_ptr(gendiskp->dkstats, i)->field;	\
 	res;								\
 })
 
 static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 	int i;
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
 				sizeof (struct disk_stats));
 }		
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a484572..b462490 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -46,7 +46,7 @@ static inline int kstat_irqs(int irq)
 {
 	int cpu, sum = 0;
 
-	for_each_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		sum += kstat_cpu(cpu).irqs[irq];
 
 	return sum;
diff --git a/init/main.c b/init/main.c
index 64466ea..4a2f089 100644
--- a/init/main.c
+++ b/init/main.c
@@ -341,7 +341,7 @@ static void __init setup_per_cpu_areas(void)
 #endif
 	ptr = alloc_bootmem(size * nr_possible_cpus);
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 		ptr += size;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index b4b362b..8154e75 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -301,7 +301,7 @@ rcu_torture_printk(char *page)
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
 			pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i];
 			batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i];
@@ -535,7 +535,7 @@ rcu_torture_init(void)
 	atomic_set(&n_rcu_torture_error, 0);
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
 		atomic_set(&rcu_torture_wcount[i], 0);
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
 			per_cpu(rcu_torture_count, cpu)[i] = 0;
 			per_cpu(rcu_torture_batch, cpu)[i] = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 7854ee5..a9ecac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1625,7 +1625,7 @@ unsigned long nr_uninterruptible(void)
 {
 	unsigned long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += cpu_rq(i)->nr_uninterruptible;
 
 	/*
@@ -1642,7 +1642,7 @@ unsigned long long nr_context_switches(void)
 {
 	unsigned long long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += cpu_rq(i)->nr_switches;
 
 	return sum;
@@ -1652,7 +1652,7 @@ unsigned long nr_iowait(void)
 {
 	unsigned long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += atomic_read(&cpu_rq(i)->nr_iowait);
 
 	return sum;
@@ -6080,7 +6080,7 @@ void __init sched_init(void)
 	runqueue_t *rq;
 	int i, j, k;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		prio_array_t *array;
 
 		rq = cpu_rq(i);
diff --git a/mm/slab.c b/mm/slab.c
index 6818374..4cbf8bb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3311,7 +3311,7 @@ void *__alloc_percpu(size_t size)
 	 * and we have no way of figuring out how to fix the array
 	 * that we have allocated then....
 	 */
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		int node = cpu_to_node(i);
 
 		if (node_online(node))
@@ -3398,7 +3398,7 @@ void free_percpu(const void *objp)
 	/*
 	 * We allocate for all cpus so we cannot use for online cpu here.
 	 */
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 	    kfree(p->ptrs[i]);
 	kfree(p);
 }
diff --git a/mm/swap.c b/mm/swap.c
index 91b7e20..88895c2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -512,7 +512,7 @@ long percpu_counter_sum(struct percpu_counter *fbc)
 
 	spin_lock(&fbc->lock);
 	ret = fbc->count;
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		long *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
 	}
-- 
cgit v0.10.2


From 0fed48463fb20c6bcabc5cf70e2de47b30507ee1 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:37 -0800
Subject: [PATCH] for_each_possible_cpu: loopback device.

This patch replaces for_each_cpu with for_each_possible_cpu.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 0c13795..b79d6e8 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -172,7 +172,7 @@ static struct net_device_stats *get_stats(struct net_device *dev)
 
 	memset(stats, 0, sizeof(struct net_device_stats));
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		struct net_device_stats *lb_stats;
 
 		lb_stats = &per_cpu(loopback_stats, i);
-- 
cgit v0.10.2


From fe449f48368623eb47715061b4977ce982d8e03b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:38 -0800
Subject: [PATCH] for_each_possible_cpu: oprofile.

This patch replaces for_each_cpu with for_each_possible_cpu.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c
index e94b1e4..f0acb66 100644
--- a/drivers/oprofile/oprofile_stats.c
+++ b/drivers/oprofile/oprofile_stats.c
@@ -22,7 +22,7 @@ void oprofile_reset_stats(void)
 	struct oprofile_cpu_buffer * cpu_buf; 
 	int i;
  
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		cpu_buf = &cpu_buffer[i]; 
 		cpu_buf->sample_received = 0;
 		cpu_buf->sample_lost_overflow = 0;
@@ -46,7 +46,7 @@ void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
 	if (!dir)
 		return;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		cpu_buf = &cpu_buffer[i]; 
 		snprintf(buf, 10, "cpu%d", i);
 		cpudir = oprofilefs_mkdir(sb, dir, buf);
-- 
cgit v0.10.2


From c8912599c677f58fd3a5e5420c9cb6df62b2d8fa Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:39 -0800
Subject: [PATCH] for_each_possible_cpu: i386

This patch replaces for_each_cpu with for_each_possible_cpu.

under arch/i386.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 311b4e7..3b329af 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -381,7 +381,7 @@ static void do_irq_balance(void)
 	unsigned long imbalance = 0;
 	cpumask_t allowed_mask, target_cpu_mask, tmp;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		int package_index;
 		CPU_IRQ(i) = 0;
 		if (!cpu_online(i))
@@ -632,7 +632,7 @@ static int __init balanced_irq_init(void)
 	else 
 		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		kfree(irq_cpu_data[i].irq_delta);
 		irq_cpu_data[i].irq_delta = NULL;
 		kfree(irq_cpu_data[i].last_irq);
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 9074818..5ad6a2c 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -138,12 +138,12 @@ static int __init check_nmi_watchdog(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
 
-	for_each_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
 		/* Check cpu_callin_map here because that is set
 		   after the timer is started. */
@@ -510,7 +510,7 @@ void touch_nmi_watchdog (void)
 	 * Just reset the alert counters, (other CPUs might be
 	 * spinning on locks we hold):
 	 */
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		alert_counter[i] = 0;
 
 	/*
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 8165626..70e560a 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -1700,7 +1700,7 @@ after_handle_vic_irq(unsigned int irq)
 
 			printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
 			       cpu, irq);
-			for_each_cpu(real_cpu, mask) {
+			for_each_possible_cpu(real_cpu, mask) {
 
 				outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
 				     VIC_PROCESSOR_ID);
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 1accce5..1a2076c 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy)
 static void free_msrs(void)
 {
 	int i;
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		kfree(cpu_msrs[i].counters);
 		cpu_msrs[i].counters = NULL;
 		kfree(cpu_msrs[i].controls);
-- 
cgit v0.10.2


From 3c30a75256a5863705fb7658cb0b2e3bb09a13df Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 28 Mar 2006 01:56:39 -0800
Subject: [PATCH] for_each_possible_cpu: documentaion

Replace for_each_cpu with for_each_possible_cpu.

Modifies occurences in documentaion.

for_each_cpu in whatisRCU.txt should be for_each_online_cpu ???
(I'm not sure..)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index b4ea51a..07cb93b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -605,7 +605,7 @@ are the same as those shown in the preceding section, so they are omitted.
 	{
 		int cpu;
 
-		for_each_cpu(cpu)
+		for_each_possible_cpu(cpu)
 			run_on(cpu);
 	}
 
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 57a09f9..1bcf699 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -97,13 +97,13 @@ at which time hotplug is disabled.
 
 You really dont need to manipulate any of the system cpu maps. They should
 be read-only for most use. When setting up per-cpu resources almost always use
-cpu_possible_map/for_each_cpu() to iterate.
+cpu_possible_map/for_each_possible_cpu() to iterate.
 
 Never use anything other than cpumask_t to represent bitmap of CPUs.
 
 #include <linux/cpumask.h>
 
-for_each_cpu              - Iterate over cpu_possible_map
+for_each_possible_cpu     - Iterate over cpu_possible_map
 for_each_online_cpu       - Iterate over cpu_online_map
 for_each_present_cpu      - Iterate over cpu_present_map
 for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
-- 
cgit v0.10.2


From ec1b9466cb4f6ae6d950bd67055d9410d1056d2a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:40 -0800
Subject: [PATCH] ia64: const f_ops fix

Tweak the proc setup code so things work OK with const
proc_dir_entry.proc_fops.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index c686d9c..5100261 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -93,19 +93,22 @@ static int coherence_id_open(struct inode *inode, struct file *file)
 static struct proc_dir_entry
 *sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
 			int (*openfunc)(struct inode *, struct file *),
-			int (*releasefunc)(struct inode *, struct file *))
+	int (*releasefunc)(struct inode *, struct file *),
+	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *))
 {
 	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
 
 	if (e) {
-		e->proc_fops = (struct file_operations *)kmalloc(
-			sizeof(struct file_operations), GFP_KERNEL);
-		if (e->proc_fops) {
-			memset(e->proc_fops, 0, sizeof(struct file_operations));
-			e->proc_fops->open = openfunc;
-			e->proc_fops->read = seq_read;
-			e->proc_fops->llseek = seq_lseek;
-			e->proc_fops->release = releasefunc;
+		struct file_operations *f;
+
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f) {
+			f->open = openfunc;
+			f->read = seq_read;
+			f->llseek = seq_lseek;
+			f->release = releasefunc;
+			f->write = write;
+			e->proc_fops = f;
 		}
 	}
 
@@ -119,31 +122,29 @@ extern int sn_topology_release(struct inode *, struct file *);
 void register_sn_procfs(void)
 {
 	static struct proc_dir_entry *sgi_proc_dir = NULL;
-	struct proc_dir_entry *e;
 
 	BUG_ON(sgi_proc_dir != NULL);
 	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
 		return;
 
 	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-			       partition_id_open, single_release);
+		partition_id_open, single_release, NULL);
 
 	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-			       system_serial_number_open, single_release);
+		system_serial_number_open, single_release, NULL);
 
 	sn_procfs_create_entry("licenseID", sgi_proc_dir, 
-			       licenseID_open, single_release);
+		licenseID_open, single_release, NULL);
 
-	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, 
-				   sn_force_interrupt_open, single_release);
-	if (e) 
-		e->proc_fops->write = sn_force_interrupt_write_proc;
+	sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
+		sn_force_interrupt_open, single_release,
+		sn_force_interrupt_write_proc);
 
 	sn_procfs_create_entry("coherence_id", sgi_proc_dir, 
-			       coherence_id_open, single_release);
+		coherence_id_open, single_release, NULL);
 	
 	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-			       sn_topology_open, sn_topology_release);
+		sn_topology_open, sn_topology_release, NULL);
 }
 
 #endif /* CONFIG_PROC_FS */
-- 
cgit v0.10.2


From 99ac48f54a91d02140c497edc31dc57d4bc5c85d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 28 Mar 2006 01:56:41 -0800
Subject: [PATCH] mark f_ops const in the inode

Mark the f_ops members of inodes as const, as well as fix the
ripple-through this causes by places that copy this f_ops and then "do
stuff" with it.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index b3962c3a..5be40aa 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -103,7 +103,7 @@ spufs_setattr(struct dentry *dentry, struct iattr *attr)
 
 static int
 spufs_new_file(struct super_block *sb, struct dentry *dentry,
-		struct file_operations *fops, int mode,
+		const struct file_operations *fops, int mode,
 		struct spu_context *ctx)
 {
 	static struct inode_operations spufs_file_iops = {
diff --git a/arch/ppc/kernel/ppc_htab.c b/arch/ppc/kernel/ppc_htab.c
index 2f5c765..9b84bff 100644
--- a/arch/ppc/kernel/ppc_htab.c
+++ b/arch/ppc/kernel/ppc_htab.c
@@ -52,7 +52,7 @@ static int ppc_htab_open(struct inode *inode, struct file *file)
 	return single_open(file, ppc_htab_show, NULL);
 }
 
-struct file_operations ppc_htab_operations = {
+const struct file_operations ppc_htab_operations = {
 	.open		= ppc_htab_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c
index 641f763..b7f7951 100644
--- a/drivers/char/drm/drm_fops.c
+++ b/drivers/char/drm/drm_fops.c
@@ -175,7 +175,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	drm_device_t *dev = NULL;
 	int minor = iminor(inode);
 	int err = -ENODEV;
-	struct file_operations *old_fops;
+	const struct file_operations *old_fops;
 
 	DRM_DEBUG("\n");
 
diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c
index ae0aa6d..c658dde 100644
--- a/drivers/char/drm/i810_dma.c
+++ b/drivers/char/drm/i810_dma.c
@@ -126,7 +126,7 @@ static int i810_map_buffer(drm_buf_t * buf, struct file *filp)
 	drm_device_t *dev = priv->head->dev;
 	drm_i810_buf_priv_t *buf_priv = buf->dev_private;
 	drm_i810_private_t *dev_priv = dev->dev_private;
-	struct file_operations *old_fops;
+	const struct file_operations *old_fops;
 	int retcode = 0;
 
 	if (buf_priv->currently_mapped == I810_BUF_MAPPED)
diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 163f2cb..b0f815d 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -128,7 +128,7 @@ static int i830_map_buffer(drm_buf_t * buf, struct file *filp)
 	drm_device_t *dev = priv->head->dev;
 	drm_i830_buf_priv_t *buf_priv = buf->dev_private;
 	drm_i830_private_t *dev_priv = dev->dev_private;
-	struct file_operations *old_fops;
+	const struct file_operations *old_fops;
 	unsigned long virtual;
 	int retcode = 0;
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 5245ba1..66719f9 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -899,7 +899,7 @@ static const struct {
 	unsigned int		minor;
 	char			*name;
 	umode_t			mode;
-	struct file_operations	*fops;
+	const struct file_operations	*fops;
 } devlist[] = { /* list of minor devices */
 	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
 	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 3e4c041..96eb2a7 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -129,7 +129,7 @@ static int misc_open(struct inode * inode, struct file * file)
 	int minor = iminor(inode);
 	struct miscdevice *c;
 	int err = -ENODEV;
-	struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *old_fops, *new_fops = NULL;
 	
 	down(&misc_sem);
 	
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 4fe3da3..f8af094 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -923,7 +923,7 @@ void input_unregister_handler(struct input_handler *handler)
 static int input_open_file(struct inode *inode, struct file *file)
 {
 	struct input_handler *handler = input_table[iminor(inode) >> 5];
-	struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *old_fops, *new_fops = NULL;
 	int err;
 
 	/* No load-on-demand here? */
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 2cc8b27..ca9dc00 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -233,7 +233,7 @@ static struct file_operations proc_applstats_ops = {
 };
 
 static void
-create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
 {
 	struct proc_dir_entry *entry;
 	entry = create_proc_entry(name, mode, NULL);
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index 54f8b95..96fe0ec 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -86,7 +86,7 @@ static int dvb_device_open(struct inode *inode, struct file *file)
 
 	if (dvbdev && dvbdev->fops) {
 		int err = 0;
-		struct file_operations *old_fops;
+		const struct file_operations *old_fops;
 
 		file->private_data = dvbdev;
 		old_fops = file->f_op;
diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c
index 75e3d41..5f87dd5 100644
--- a/drivers/media/video/videodev.c
+++ b/drivers/media/video/videodev.c
@@ -97,7 +97,7 @@ static int video_open(struct inode *inode, struct file *file)
 	unsigned int minor = iminor(inode);
 	int err = 0;
 	struct video_device *vfl;
-	struct file_operations *old_fops;
+	const struct file_operations *old_fops;
 
 	if(minor>=VIDEO_NUM_DEVICES)
 		return -ENODEV;
diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c
index 2a0c42b..3d2e76e 100644
--- a/drivers/message/i2o/i2o_proc.c
+++ b/drivers/message/i2o/i2o_proc.c
@@ -56,7 +56,7 @@
 typedef struct _i2o_proc_entry_t {
 	char *name;		/* entry name */
 	mode_t mode;		/* mode */
-	struct file_operations *fops;	/* open function */
+	const struct file_operations *fops;	/* open function */
 } i2o_proc_entry;
 
 /* global I2O /proc/i2o entry */
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index d6bae69..b62da9b 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -130,7 +130,7 @@ static struct file_operations ulong_ro_fops = {
 
 
 static struct dentry * __oprofilefs_create_file(struct super_block * sb,
-	struct dentry * root, char const * name, struct file_operations * fops,
+	struct dentry * root, char const * name, const struct file_operations * fops,
 	int perm)
 {
 	struct dentry * dentry;
@@ -203,7 +203,7 @@ int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
 
  
 int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
-	char const * name, struct file_operations * fops)
+	char const * name, const struct file_operations * fops)
 {
 	if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
 		return -EFAULT;
@@ -212,7 +212,7 @@ int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
 
 
 int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root,
-	char const * name, struct file_operations * fops, int perm)
+	char const * name, const struct file_operations * fops, int perm)
 {
 	if (!__oprofilefs_create_file(sb, root, name, fops, perm))
 		return -EFAULT;
diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c
index 7a6db1c..e166fff 100644
--- a/drivers/telephony/phonedev.c
+++ b/drivers/telephony/phonedev.c
@@ -49,7 +49,7 @@ static int phone_open(struct inode *inode, struct file *file)
 	unsigned int minor = iminor(inode);
 	int err = 0;
 	struct phone_device *p;
-	struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *old_fops, *new_fops = NULL;
 
 	if (minor >= PHONE_NUM_DEVICES)
 		return -ENODEV;
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 37b1336..b263a54 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -24,15 +24,15 @@
 #include "usb.h"
 
 #define MAX_USB_MINORS	256
-static struct file_operations *usb_minors[MAX_USB_MINORS];
+static const struct file_operations *usb_minors[MAX_USB_MINORS];
 static DEFINE_SPINLOCK(minor_lock);
 
 static int usb_open(struct inode * inode, struct file * file)
 {
 	int minor = iminor(inode);
-	struct file_operations *c;
+	const struct file_operations *c;
 	int err = -ENODEV;
-	struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *old_fops, *new_fops = NULL;
 
 	spin_lock (&minor_lock);
 	c = usb_minors[minor];
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index b44cfda..3f618ce 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -1581,7 +1581,7 @@ restart:
 
 static struct inode *
 gadgetfs_create_file (struct super_block *sb, char const *name,
-		void *data, struct file_operations *fops,
+		void *data, const struct file_operations *fops,
 		struct dentry **dentry_p);
 
 static int activate_ep_files (struct dev_data *dev)
@@ -1955,7 +1955,7 @@ module_param (default_perm, uint, 0644);
 
 static struct inode *
 gadgetfs_make_inode (struct super_block *sb,
-		void *data, struct file_operations *fops,
+		void *data, const struct file_operations *fops,
 		int mode)
 {
 	struct inode *inode = new_inode (sb);
@@ -1979,7 +1979,7 @@ gadgetfs_make_inode (struct super_block *sb,
  */
 static struct inode *
 gadgetfs_create_file (struct super_block *sb, char const *name,
-		void *data, struct file_operations *fops,
+		void *data, const struct file_operations *fops,
 		struct dentry **dentry_p)
 {
 	struct dentry	*dentry;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 8c6eb04..b53dffa 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -250,7 +250,7 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
 }
 
 int register_chrdev(unsigned int major, const char *name,
-		    struct file_operations *fops)
+		    const struct file_operations *fops)
 {
 	struct char_device_struct *cd;
 	struct cdev *cdev;
@@ -473,7 +473,7 @@ struct cdev *cdev_alloc(void)
 	return p;
 }
 
-void cdev_init(struct cdev *cdev, struct file_operations *fops)
+void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 {
 	memset(cdev, 0, sizeof *cdev);
 	INIT_LIST_HEAD(&cdev->list);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d4f1a2c..85d166c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -191,7 +191,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
  */
 struct dentry *debugfs_create_file(const char *name, mode_t mode,
 				   struct dentry *parent, void *data,
-				   struct file_operations *fops)
+				   const struct file_operations *fops)
 {
 	struct dentry *dentry = NULL;
 	int error;
diff --git a/fs/inode.c b/fs/inode.c
index 1fddf28..32b7c33 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -104,7 +104,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 {
 	static struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
-	static struct file_operations empty_fops;
+	static const struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5320e5a..3101833 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -706,7 +706,7 @@ nfsd_close(struct file *filp)
  * after it.
  */
 static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-			      struct file_operations *fop)
+			      const struct file_operations *fop)
 {
 	struct inode *inode = dp->d_inode;
 	int (*fsync) (struct file *, struct dentry *, int);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 47b7a20..4ba0300 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -560,7 +560,7 @@ static void proc_kill_inodes(struct proc_dir_entry *de)
 		struct file * filp = list_entry(p, struct file, f_u.fu_list);
 		struct dentry * dentry = filp->f_dentry;
 		struct inode * inode;
-		struct file_operations *fops;
+		const struct file_operations *fops;
 
 		if (dentry->d_op != &proc_dentry_operations)
 			continue;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 95a1cf3..0502f17 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -30,7 +30,7 @@ do {						\
 
 #endif
 
-extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f);
+extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
 extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
 extern int proc_tid_stat(struct task_struct *,  char *);
 extern int proc_tgid_stat(struct task_struct *, char *);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 1edce0c..ef5a332 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -731,7 +731,7 @@ static struct file_operations proc_sysrq_trigger_operations = {
 
 struct proc_dir_entry *proc_root_kcore;
 
-void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
 {
 	struct proc_dir_entry *entry;
 	entry = create_proc_entry(name, mode, NULL);
diff --git a/fs/select.c b/fs/select.c
index 05cd199..b3a3a13 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,7 +220,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
 			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
 			unsigned long res_in = 0, res_out = 0, res_ex = 0;
-			struct file_operations *f_op = NULL;
+			const struct file_operations *f_op = NULL;
 			struct file *file = NULL;
 
 			in = *inp++; out = *outp++; ex = *exp++;
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index 8da37e2..2216638 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -5,13 +5,13 @@
 struct cdev {
 	struct kobject kobj;
 	struct module *owner;
-	struct file_operations *ops;
+	const struct file_operations *ops;
 	struct list_head list;
 	dev_t dev;
 	unsigned int count;
 };
 
-void cdev_init(struct cdev *, struct file_operations *);
+void cdev_init(struct cdev *, const struct file_operations *);
 
 struct cdev *cdev_alloc(void);
 
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 4b0428e..176e2d3 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -29,7 +29,7 @@ struct debugfs_blob_wrapper {
 #if defined(CONFIG_DEBUG_FS)
 struct dentry *debugfs_create_file(const char *name, mode_t mode,
 				   struct dentry *parent, void *data,
-				   struct file_operations *fops);
+				   const struct file_operations *fops);
 
 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 680d913..ef355bc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -496,7 +496,7 @@ struct inode {
 	struct mutex		i_mutex;
 	struct rw_semaphore	i_alloc_sem;
 	struct inode_operations	*i_op;
-	struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
+	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 	struct super_block	*i_sb;
 	struct file_lock	*i_flock;
 	struct address_space	*i_mapping;
@@ -636,7 +636,7 @@ struct file {
 	} f_u;
 	struct dentry		*f_dentry;
 	struct vfsmount         *f_vfsmnt;
-	struct file_operations	*f_op;
+	const struct file_operations	*f_op;
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
@@ -1414,7 +1414,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *);
 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
 extern int register_chrdev_region(dev_t, unsigned, const char *);
 extern int register_chrdev(unsigned int, const char *,
-			   struct file_operations *);
+			   const struct file_operations *);
 extern int unregister_chrdev(unsigned int, const char *);
 extern void unregister_chrdev_region(dev_t, unsigned);
 extern int chrdev_open(struct inode *, struct file *);
diff --git a/include/linux/input.h b/include/linux/input.h
index 6d4cc3c..1d4e341 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -957,7 +957,7 @@ struct input_handler {
 	struct input_handle* (*connect)(struct input_handler *handler, struct input_dev *dev, struct input_device_id *id);
 	void (*disconnect)(struct input_handle *handle);
 
-	struct file_operations *fops;
+	const struct file_operations *fops;
 	int minor;
 	char *name;
 
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 14ceebf..5b584da 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -36,7 +36,7 @@ struct class_device;
 struct miscdevice  {
 	int minor;
 	const char *name;
-	struct file_operations *fops;
+	const struct file_operations *fops;
 	struct list_head list;
 	struct device *dev;
 	struct class_device *class;
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index b5b3197..0d514b2 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -84,10 +84,10 @@ void oprofile_add_trace(unsigned long eip);
  * the specified file operations.
  */
 int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
-	char const * name, struct file_operations * fops);
+	char const * name, const struct file_operations * fops);
 
 int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root,
-	char const * name, struct file_operations * fops, int perm);
+	char const * name, const struct file_operations * fops, int perm);
  
 /** Create a file for read/write access to an unsigned long. */
 int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index cb224cf..6d03d02 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -58,7 +58,7 @@ struct proc_dir_entry {
 	gid_t gid;
 	loff_t size;
 	struct inode_operations * proc_iops;
-	struct file_operations * proc_fops;
+	const struct file_operations * proc_fops;
 	get_info_t *get_info;
 	struct module *owner;
 	struct proc_dir_entry *next, *parent, *subdir;
@@ -189,7 +189,7 @@ static inline struct proc_dir_entry *proc_net_create(const char *name,
 }
 
 static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
-	mode_t mode, struct file_operations *fops)
+	mode_t mode, const struct file_operations *fops)
 {
 	struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net);
 	if (res)
diff --git a/include/linux/sound.h b/include/linux/sound.h
index 72b9af4..f63d834 100644
--- a/include/linux/sound.h
+++ b/include/linux/sound.h
@@ -30,12 +30,12 @@
  */
  
 struct device;
-extern int register_sound_special(struct file_operations *fops, int unit);
-extern int register_sound_special_device(struct file_operations *fops, int unit, struct device *dev);
-extern int register_sound_mixer(struct file_operations *fops, int dev);
-extern int register_sound_midi(struct file_operations *fops, int dev);
-extern int register_sound_dsp(struct file_operations *fops, int dev);
-extern int register_sound_synth(struct file_operations *fops, int dev);
+extern int register_sound_special(const struct file_operations *fops, int unit);
+extern int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev);
+extern int register_sound_mixer(const struct file_operations *fops, int dev);
+extern int register_sound_midi(const struct file_operations *fops, int dev);
+extern int register_sound_dsp(const struct file_operations *fops, int dev);
+extern int register_sound_synth(const struct file_operations *fops, int dev);
 
 extern void unregister_sound_special(int unit);
 extern void unregister_sound_mixer(int unit);
diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index 0d6ed3c..d93c24b 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -50,7 +50,7 @@ struct proc_dir_entry *	rpc_proc_register(struct rpc_stat *);
 void			rpc_proc_unregister(const char *);
 void			rpc_proc_zero(struct rpc_program *);
 struct proc_dir_entry *	svc_proc_register(struct svc_stat *,
-					  struct file_operations *);
+					  const struct file_operations *);
 void			svc_proc_unregister(const char *);
 
 void			svc_seq_show(struct seq_file *,
@@ -65,7 +65,7 @@ static inline void rpc_proc_unregister(const char *p) {}
 static inline void rpc_proc_zero(struct rpc_program *p) {}
 
 static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s,
-						       struct file_operations *f) { return NULL; }
+						       const struct file_operations *f) { return NULL; }
 static inline void svc_proc_unregister(const char *p) {}
 
 static inline void svc_seq_show(struct seq_file *seq,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 130d125..e34e5e3 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -615,7 +615,7 @@ extern struct bus_type usb_bus_type;
  */
 struct usb_class_driver {
 	char *name;
-	struct file_operations *fops;
+	const struct file_operations *fops;
 	int minor_base;
 };
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 2275bfe..af2d615 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -75,7 +75,7 @@ struct video_device
 	int minor;
 
 	/* device ops + callbacks */
-	struct file_operations *fops;
+	const struct file_operations *fops;
 	void (*release)(struct video_device *vfd);
 
 
diff --git a/include/sound/core.h b/include/sound/core.h
index 144bdc2..7f32c12 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -186,7 +186,7 @@ struct snd_minor {
 	int type;			/* SNDRV_DEVICE_TYPE_XXX */
 	int card;			/* card number */
 	int device;			/* device number */
-	struct file_operations *f_ops;	/* file operations */
+	const struct file_operations *f_ops;	/* file operations */
 	void *private_data;		/* private data for f_ops->open */
 	char name[0];			/* device name (keep at the end of
 								structure) */
@@ -200,14 +200,14 @@ extern int snd_ecards_limit;
 void snd_request_card(int card);
 
 int snd_register_device(int type, struct snd_card *card, int dev,
-			struct file_operations *f_ops, void *private_data,
+			const struct file_operations *f_ops, void *private_data,
 			const char *name);
 int snd_unregister_device(int type, struct snd_card *card, int dev);
 void *snd_lookup_minor_data(unsigned int minor, int type);
 
 #ifdef CONFIG_SND_OSSEMUL
 int snd_register_oss_device(int type, struct snd_card *card, int dev,
-			    struct file_operations *f_ops, void *private_data,
+			    const struct file_operations *f_ops, void *private_data,
 			    const char *name);
 int snd_unregister_oss_device(int type, struct snd_card *card, int dev);
 void *snd_lookup_oss_minor_data(unsigned int minor, int type);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index aa4158b..cc673dd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -395,7 +395,7 @@ enum {
  */
 struct rpc_filelist {
 	char *name;
-	struct file_operations *i_fop;
+	const struct file_operations *i_fop;
 	int mode;
 };
 
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 790941e..dea5296 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -225,7 +225,7 @@ EXPORT_SYMBOL(rpc_print_iostats);
  * Register/unregister RPC proc files
  */
 static inline struct proc_dir_entry *
-do_register(const char *name, void *data, struct file_operations *fops)
+do_register(const char *name, void *data, const struct file_operations *fops)
 {
 	struct proc_dir_entry *ent;
 
@@ -253,7 +253,7 @@ rpc_proc_unregister(const char *name)
 }
 
 struct proc_dir_entry *
-svc_proc_register(struct svc_stat *statp, struct file_operations *fops)
+svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
 {
 	return do_register(statp->program->pg_name, statp, fops);
 }
diff --git a/sound/core/init.c b/sound/core/init.c
index ad68761..5bb8a8b 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -223,7 +223,8 @@ int snd_card_disconnect(struct snd_card *card)
 	struct snd_monitor_file *mfile;
 	struct file *file;
 	struct snd_shutdown_f_ops *s_f_ops;
-	struct file_operations *f_ops, *old_f_ops;
+	struct file_operations *f_ops;
+	const struct file_operations *old_f_ops;
 	int err;
 
 	spin_lock(&card->files_lock);
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 4d28e52..108e430 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -137,7 +137,7 @@ static int snd_open(struct inode *inode, struct file *file)
 {
 	unsigned int minor = iminor(inode);
 	struct snd_minor *mptr = NULL;
-	struct file_operations *old_fops;
+	const struct file_operations *old_fops;
 	int err = 0;
 
 	if (minor >= ARRAY_SIZE(snd_minors))
@@ -240,7 +240,7 @@ static int snd_kernel_minor(int type, struct snd_card *card, int dev)
  * Retrurns zero if successful, or a negative error code on failure.
  */
 int snd_register_device(int type, struct snd_card *card, int dev,
-			struct file_operations *f_ops, void *private_data,
+			const struct file_operations *f_ops, void *private_data,
 			const char *name)
 {
 	int minor;
diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c
index 4023d3b..9055c6d 100644
--- a/sound/core/sound_oss.c
+++ b/sound/core/sound_oss.c
@@ -95,7 +95,7 @@ static int snd_oss_kernel_minor(int type, struct snd_card *card, int dev)
 }
 
 int snd_register_oss_device(int type, struct snd_card *card, int dev,
-			    struct file_operations *f_ops, void *private_data,
+			    const struct file_operations *f_ops, void *private_data,
 			    const char *name)
 {
 	int minor = snd_oss_kernel_minor(type, card, dev);
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 394b53e..6f84972 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -53,7 +53,7 @@
 struct sound_unit
 {
 	int unit_minor;
-	struct file_operations *unit_fops;
+	const struct file_operations *unit_fops;
 	struct sound_unit *next;
 	char name[32];
 };
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sound_class);
  *	join into it. Called with the lock asserted
  */
 
-static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, struct file_operations *fops, int index, int low, int top)
+static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, const struct file_operations *fops, int index, int low, int top)
 {
 	int n=low;
 
@@ -153,7 +153,7 @@ static DEFINE_SPINLOCK(sound_loader_lock);
  *	list. Acquires locks as needed
  */
 
-static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev)
+static int sound_insert_unit(struct sound_unit **list, const struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev)
 {
 	struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL);
 	int r;
@@ -237,7 +237,7 @@ static struct sound_unit *chains[SOUND_STEP];
  *	a negative error code is returned.
  */
  
-int register_sound_special_device(struct file_operations *fops, int unit,
+int register_sound_special_device(const struct file_operations *fops, int unit,
 				  struct device *dev)
 {
 	const int chain = unit % SOUND_STEP;
@@ -301,7 +301,7 @@ int register_sound_special_device(struct file_operations *fops, int unit,
  
 EXPORT_SYMBOL(register_sound_special_device);
 
-int register_sound_special(struct file_operations *fops, int unit)
+int register_sound_special(const struct file_operations *fops, int unit)
 {
 	return register_sound_special_device(fops, unit, NULL);
 }
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(register_sound_special);
  *	number is returned, on failure a negative error code is returned.
  */
 
-int register_sound_mixer(struct file_operations *fops, int dev)
+int register_sound_mixer(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[0], fops, dev, 0, 128,
 				 "mixer", S_IRUSR | S_IWUSR, NULL);
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(register_sound_mixer);
  *	number is returned, on failure a negative error code is returned.
  */
 
-int register_sound_midi(struct file_operations *fops, int dev)
+int register_sound_midi(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[2], fops, dev, 2, 130,
 				 "midi", S_IRUSR | S_IWUSR, NULL);
@@ -362,7 +362,7 @@ EXPORT_SYMBOL(register_sound_midi);
  *	and will always allocate them as a matching pair - eg dsp3/audio3
  */
 
-int register_sound_dsp(struct file_operations *fops, int dev)
+int register_sound_dsp(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[3], fops, dev, 3, 131,
 				 "dsp", S_IWUSR | S_IRUSR, NULL);
@@ -381,7 +381,7 @@ EXPORT_SYMBOL(register_sound_dsp);
  */
 
 
-int register_sound_synth(struct file_operations *fops, int dev)
+int register_sound_synth(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[9], fops, dev, 9, 137,
 				 "synth", S_IRUSR | S_IWUSR, NULL);
@@ -501,7 +501,7 @@ int soundcore_open(struct inode *inode, struct file *file)
 	int chain;
 	int unit = iminor(inode);
 	struct sound_unit *s;
-	struct file_operations *new_fops = NULL;
+	const struct file_operations *new_fops = NULL;
 
 	chain=unit&0x0F;
 	if(chain==4 || chain==5)	/* dsp/audio/dsp16 */
@@ -540,7 +540,7 @@ int soundcore_open(struct inode *inode, struct file *file)
 		 * switching ->f_op in the first place.
 		 */
 		int err = 0;
-		struct file_operations *old_fops = file->f_op;
+		const struct file_operations *old_fops = file->f_op;
 		file->f_op = new_fops;
 		spin_unlock(&sound_loader_lock);
 		if(file->f_op->open)
-- 
cgit v0.10.2


From 4b6f5d20b04dcbc3d888555522b90ba6d36c4106 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 28 Mar 2006 01:56:42 -0800
Subject: [PATCH] Make most file operations structs in fs/ const

This is a conversion to make the various file_operations structs in fs/
const.  Basically a regexp job, with a few manual fixups

The goal is both to increase correctness (harder to accidentally write to
shared datastructures) and reducing the false sharing of cachelines with
things that get dirty in .data (while .rodata is nicely read only and thus
cache clean)

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 5c550fc..26a230b 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -101,7 +101,7 @@ static struct super_operations ibmasmfs_s_ops = {
 	.drop_inode	= generic_delete_inode,
 };
 
-static struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
+static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
 
 static struct file_system_type ibmasmfs_type = {
 	.owner          = THIS_MODULE,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 43c9f7d..f867b8d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -39,8 +39,8 @@
 
 extern struct file_system_type v9fs_fs_type;
 extern struct address_space_operations v9fs_addr_operations;
-extern struct file_operations v9fs_file_operations;
-extern struct file_operations v9fs_dir_operations;
+extern const struct file_operations v9fs_file_operations;
+extern const struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 766f11f..e32d597 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -204,7 +204,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-struct file_operations v9fs_dir_operations = {
+const struct file_operations v9fs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = v9fs_dir_readdir,
 	.open = v9fs_file_open,
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 59e7441..083dcfc 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -266,7 +266,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
-struct file_operations v9fs_file_operations = {
+const struct file_operations v9fs_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = v9fs_file_read,
 	.write = v9fs_file_write,
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index f6cd013..29217ff 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -85,7 +85,7 @@ void __adfs_error(struct super_block *sb, const char *function,
 
 /* dir_*.c */
 extern struct inode_operations adfs_dir_inode_operations;
-extern struct file_operations adfs_dir_operations;
+extern const struct file_operations adfs_dir_operations;
 extern struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
@@ -94,7 +94,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
 
 /* file.c */
 extern struct inode_operations adfs_file_inode_operations;
-extern struct file_operations adfs_file_operations;
+extern const struct file_operations adfs_file_operations;
 
 static inline __u32 signed_asl(__u32 val, signed int shift)
 {
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0b4c3a0..7b075fc 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -196,7 +196,7 @@ out:
 	return ret;
 }
 
-struct file_operations adfs_dir_operations = {
+const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= adfs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 6af1088..1014b9f 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -25,7 +25,7 @@
 
 #include "adfs.h"
 
-struct file_operations adfs_file_operations = {
+const struct file_operations adfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.mmap		= generic_file_mmap,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0c6799f..a43a876 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -192,9 +192,9 @@ extern void   affs_dir_truncate(struct inode *);
 extern struct inode_operations	 affs_file_inode_operations;
 extern struct inode_operations	 affs_dir_inode_operations;
 extern struct inode_operations   affs_symlink_inode_operations;
-extern struct file_operations	 affs_file_operations;
-extern struct file_operations	 affs_file_operations_ofs;
-extern struct file_operations	 affs_dir_operations;
+extern const struct file_operations	 affs_file_operations;
+extern const struct file_operations	 affs_file_operations_ofs;
+extern const struct file_operations	 affs_dir_operations;
 extern struct address_space_operations	 affs_symlink_aops;
 extern struct address_space_operations	 affs_aops;
 extern struct address_space_operations	 affs_aops_ofs;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 548efd0..5d9649f 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -17,7 +17,7 @@
 
 static int affs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations affs_dir_operations = {
+const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= affs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index f72fb77..7076262 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -25,7 +25,7 @@ static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
 static int affs_file_open(struct inode *inode, struct file *filp);
 static int affs_file_release(struct inode *inode, struct file *filp);
 
-struct file_operations affs_file_operations = {
+const struct file_operations affs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5c61c24..a6dff6a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -32,7 +32,7 @@ static int afs_d_delete(struct dentry *dentry);
 static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, ino_t ino, unsigned dtype);
 
-struct file_operations afs_dir_file_operations = {
+const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
 	.readdir	= afs_dir_readdir,
 };
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ab8f87c..72febdf 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -64,7 +64,7 @@ extern struct cachefs_index_def afs_cache_cell_index_def;
  * dir.c
  */
 extern struct inode_operations afs_dir_inode_operations;
-extern struct file_operations afs_dir_file_operations;
+extern const struct file_operations afs_dir_file_operations;
 
 /*
  * file.c
@@ -105,7 +105,7 @@ extern struct cachefs_netfs afs_cache_netfs;
  * mntpt.c
  */
 extern struct inode_operations afs_mntpt_inode_operations;
-extern struct file_operations afs_mntpt_file_operations;
+extern const struct file_operations afs_mntpt_file_operations;
 extern struct afs_timer afs_mntpt_expiry_timer;
 extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
 extern unsigned long afs_mntpt_expiry_timeout;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 31ee065..4e6eeb5 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -32,7 +32,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
 
-struct file_operations afs_mntpt_file_operations = {
+const struct file_operations afs_mntpt_file_operations = {
 	.open		= afs_mntpt_open,
 };
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9c81b8f..101d21b 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -37,7 +37,7 @@ static struct seq_operations afs_proc_cells_ops = {
 	.show	= afs_proc_cells_show,
 };
 
-static struct file_operations afs_proc_cells_fops = {
+static const struct file_operations afs_proc_cells_fops = {
 	.open		= afs_proc_cells_open,
 	.read		= seq_read,
 	.write		= afs_proc_cells_write,
@@ -53,7 +53,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 				       const char __user *buf,
 				       size_t size, loff_t *_pos);
 
-static struct file_operations afs_proc_rootcell_fops = {
+static const struct file_operations afs_proc_rootcell_fops = {
 	.open		= afs_proc_rootcell_open,
 	.read		= afs_proc_rootcell_read,
 	.write		= afs_proc_rootcell_write,
@@ -77,7 +77,7 @@ static struct seq_operations afs_proc_cell_volumes_ops = {
 	.show	= afs_proc_cell_volumes_show,
 };
 
-static struct file_operations afs_proc_cell_volumes_fops = {
+static const struct file_operations afs_proc_cell_volumes_fops = {
 	.open		= afs_proc_cell_volumes_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -101,7 +101,7 @@ static struct seq_operations afs_proc_cell_vlservers_ops = {
 	.show	= afs_proc_cell_vlservers_show,
 };
 
-static struct file_operations afs_proc_cell_vlservers_fops = {
+static const struct file_operations afs_proc_cell_vlservers_fops = {
 	.open		= afs_proc_cell_vlservers_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -124,7 +124,7 @@ static struct seq_operations afs_proc_cell_servers_ops = {
 	.show	= afs_proc_cell_servers_show,
 };
 
-static struct file_operations afs_proc_cell_servers_fops = {
+static const struct file_operations afs_proc_cell_servers_fops = {
 	.open		= afs_proc_cell_servers_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 990c28d..a62327f 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -146,7 +146,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info
 
 extern struct inode_operations autofs_root_inode_operations;
 extern struct inode_operations autofs_symlink_inode_operations;
-extern struct file_operations autofs_root_operations;
+extern const struct file_operations autofs_root_operations;
 
 /* Initializing function */
 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 870e2cf..9cac08d 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -26,7 +26,7 @@ static int autofs_root_rmdir(struct inode *,struct dentry *);
 static int autofs_root_mkdir(struct inode *,struct dentry *,int);
 static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 
-struct file_operations autofs_root_operations = {
+const struct file_operations autofs_root_operations = {
 	.read		= generic_read_dir,
 	.readdir	= autofs_root_readdir,
 	.ioctl		= autofs_root_ioctl,
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c70204a..57c4903 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -176,8 +176,8 @@ extern struct inode_operations autofs4_dir_inode_operations;
 extern struct inode_operations autofs4_root_inode_operations;
 extern struct inode_operations autofs4_indirect_root_inode_operations;
 extern struct inode_operations autofs4_direct_root_inode_operations;
-extern struct file_operations autofs4_dir_operations;
-extern struct file_operations autofs4_root_operations;
+extern const struct file_operations autofs4_dir_operations;
+extern const struct file_operations autofs4_root_operations;
 
 /* Initializing function */
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c8fe43a..84e030c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t fil
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
-struct file_operations autofs4_root_operations = {
+const struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
@@ -40,7 +40,7 @@ struct file_operations autofs4_root_operations = {
 	.ioctl		= autofs4_root_ioctl,
 };
 
-struct file_operations autofs4_dir_operations = {
+const struct file_operations autofs4_dir_operations = {
 	.open		= autofs4_dir_open,
 	.release	= autofs4_dir_close,
 	.read		= generic_read_dir,
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index e172180..80599ae 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -22,7 +22,7 @@ static int return_EIO(void)
 
 #define EIO_ERROR ((void *) (return_EIO))
 
-static struct file_operations bad_file_ops =
+static const struct file_operations bad_file_ops =
 {
 	.llseek		= EIO_ERROR,
 	.aio_read	= EIO_ERROR,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 044a595..68ebd10 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -64,7 +64,7 @@ static const struct super_operations befs_sops = {
 /* slab cache for befs_inode_info objects */
 static kmem_cache_t *befs_inode_cachep;
 
-static struct file_operations befs_dir_operations = {
+static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= befs_readdir,
 };
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1fbc53f..9d79100 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -49,11 +49,11 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
 
 /* file.c */
 extern struct inode_operations bfs_file_inops;
-extern struct file_operations bfs_file_operations;
+extern const struct file_operations bfs_file_operations;
 extern struct address_space_operations bfs_aops;
 
 /* dir.c */
 extern struct inode_operations bfs_dir_inops;
-extern struct file_operations bfs_dir_operations;
+extern const struct file_operations bfs_dir_operations;
 
 #endif /* _FS_BFS_BFS_H */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5af928f..26fad96 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -70,7 +70,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 	return 0;	
 }
 
-struct file_operations bfs_dir_operations = {
+const struct file_operations bfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= bfs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 807723b..d83cd74 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -17,7 +17,7 @@
 #define dprintf(x...)
 #endif
 
-struct file_operations bfs_file_operations = {
+const struct file_operations bfs_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 6a7b730..d73d755 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -600,7 +600,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	return count;
 }
 
-static struct file_operations bm_entry_operations = {
+static const struct file_operations bm_entry_operations = {
 	.read		= bm_entry_read,
 	.write		= bm_entry_write,
 };
@@ -668,7 +668,7 @@ out:
 	return count;
 }
 
-static struct file_operations bm_register_operations = {
+static const struct file_operations bm_register_operations = {
 	.write		= bm_register_write,
 };
 
@@ -715,7 +715,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 	return count;
 }
 
-static struct file_operations bm_status_operations = {
+static const struct file_operations bm_status_operations = {
 	.read		= bm_status_read,
 	.write		= bm_status_write,
 };
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 17c7618..af88c43 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1087,7 +1087,7 @@ struct address_space_operations def_blk_aops = {
 	.direct_IO	= blkdev_direct_IO,
 };
 
-struct file_operations def_blk_fops = {
+const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
diff --git a/fs/char_dev.c b/fs/char_dev.c
index b53dffa..4e1b849 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -406,7 +406,7 @@ static void cdev_purge(struct cdev *cdev)
  * is contain the open that then fills in the correct operations
  * depending on the special file...
  */
-struct file_operations def_chr_fops = {
+const struct file_operations def_chr_fops = {
 	.open = chrdev_open,
 };
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6b99b51..4bbc544 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -583,7 +583,7 @@ struct inode_operations cifs_symlink_inode_ops = {
 #endif 
 };
 
-struct file_operations cifs_file_ops = {
+const struct file_operations cifs_file_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
 	.readv = generic_file_readv,
@@ -607,7 +607,7 @@ struct file_operations cifs_file_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_file_direct_ops = {
+const struct file_operations cifs_file_direct_ops = {
 	/* no mmap, no aio, no readv - 
 	   BB reevaluate whether they can be done with directio, no cache */
 	.read = cifs_user_read,
@@ -626,7 +626,7 @@ struct file_operations cifs_file_direct_ops = {
 	.dir_notify = cifs_dir_notify,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
-struct file_operations cifs_file_nobrl_ops = {
+const struct file_operations cifs_file_nobrl_ops = {
 	.read = do_sync_read,
 	.write = do_sync_write,
 	.readv = generic_file_readv,
@@ -649,7 +649,7 @@ struct file_operations cifs_file_nobrl_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_file_direct_nobrl_ops = {
+const struct file_operations cifs_file_direct_nobrl_ops = {
 	/* no mmap, no aio, no readv - 
 	   BB reevaluate whether they can be done with directio, no cache */
 	.read = cifs_user_read,
@@ -668,7 +668,7 @@ struct file_operations cifs_file_direct_nobrl_ops = {
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_dir_ops = {
+const struct file_operations cifs_dir_ops = {
 	.readdir = cifs_readdir,
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 821a8eb..74f405a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -61,10 +61,10 @@ extern struct inode_operations cifs_file_inode_ops;
 extern struct inode_operations cifs_symlink_inode_ops;
 
 /* Functions related to files and directories */
-extern struct file_operations cifs_file_ops;
-extern struct file_operations cifs_file_direct_ops; /* if directio mount */
-extern struct file_operations cifs_file_nobrl_ops;
-extern struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */
+extern const struct file_operations cifs_file_ops;
+extern const struct file_operations cifs_file_direct_ops; /* if directio mount */
+extern const struct file_operations cifs_file_nobrl_ops;
+extern const struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
@@ -76,7 +76,7 @@ extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, struct dentry *, int);
 extern int cifs_flush(struct file *);
 extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
-extern struct file_operations cifs_dir_ops;
+extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
 extern int cifs_dir_notify(struct file *, unsigned long arg);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 54f76de..71f2ea6 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -82,7 +82,7 @@ struct inode_operations coda_dir_inode_operations =
 	.setattr	= coda_setattr,
 };
 
-struct file_operations coda_dir_operations = {
+const struct file_operations coda_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= coda_readdir,
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 146a991..7c26424 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -288,7 +288,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	return err;
 }
 
-struct file_operations coda_file_operations = {
+const struct file_operations coda_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= coda_file_read,
 	.write		= coda_file_write,
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 1277149..214822b 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -36,7 +36,7 @@ struct inode_operations coda_ioctl_inode_operations =
 	.setattr	= coda_setattr,
 };
 
-struct file_operations coda_ioctl_operations = {
+const struct file_operations coda_ioctl_operations = {
 	.owner		= THIS_MODULE,
 	.ioctl		= coda_pioctl,
 };
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 98c74fe..6c6771d 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -342,7 +342,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
 }
 
 
-static struct file_operations coda_psdev_fops = {
+static const struct file_operations coda_psdev_fops = {
 	.owner		= THIS_MODULE,
 	.read		= coda_psdev_read,
 	.write		= coda_psdev_write,
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index f70e469..3f4ff7a 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -72,9 +72,9 @@ extern void configfs_release_fs(void);
 
 extern struct rw_semaphore configfs_rename_sem;
 extern struct super_block * configfs_sb;
-extern struct file_operations configfs_dir_operations;
-extern struct file_operations configfs_file_operations;
-extern struct file_operations bin_fops;
+extern const struct file_operations configfs_dir_operations;
+extern const struct file_operations configfs_file_operations;
+extern const struct file_operations bin_fops;
 extern struct inode_operations configfs_dir_inode_operations;
 extern struct inode_operations configfs_symlink_inode_operations;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ca60e3a..8ed9b06 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1027,7 +1027,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 	return offset;
 }
 
-struct file_operations configfs_dir_operations = {
+const struct file_operations configfs_dir_operations = {
 	.open		= configfs_dir_open,
 	.release	= configfs_dir_close,
 	.llseek		= configfs_dir_lseek,
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 3921920..f499803 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -322,7 +322,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations configfs_file_operations = {
+const struct file_operations configfs_file_operations = {
 	.read		= configfs_read_file,
 	.write		= configfs_write_file,
 	.llseek		= generic_file_llseek,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index acc1b2c..9efcc3a 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -29,7 +29,7 @@
 
 static struct super_operations cramfs_ops;
 static struct inode_operations cramfs_dir_inode_operations;
-static struct file_operations cramfs_directory_operations;
+static const struct file_operations cramfs_directory_operations;
 static struct address_space_operations cramfs_aops;
 
 static DEFINE_MUTEX(read_mutex);
@@ -512,7 +512,7 @@ static struct address_space_operations cramfs_aops = {
 /*
  * A directory can only readdir
  */
-static struct file_operations cramfs_directory_operations = {
+static const struct file_operations cramfs_directory_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= cramfs_readdir,
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 40c4fc9..66a5054 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -39,7 +39,7 @@ static int default_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-struct file_operations debugfs_file_operations = {
+const struct file_operations debugfs_file_operations = {
 	.read =		default_read_file,
 	.write =	default_write_file,
 	.open =		default_open,
@@ -213,7 +213,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
 	return count;
 }
 
-static struct file_operations fops_bool = {
+static const struct file_operations fops_bool = {
 	.read =		read_file_bool,
 	.write =	write_file_bool,
 	.open =		default_open,
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index b621521..52f5059 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -856,14 +856,14 @@ static int devfsd_close(struct inode *inode, struct file *file);
 #ifdef CONFIG_DEVFS_DEBUG
 static ssize_t stat_read(struct file *file, char __user *buf, size_t len,
 			 loff_t * ppos);
-static struct file_operations stat_fops = {
+static const struct file_operations stat_fops = {
 	.open = nonseekable_open,
 	.read = stat_read,
 };
 #endif
 
 /*  Devfs daemon file operations  */
-static struct file_operations devfsd_fops = {
+static const struct file_operations devfsd_fops = {
 	.open = nonseekable_open,
 	.read = devfsd_read,
 	.ioctl = devfsd_ioctl,
@@ -1842,8 +1842,8 @@ static int try_modload(struct devfs_entry *parent, struct fs_info *fs_info,
 
 static struct inode_operations devfs_iops;
 static struct inode_operations devfs_dir_iops;
-static struct file_operations devfs_fops;
-static struct file_operations devfs_dir_fops;
+static const struct file_operations devfs_fops;
+static const struct file_operations devfs_dir_fops;
 static struct inode_operations devfs_symlink_iops;
 
 static int devfs_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -2061,11 +2061,11 @@ static int devfs_open(struct inode *inode, struct file *file)
 	return err;
 }				/*  End Function devfs_open  */
 
-static struct file_operations devfs_fops = {
+static const struct file_operations devfs_fops = {
 	.open = devfs_open,
 };
 
-static struct file_operations devfs_dir_fops = {
+static const struct file_operations devfs_dir_fops = {
 	.read = generic_read_dir,
 	.readdir = devfs_readdir,
 };
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 777c614..17f5b2d 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -10,7 +10,7 @@
 
 static int efs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations efs_dir_operations = {
+const struct file_operations efs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= efs_readdir,
 };
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e067a06..242fe1a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -290,7 +290,7 @@ static kmem_cache_t *pwq_cache __read_mostly;
 static struct vfsmount *eventpoll_mnt __read_mostly;
 
 /* File callbacks that implement the eventpoll file behaviour */
-static struct file_operations eventpoll_fops = {
+static const struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0165388..d672aa9 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -658,7 +658,7 @@ not_empty:
 	return 0;
 }
 
-struct file_operations ext2_dir_operations = {
+const struct file_operations ext2_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext2_readdir,
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 11035ac..9f74a62 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -154,12 +154,12 @@ extern void ext2_write_super (struct super_block *);
  */
 
 /* dir.c */
-extern struct file_operations ext2_dir_operations;
+extern const struct file_operations ext2_dir_operations;
 
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
-extern struct file_operations ext2_file_operations;
-extern struct file_operations ext2_xip_file_operations;
+extern const struct file_operations ext2_file_operations;
+extern const struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a484412..509ccec 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,7 +39,7 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
  * We have mostly NULL's here: the current defaults are ok for
  * the ext2 filesystem.
  */
-struct file_operations ext2_file_operations = {
+const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
@@ -56,7 +56,7 @@ struct file_operations ext2_file_operations = {
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
-struct file_operations ext2_xip_file_operations = {
+const struct file_operations ext2_xip_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= xip_file_read,
 	.write		= xip_file_write,
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 38bd3f6..f37528e 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -39,7 +39,7 @@ static int ext3_dx_readdir(struct file * filp,
 static int ext3_release_dir (struct inode * inode,
 				struct file * filp);
 
-struct file_operations ext3_dir_operations = {
+const struct file_operations ext3_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 59098ea..783a796 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -105,7 +105,7 @@ force_commit:
 	return ret;
 }
 
-struct file_operations ext3_file_operations = {
+const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4095bc1..698b85b 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -741,7 +741,7 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
 	return ret;
 }
 
-struct file_operations fat_dir_operations = {
+const struct file_operations fat_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= fat_readdir,
 	.ioctl		= fat_dir_ioctl,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 88aa1ae..1ee2523 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -112,7 +112,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 	}
 }
 
-struct file_operations fat_file_operations = {
+const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/fifo.c b/fs/fifo.c
index d13fcd3..889f722 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -145,6 +145,6 @@ err_nocleanup:
  * is contain the open that then fills in the correct operations
  * depending on the access mode of the file...
  */
-struct file_operations def_fifo_fops = {
+const struct file_operations def_fifo_fops = {
 	.open		= fifo_open,	/* will set read or write pipe_fops */
 };
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 927acf7..1cf1fe8 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void			vxfs_clear_inode(struct inode *);
 
 /* vxfs_lookup.c */
 extern struct inode_operations	vxfs_dir_inode_ops;
-extern struct file_operations	vxfs_dir_operations;
+extern const struct file_operations	vxfs_dir_operations;
 
 /* vxfs_olt.c */
 extern int			vxfs_read_olt(struct super_block *, u_long);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 554eb45..29cce45 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -56,7 +56,7 @@ struct inode_operations vxfs_dir_inode_ops = {
 	.lookup =		vxfs_lookup,
 };
 
-struct file_operations vxfs_dir_operations = {
+const struct file_operations vxfs_dir_operations = {
 	.readdir =		vxfs_readdir,
 };
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0c9a2ee..23d1f52 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -922,7 +922,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-struct file_operations fuse_dev_operations = {
+const struct file_operations fuse_dev_operations = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= fuse_dev_read,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c72a8a9..256355b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1170,7 +1170,7 @@ static struct inode_operations fuse_dir_inode_operations = {
 	.removexattr	= fuse_removexattr,
 };
 
-static struct file_operations fuse_dir_operations = {
+static const struct file_operations fuse_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= fuse_readdir,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6f05379..975f269 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 
-static struct file_operations fuse_direct_io_file_operations;
+static const struct file_operations fuse_direct_io_file_operations;
 
 static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
 			  struct fuse_open_out *outargp)
@@ -611,7 +611,7 @@ static int fuse_set_page_dirty(struct page *page)
 	return 0;
 }
 
-static struct file_operations fuse_file_operations = {
+static const struct file_operations fuse_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
@@ -623,7 +623,7 @@ static struct file_operations fuse_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
-static struct file_operations fuse_direct_io_file_operations = {
+static const struct file_operations fuse_direct_io_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= fuse_direct_read,
 	.write		= fuse_direct_write,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4a83adf..a16a04f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -346,7 +346,7 @@ static inline u64 get_node_id(struct inode *inode)
 }
 
 /** Device operations */
-extern struct file_operations fuse_dev_operations;
+extern const struct file_operations fuse_dev_operations;
 
 /**
  * This is the single global spinlock which protects FUSE's structures
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 534e5a7..7cd8cc0 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -313,7 +313,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return res;
 }
 
-struct file_operations hfs_dir_operations = {
+const struct file_operations hfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= hfs_readdir,
 	.llseek		= generic_file_llseek,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 18ce47a..3ed8663 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -169,7 +169,7 @@ extern int hfs_cat_move(u32, struct inode *, struct qstr *,
 extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *);
 
 /* dir.c */
-extern struct file_operations hfs_dir_operations;
+extern const struct file_operations hfs_dir_operations;
 extern struct inode_operations hfs_dir_inode_operations;
 
 /* extent.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2c56470..2d4ced2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -17,7 +17,7 @@
 #include "hfs_fs.h"
 #include "btree.h"
 
-static struct file_operations hfs_file_operations;
+static const struct file_operations hfs_file_operations;
 static struct inode_operations hfs_file_inode_operations;
 
 /*================ Variable-like macros ================*/
@@ -601,7 +601,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 }
 
 
-static struct file_operations hfs_file_operations = {
+static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 01a6fe3..1f9ece0 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -483,7 +483,7 @@ struct inode_operations hfsplus_dir_inode_operations = {
 	.rename		= hfsplus_rename,
 };
 
-struct file_operations hfsplus_dir_operations = {
+const struct file_operations hfsplus_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= hfsplus_readdir,
 	.ioctl          = hfsplus_ioctl,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9fbe4d2..acf66db 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -280,7 +280,7 @@ static struct inode_operations hfsplus_file_inode_operations = {
 	.listxattr	= hfsplus_listxattr,
 };
 
-static struct file_operations hfsplus_file_operations = {
+static const struct file_operations hfsplus_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b3ad0bd..bf0f8e1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -384,7 +384,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
 }
 
-static struct file_operations hostfs_file_fops = {
+static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.sendfile	= generic_file_sendfile,
@@ -399,7 +399,7 @@ static struct file_operations hostfs_file_fops = {
 	.fsync		= hostfs_fsync,
 };
 
-static struct file_operations hostfs_dir_fops = {
+static const struct file_operations hostfs_dir_fops = {
 	.llseek		= generic_file_llseek,
 	.readdir	= hostfs_readdir,
 	.read		= generic_read_dir,
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 5591f96..ecc9180 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -310,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 	return ERR_PTR(-ENOENT);
 }
 
-struct file_operations hpfs_dir_ops =
+const struct file_operations hpfs_dir_ops =
 {
 	.llseek		= hpfs_dir_lseek,
 	.read		= generic_read_dir,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 7c995ac..d3b9fff 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -119,7 +119,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
 	return retval;
 }
 
-struct file_operations hpfs_file_ops =
+const struct file_operations hpfs_file_ops =
 {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 4c6473a..29b7a3e 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -240,7 +240,7 @@ void hpfs_set_dentry_operations(struct dentry *);
 /* dir.c */
 
 struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-extern struct file_operations hpfs_dir_ops;
+extern const struct file_operations hpfs_dir_ops;
 
 /* dnode.c */
 
@@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
 /* file.c */
 
 int hpfs_file_fsync(struct file *, struct dentry *, int);
-extern struct file_operations hpfs_file_ops;
+extern const struct file_operations hpfs_file_ops;
 extern struct inode_operations hpfs_file_iops;
 extern struct address_space_operations hpfs_aops;
 
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index a44dc58..2ba20cd 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -558,7 +558,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
 	return(default_llseek(file, off, where));
 }
 
-static struct file_operations hppfs_file_fops = {
+static const struct file_operations hppfs_file_fops = {
 	.owner		= NULL,
 	.llseek		= hppfs_llseek,
 	.read		= hppfs_read,
@@ -609,7 +609,7 @@ static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return(0);
 }
 
-static struct file_operations hppfs_dir_fops = {
+static const struct file_operations hppfs_dir_fops = {
 	.owner		= NULL,
 	.readdir	= hppfs_readdir,
 	.open		= hppfs_dir_open,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 25fa8bb..3a5b4e9 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -35,7 +35,7 @@
 
 static struct super_operations hugetlbfs_ops;
 static struct address_space_operations hugetlbfs_aops;
-struct file_operations hugetlbfs_file_operations;
+const struct file_operations hugetlbfs_file_operations;
 static struct inode_operations hugetlbfs_dir_inode_operations;
 static struct inode_operations hugetlbfs_inode_operations;
 
@@ -566,7 +566,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 		inode_init_once(&ei->vfs_inode);
 }
 
-struct file_operations hugetlbfs_file_operations = {
+const struct file_operations hugetlbfs_file_operations = {
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= simple_sync_file,
 	.get_unmapped_area	= hugetlb_get_unmapped_area,
diff --git a/fs/inotify.c b/fs/inotify.c
index f48a3da..367c487 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -920,7 +920,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 	return ret;
 }
 
-static struct file_operations inotify_fops = {
+static const struct file_operations inotify_fops = {
 	.poll           = inotify_poll,
 	.read           = inotify_read,
 	.release        = inotify_release,
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 7901ac9..5440ea2 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -16,7 +16,7 @@
 
 static int isofs_readdir(struct file *, void *, filldir_t);
 
-struct file_operations isofs_dir_operations =
+const struct file_operations isofs_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= isofs_readdir,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 439a19b..b87ba06 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -175,6 +175,6 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 }
 
 extern struct inode_operations isofs_dir_inode_operations;
-extern struct file_operations isofs_dir_operations;
+extern const struct file_operations isofs_dir_operations;
 extern struct address_space_operations isofs_symlink_aops;
 extern struct export_operations isofs_export_ops;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 5a4519e..020cc09 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -55,9 +55,9 @@
 static int jffs_remove(struct inode *dir, struct dentry *dentry, int type);
 
 static struct super_operations jffs_ops;
-static struct file_operations jffs_file_operations;
+static const struct file_operations jffs_file_operations;
 static struct inode_operations jffs_file_inode_operations;
-static struct file_operations jffs_dir_operations;
+static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
 static struct address_space_operations jffs_address_operations;
 
@@ -1629,7 +1629,7 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
 }
 
 
-static struct file_operations jffs_file_operations =
+static const struct file_operations jffs_file_operations =
 {
 	.open		= generic_file_open,
 	.llseek		= generic_file_llseek,
@@ -1649,7 +1649,7 @@ static struct inode_operations jffs_file_inode_operations =
 };
 
 
-static struct file_operations jffs_dir_operations =
+static const struct file_operations jffs_dir_operations =
 {
 	.readdir	= jffs_readdir,
 };
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index a7bf9cb..8bc7a50 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -37,7 +37,7 @@ static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
 
-struct file_operations jffs2_dir_operations =
+const struct file_operations jffs2_dir_operations =
 {
 	.read =		generic_read_dir,
 	.readdir =	jffs2_readdir,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 935f273..9f41712 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -38,7 +38,7 @@ int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return 0;
 }
 
-struct file_operations jffs2_file_operations =
+const struct file_operations jffs2_file_operations =
 {
 	.llseek =	generic_file_llseek,
 	.open =		generic_file_open,
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 59e7a39..d307cf5 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -159,11 +159,11 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c);
 void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c);
 
 /* dir.c */
-extern struct file_operations jffs2_dir_operations;
+extern const struct file_operations jffs2_dir_operations;
 extern struct inode_operations jffs2_dir_inode_operations;
 
 /* file.c */
-extern struct file_operations jffs2_file_operations;
+extern const struct file_operations jffs2_file_operations;
 extern struct inode_operations jffs2_file_inode_operations;
 extern struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, struct dentry *, int);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index e1ac6e4..1c9745b 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -100,7 +100,7 @@ struct inode_operations jfs_file_inode_operations = {
 #endif
 };
 
-struct file_operations jfs_file_operations = {
+const struct file_operations jfs_file_operations = {
 	.open		= jfs_open,
 	.llseek		= generic_file_llseek,
 	.write		= generic_file_write,
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 095d471..c300726 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -35,9 +35,9 @@ extern void jfs_set_inode_flags(struct inode *);
 
 extern struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
-extern struct file_operations jfs_dir_operations;
+extern const struct file_operations jfs_dir_operations;
 extern struct inode_operations jfs_file_inode_operations;
-extern struct file_operations jfs_file_operations;
+extern const struct file_operations jfs_file_operations;
 extern struct inode_operations jfs_symlink_inode_operations;
 extern struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 309cee5..09ea03f 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1519,7 +1519,7 @@ struct inode_operations jfs_dir_inode_operations = {
 #endif
 };
 
-struct file_operations jfs_dir_operations = {
+const struct file_operations jfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= jfs_readdir,
 	.fsync		= jfs_fsync,
diff --git a/fs/libfs.c b/fs/libfs.c
index 4fdeace..7145ba7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -179,7 +179,7 @@ ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t
 	return -EISDIR;
 }
 
-struct file_operations simple_dir_operations = {
+const struct file_operations simple_dir_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.llseek		= dcache_dir_lseek,
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 732502a..69224d1 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -14,7 +14,7 @@ typedef struct minix_dir_entry minix_dirent;
 
 static int minix_readdir(struct file *, void *, filldir_t);
 
-struct file_operations minix_dir_operations = {
+const struct file_operations minix_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= minix_readdir,
 	.fsync		= minix_sync_file,
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f1d77ac..420b328 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -15,7 +15,7 @@
  */
 int minix_sync_file(struct file *, struct dentry *, int);
 
-struct file_operations minix_file_operations = {
+const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e42a8bb..c55b77c 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -81,8 +81,8 @@ extern int minix_sync_file(struct file *, struct dentry *, int);
 
 extern struct inode_operations minix_file_inode_operations;
 extern struct inode_operations minix_dir_inode_operations;
-extern struct file_operations minix_file_operations;
-extern struct file_operations minix_dir_operations;
+extern const struct file_operations minix_file_operations;
+extern const struct file_operations minix_dir_operations;
 extern struct dentry_operations minix_dentry_operations;
 
 static inline struct minix_sb_info *minix_sb(struct super_block *sb)
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index cfd76f4..f0860c6 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -49,7 +49,7 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *);
 #define ncp_symlink NULL
 #endif
 		      
-struct file_operations ncp_dir_operations =
+const struct file_operations ncp_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= ncp_readdir,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index ebdad8f..e6b7c67 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -283,7 +283,7 @@ static int ncp_release(struct inode *inode, struct file *file) {
 	return 0;
 }
 
-struct file_operations ncp_file_operations =
+const struct file_operations ncp_file_operations =
 {
 	.llseek		= remote_llseek,
 	.read		= ncp_file_read,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 06c48b3..a23f348 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -54,7 +54,7 @@ static int nfs_rename(struct inode *, struct dentry *,
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
 
-struct file_operations nfs_dir_operations = {
+const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index dee49a0..f1df2c8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -49,7 +49,7 @@ static int nfs_check_flags(int flags);
 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
 
-struct file_operations nfs_file_operations = {
+const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c8960af..3ef017b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -134,7 +134,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size
 	return simple_transaction_read(file, buf, size, pos);
 }
 
-static struct file_operations transaction_ops = {
+static const struct file_operations transaction_ops = {
 	.write		= nfsctl_transaction_write,
 	.read		= nfsctl_transaction_read,
 	.release	= simple_transaction_release,
@@ -146,7 +146,7 @@ static int exports_open(struct inode *inode, struct file *file)
 	return seq_open(file, &nfs_exports_op);
 }
 
-static struct file_operations exports_operations = {
+static const struct file_operations exports_operations = {
 	.open		= exports_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 1cf955b..57265d5 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -80,7 +80,7 @@ static int nfsd_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, nfsd_proc_show, NULL);
 }
 
-static struct file_operations nfsd_proc_fops = {
+static const struct file_operations nfsd_proc_fops = {
 	.owner = THIS_MODULE,
 	.open = nfsd_proc_open,
 	.read  = seq_read,
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 9d9ed3f..d1e2c6f 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1553,7 +1553,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
 
 #endif /* NTFS_RW */
 
-struct file_operations ntfs_dir_ops = {
+const struct file_operations ntfs_dir_ops = {
 	.llseek		= generic_file_llseek,	/* Seek inside directory. */
 	.read		= generic_read_dir,	/* Return -EISDIR. */
 	.readdir	= ntfs_readdir,		/* Read directory contents. */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f5d057e..c63a83e 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 
 #endif /* NTFS_RW */
 
-struct file_operations ntfs_file_ops = {
+const struct file_operations ntfs_file_ops = {
 	.llseek		= generic_file_llseek,	 /* Seek inside file. */
 	.read		= generic_file_read,	 /* Read from file. */
 	.aio_read	= generic_file_aio_read, /* Async read from file. */
@@ -2337,6 +2337,6 @@ struct inode_operations ntfs_file_inode_ops = {
 #endif /* NTFS_RW */
 };
 
-struct file_operations ntfs_empty_file_ops = {};
+const struct file_operations ntfs_empty_file_ops = {};
 
 struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 1661429..bf7b3d7 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -60,13 +60,13 @@ extern struct kmem_cache *ntfs_index_ctx_cache;
 extern struct address_space_operations ntfs_aops;
 extern struct address_space_operations ntfs_mst_aops;
 
-extern struct  file_operations ntfs_file_ops;
+extern const struct  file_operations ntfs_file_ops;
 extern struct inode_operations ntfs_file_inode_ops;
 
-extern struct  file_operations ntfs_dir_ops;
+extern const struct  file_operations ntfs_dir_ops;
 extern struct inode_operations ntfs_dir_inode_ops;
 
-extern struct  file_operations ntfs_empty_file_ops;
+extern const struct  file_operations ntfs_empty_file_ops;
 extern struct inode_operations ntfs_empty_inode_ops;
 
 extern struct export_operations ntfs_export_ops;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 84f153a..64cd528 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2017,7 +2017,7 @@ out:
 	return ret;
 }
 
-static struct file_operations ocfs2_dlm_debug_fops = {
+static const struct file_operations ocfs2_dlm_debug_fops = {
 	.open =		ocfs2_dlm_debug_open,
 	.release =	ocfs2_dlm_debug_release,
 	.read =		seq_read,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4b4cbad..34e903a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ struct inode_operations ocfs2_special_file_iops = {
 	.getattr	= ocfs2_getattr,
 };
 
-struct file_operations ocfs2_fops = {
+const struct file_operations ocfs2_fops = {
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.sendfile	= generic_file_sendfile,
@@ -1188,7 +1188,7 @@ struct file_operations ocfs2_fops = {
 	.aio_write	= ocfs2_file_aio_write,
 };
 
-struct file_operations ocfs2_dops = {
+const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index a5ea33b..740c9e7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -26,8 +26,8 @@
 #ifndef OCFS2_FILE_H
 #define OCFS2_FILE_H
 
-extern struct file_operations ocfs2_fops;
-extern struct file_operations ocfs2_dops;
+extern const struct file_operations ocfs2_fops;
+extern const struct file_operations ocfs2_dops;
 extern struct inode_operations ocfs2_file_iops;
 extern struct inode_operations ocfs2_special_file_iops;
 struct ocfs2_alloc_context;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index aeb0106..0f14276 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -581,17 +581,17 @@ int property_release (struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations openpromfs_prop_ops = {
+static const struct file_operations openpromfs_prop_ops = {
 	.read		= property_read,
 	.write		= property_write,
 	.release	= property_release,
 };
 
-static struct file_operations openpromfs_nodenum_ops = {
+static const struct file_operations openpromfs_nodenum_ops = {
 	.read		= nodenum_read,
 };
 
-static struct file_operations openprom_operations = {
+static const struct file_operations openprom_operations = {
 	.read		= generic_read_dir,
 	.readdir	= openpromfs_readdir,
 };
diff --git a/fs/pipe.c b/fs/pipe.c
index 4384c92..e2f4f1d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -568,7 +568,7 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
  * The file_operations structs are not static because they
  * are also used in linux/fs/fifo.c to do operations on FIFOs.
  */
-struct file_operations read_fifo_fops = {
+const struct file_operations read_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
@@ -580,7 +580,7 @@ struct file_operations read_fifo_fops = {
 	.fasync		= pipe_read_fasync,
 };
 
-struct file_operations write_fifo_fops = {
+const struct file_operations write_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= bad_pipe_r,
 	.write		= pipe_write,
@@ -592,7 +592,7 @@ struct file_operations write_fifo_fops = {
 	.fasync		= pipe_write_fasync,
 };
 
-struct file_operations rdwr_fifo_fops = {
+const struct file_operations rdwr_fifo_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index adc2cd9..17f6e8f 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -31,7 +31,7 @@ static int open_kcore(struct inode * inode, struct file * filp)
 
 static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
 
-struct file_operations proc_kcore_operations = {
+const struct file_operations proc_kcore_operations = {
 	.read		= read_kcore,
 	.open		= open_kcore,
 };
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 10d37bf..ff3b90b 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -47,7 +47,7 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 }
 
 
-struct file_operations proc_kmsg_operations = {
+const struct file_operations proc_kmsg_operations = {
 	.read		= kmsg_read,
 	.poll		= kmsg_poll,
 	.open		= kmsg_open,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 4063fb3..7efa73d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -172,7 +172,7 @@ static int open_vmcore(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-struct file_operations proc_vmcore_operations = {
+const struct file_operations proc_vmcore_operations = {
 	.read		= read_vmcore,
 	.open		= open_vmcore,
 };
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 7a8f559..9031948 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -81,7 +81,7 @@ out:
 	return 0;
 }
 
-struct file_operations qnx4_dir_operations =
+const struct file_operations qnx4_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index c33963f..62af4b1 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -19,7 +19,7 @@
  * We have mostly NULL's here: the current defaults are ok for
  * the qnx4 filesystem.
  */
-struct file_operations qnx4_file_operations =
+const struct file_operations qnx4_file_operations =
 {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 6ada209..00a933e 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -32,7 +32,7 @@ struct address_space_operations ramfs_aops = {
 	.commit_write	= simple_commit_write
 };
 
-struct file_operations ramfs_file_operations = {
+const struct file_operations ramfs_file_operations = {
 	.read		= generic_file_read,
 	.write		= generic_file_write,
 	.mmap		= generic_file_mmap,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index b1ca234..f443a84 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -33,7 +33,7 @@ struct address_space_operations ramfs_aops = {
 	.commit_write		= simple_commit_write
 };
 
-struct file_operations ramfs_file_operations = {
+const struct file_operations ramfs_file_operations = {
 	.mmap			= ramfs_nommu_mmap,
 	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
 	.read			= generic_file_read,
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index 272c8a7..3132376 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,5 @@
 
 
 extern struct address_space_operations ramfs_aops;
-extern struct file_operations ramfs_file_operations;
+extern const struct file_operations ramfs_file_operations;
 extern struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index 34b1bf2..6256ca8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -19,7 +19,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-struct file_operations generic_ro_fops = {
+const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.mmap		= generic_file_readonly_mmap,
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d71ac65..973c819 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -18,7 +18,7 @@ static int reiserfs_readdir(struct file *, void *, filldir_t);
 static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
 			      int datasync);
 
-struct file_operations reiserfs_dir_operations = {
+const struct file_operations reiserfs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = reiserfs_readdir,
 	.fsync = reiserfs_dir_fsync,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index d0c1e86..010094d1 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1566,7 +1566,7 @@ static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
 	return generic_file_aio_write(iocb, buf, count, pos);
 }
 
-struct file_operations reiserfs_file_operations = {
+const struct file_operations reiserfs_file_operations = {
 	.read = generic_file_read,
 	.write = reiserfs_file_write,
 	.ioctl = reiserfs_ioctl,
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index ef6caed..731688e 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -470,7 +470,7 @@ static int r_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static struct file_operations r_file_operations = {
+static const struct file_operations r_file_operations = {
 	.open = r_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c2fc424..9b9eda7 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -463,7 +463,7 @@ static struct address_space_operations romfs_aops = {
 	.readpage = romfs_readpage
 };
 
-static struct file_operations romfs_dir_operations = {
+static const struct file_operations romfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= romfs_readdir,
 };
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 0424d06..34c7a11 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -34,7 +34,7 @@ static int smb_rename(struct inode *, struct dentry *,
 static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
 static int smb_link(struct dentry *, struct inode *, struct dentry *);
 
-struct file_operations smb_dir_operations =
+const struct file_operations smb_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= smb_readdir,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 7042e62..c56bd99 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -401,7 +401,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return error;
 }
 
-struct file_operations smb_file_operations =
+const struct file_operations smb_file_operations =
 {
 	.llseek		= remote_llseek,
 	.read		= smb_file_read,
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index e866ec8..4766459 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -35,7 +35,7 @@ extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const
 extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
 extern void smb_install_null_ops(struct smb_ops *ops);
 /* dir.c */
-extern struct file_operations smb_dir_operations;
+extern const struct file_operations smb_dir_operations;
 extern struct inode_operations smb_dir_inode_operations;
 extern struct inode_operations smb_dir_inode_operations_unix;
 extern void smb_new_dentry(struct dentry *dentry);
@@ -64,7 +64,7 @@ extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
 /* file.c */
 extern struct address_space_operations smb_file_aops;
-extern struct file_operations smb_file_operations;
+extern const struct file_operations smb_file_operations;
 extern struct inode_operations smb_file_inode_operations;
 /* ioctl.c */
 extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 78899ee..c16a93c 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -163,7 +163,7 @@ static int release(struct inode * inode, struct file * file)
 	return 0;
 }
 
-struct file_operations bin_fops = {
+const struct file_operations bin_fops = {
 	.read		= read,
 	.write		= write,
 	.mmap		= mmap,
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 9ee9568..f26880a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -503,7 +503,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 	return offset;
 }
 
-struct file_operations sysfs_dir_operations = {
+const struct file_operations sysfs_dir_operations = {
 	.open		= sysfs_dir_open,
 	.release	= sysfs_dir_close,
 	.llseek		= sysfs_dir_lseek,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5e83e72..830f76f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -348,7 +348,7 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations sysfs_file_operations = {
+const struct file_operations sysfs_file_operations = {
 	.read		= sysfs_read_file,
 	.write		= sysfs_write_file,
 	.llseek		= generic_file_llseek,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index cf11d5b..32958a7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -21,9 +21,9 @@ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
 extern struct super_block * sysfs_sb;
-extern struct file_operations sysfs_dir_operations;
-extern struct file_operations sysfs_file_operations;
-extern struct file_operations bin_fops;
+extern const struct file_operations sysfs_dir_operations;
+extern const struct file_operations sysfs_file_operations;
+extern const struct file_operations bin_fops;
 extern struct inode_operations sysfs_dir_inode_operations;
 extern struct inode_operations sysfs_symlink_inode_operations;
 
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index cce8b05..8c66e92 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -20,7 +20,7 @@
 
 static int sysv_readdir(struct file *, void *, filldir_t);
 
-struct file_operations sysv_dir_operations = {
+const struct file_operations sysv_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= sysv_readdir,
 	.fsync		= sysv_sync_file,
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index da69abc..a59e303 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -19,7 +19,7 @@
  * We have mostly NULLs here: the current defaults are OK for
  * the coh filesystem.
  */
-struct file_operations sysv_file_operations = {
+const struct file_operations sysv_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index b7f9b4a..393a480 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -159,8 +159,8 @@ extern ino_t sysv_inode_by_name(struct dentry *);
 extern struct inode_operations sysv_file_inode_operations;
 extern struct inode_operations sysv_dir_inode_operations;
 extern struct inode_operations sysv_fast_symlink_inode_operations;
-extern struct file_operations sysv_file_operations;
-extern struct file_operations sysv_dir_operations;
+extern const struct file_operations sysv_file_operations;
+extern const struct file_operations sysv_dir_operations;
 extern struct address_space_operations sysv_aops;
 extern struct super_operations sysv_sops;
 extern struct dentry_operations sysv_dentry_operations;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index f522252..8c28efa 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -42,7 +42,7 @@ static int do_udf_readdir(struct inode *, struct file *, filldir_t, void *);
 
 /* readdir and lookup functions */
 
-struct file_operations udf_dir_operations = {
+const struct file_operations udf_dir_operations = {
 	.read			= generic_read_dir,
 	.readdir		= udf_readdir,
 	.ioctl			= udf_ioctl,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index a6f2acc..e34b00e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -248,7 +248,7 @@ static int udf_release_file(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-struct file_operations udf_file_operations = {
+const struct file_operations udf_file_operations = {
 	.read			= generic_file_read,
 	.ioctl			= udf_ioctl,
 	.open			= generic_file_open,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 1d5800e..023e19b 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -44,9 +44,9 @@ struct buffer_head;
 struct super_block;
 
 extern struct inode_operations udf_dir_inode_operations;
-extern struct file_operations udf_dir_operations;
+extern const struct file_operations udf_dir_operations;
 extern struct inode_operations udf_file_inode_operations;
-extern struct file_operations udf_file_operations;
+extern const struct file_operations udf_file_operations;
 extern struct address_space_operations udf_aops;
 extern struct address_space_operations udf_adinicb_aops;
 extern struct address_space_operations udf_symlink_aops;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 7c10c68..1a56120 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -620,7 +620,7 @@ int ufs_empty_dir (struct inode * inode)
 	return 1;
 }
 
-struct file_operations ufs_dir_operations = {
+const struct file_operations ufs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ufs_readdir,
 	.fsync		= file_fsync,
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 62ad481..312fd3f 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -31,7 +31,7 @@
  * the ufs filesystem.
  */
  
-struct file_operations ufs_file_operations = {
+const struct file_operations ufs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
 	.write		= generic_file_write,
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 185567a..85997b1 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -528,7 +528,7 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
-struct file_operations xfs_file_operations = {
+const struct file_operations xfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
@@ -550,7 +550,7 @@ struct file_operations xfs_file_operations = {
 #endif
 };
 
-struct file_operations xfs_invis_file_operations = {
+const struct file_operations xfs_invis_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
@@ -570,7 +570,7 @@ struct file_operations xfs_invis_file_operations = {
 };
 
 
-struct file_operations xfs_dir_file_operations = {
+const struct file_operations xfs_dir_file_operations = {
 	.read		= generic_read_dir,
 	.readdir	= xfs_file_readdir,
 	.unlocked_ioctl	= xfs_file_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index a8417d7..ad6173d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,9 +22,9 @@ extern struct inode_operations xfs_inode_operations;
 extern struct inode_operations xfs_dir_inode_operations;
 extern struct inode_operations xfs_symlink_inode_operations;
 
-extern struct file_operations xfs_file_operations;
-extern struct file_operations xfs_dir_file_operations;
-extern struct file_operations xfs_invis_file_operations;
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+extern const struct file_operations xfs_invis_file_operations;
 
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                         int, unsigned int, void __user *);
diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index cc621ec..b3ecf8f 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -30,9 +30,9 @@ extern struct inode_operations coda_ioctl_inode_operations;
 extern struct address_space_operations coda_file_aops;
 extern struct address_space_operations coda_symlink_aops;
 
-extern struct file_operations coda_dir_operations;
-extern struct file_operations coda_file_operations;
-extern struct file_operations coda_ioctl_operations;
+extern const struct file_operations coda_dir_operations;
+extern const struct file_operations coda_file_operations;
+extern const struct file_operations coda_ioctl_operations;
 
 /* operations shared over more than one file */
 int coda_open(struct inode *i, struct file *f);
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 534d750..3250365 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -11,7 +11,7 @@
 extern unsigned long long elfcorehdr_addr;
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
-extern struct file_operations proc_vmcore_operations;
+extern const struct file_operations proc_vmcore_operations;
 extern struct proc_dir_entry *proc_vmcore;
 
 #endif /* CONFIG_CRASH_DUMP */
diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h
index 28f368c..fbfa6b5 100644
--- a/include/linux/efs_fs.h
+++ b/include/linux/efs_fs.h
@@ -37,7 +37,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb)
 struct statfs;
 
 extern struct inode_operations efs_dir_inode_operations;
-extern struct file_operations efs_dir_operations;
+extern const struct file_operations efs_dir_operations;
 extern struct address_space_operations efs_symlink_aops;
 
 extern void efs_read_inode(struct inode *);
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 8bb4f84..3ade6a4 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -833,11 +833,11 @@ do {								\
  */
 
 /* dir.c */
-extern struct file_operations ext3_dir_operations;
+extern const struct file_operations ext3_dir_operations;
 
 /* file.c */
 extern struct inode_operations ext3_file_inode_operations;
-extern struct file_operations ext3_file_operations;
+extern const struct file_operations ext3_file_operations;
 
 /* namei.c */
 extern struct inode_operations ext3_dir_inode_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ef355bc..408fe89 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1390,11 +1390,11 @@ extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
-extern struct file_operations def_blk_fops;
+extern const struct file_operations def_blk_fops;
 extern struct address_space_operations def_blk_aops;
-extern struct file_operations def_chr_fops;
-extern struct file_operations bad_sock_fops;
-extern struct file_operations def_fifo_fops;
+extern const struct file_operations def_chr_fops;
+extern const struct file_operations bad_sock_fops;
+extern const struct file_operations def_fifo_fops;
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
 extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
@@ -1444,9 +1444,9 @@ extern void init_special_inode(struct inode *, umode_t, dev_t);
 extern void make_bad_inode(struct inode *);
 extern int is_bad_inode(struct inode *);
 
-extern struct file_operations read_fifo_fops;
-extern struct file_operations write_fifo_fops;
-extern struct file_operations rdwr_fifo_fops;
+extern const struct file_operations read_fifo_fops;
+extern const struct file_operations write_fifo_fops;
+extern const struct file_operations rdwr_fifo_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
@@ -1688,7 +1688,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
 				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
 }
 
-extern struct file_operations generic_ro_fops;
+extern const struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
 
@@ -1744,9 +1744,9 @@ extern int simple_commit_write(struct file *file, struct page *page,
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
-extern struct file_operations simple_dir_operations;
+extern const struct file_operations simple_dir_operations;
 extern struct inode_operations simple_dir_inode_operations;
-struct tree_descr { char *name; struct file_operations *ops; int mode; };
+struct tree_descr { char *name; const struct file_operations *ops; int mode; };
 struct dentry *d_alloc_name(struct dentry *, const char *);
 extern int simple_fill_super(struct super_block *, int, struct tree_descr *);
 extern int simple_pin_fs(char *name, struct vfsmount **mount, int *count);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d6f1019..4c5e610 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -154,7 +154,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
-extern struct file_operations hugetlbfs_file_operations;
+extern const struct file_operations hugetlbfs_file_operations;
 extern struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_zero_setup(size_t);
 int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 53cee15..d9035c7 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -334,7 +334,7 @@ extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
 		    unsigned long *mapped_blocks);
 
 /* fat/dir.c */
-extern struct file_operations fat_dir_operations;
+extern const struct file_operations fat_dir_operations;
 extern int fat_search_long(struct inode *inode, const unsigned char *name,
 			   int name_len, struct fat_slot_info *sinfo);
 extern int fat_dir_empty(struct inode *dir);
@@ -397,7 +397,7 @@ extern int fat_count_free_clusters(struct super_block *sb);
 /* fat/file.c */
 extern int fat_generic_ioctl(struct inode *inode, struct file *filp,
 			     unsigned int cmd, unsigned long arg);
-extern struct file_operations fat_file_operations;
+extern const struct file_operations fat_file_operations;
 extern struct inode_operations fat_file_inode_operations;
 extern int fat_notify_change(struct dentry * dentry, struct iattr * attr);
 extern void fat_truncate(struct inode *inode);
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index e013425..96dc237 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -209,7 +209,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *);
 
 /* linux/fs/ncpfs/dir.c */
 extern struct inode_operations ncp_dir_inode_operations;
-extern struct file_operations ncp_dir_operations;
+extern const struct file_operations ncp_dir_operations;
 int ncp_conn_logged_in(struct super_block *);
 int ncp_date_dos2unix(__le16 time, __le16 date);
 void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date);
@@ -230,7 +230,7 @@ void ncp_unlock_server(struct ncp_server *server);
 
 /* linux/fs/ncpfs/file.c */
 extern struct inode_operations ncp_file_inode_operations;
-extern struct file_operations ncp_file_operations;
+extern const struct file_operations ncp_file_operations;
 int ncp_make_open(struct inode *, int);
 
 /* linux/fs/ncpfs/mmap.c */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cbebd7d..c71227d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -324,7 +324,7 @@ extern struct inode_operations nfs_file_inode_operations;
 #ifdef CONFIG_NFS_V3
 extern struct inode_operations nfs3_file_inode_operations;
 #endif /* CONFIG_NFS_V3 */
-extern struct file_operations nfs_file_operations;
+extern const struct file_operations nfs_file_operations;
 extern struct address_space_operations nfs_file_aops;
 
 static inline struct rpc_cred *nfs_file_cred(struct file *file)
@@ -371,7 +371,7 @@ extern struct inode_operations nfs_dir_inode_operations;
 #ifdef CONFIG_NFS_V3
 extern struct inode_operations nfs3_dir_inode_operations;
 #endif /* CONFIG_NFS_V3 */
-extern struct file_operations nfs_dir_operations;
+extern const struct file_operations nfs_dir_operations;
 extern struct dentry_operations nfs_dentry_operations;
 
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 6d03d02..135871d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -128,9 +128,9 @@ extern int proc_match(int, const char *,struct proc_dir_entry *);
 extern int proc_readdir(struct file *, void *, filldir_t);
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
-extern struct file_operations proc_kcore_operations;
-extern struct file_operations proc_kmsg_operations;
-extern struct file_operations ppc_htab_operations;
+extern const struct file_operations proc_kcore_operations;
+extern const struct file_operations proc_kmsg_operations;
+extern const struct file_operations ppc_htab_operations;
 
 /*
  * proc_tty.c
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index fc610bb..27f49c8 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -118,8 +118,8 @@ extern struct buffer_head *qnx4_bread(struct inode *, int, int);
 
 extern struct inode_operations qnx4_file_inode_operations;
 extern struct inode_operations qnx4_dir_inode_operations;
-extern struct file_operations qnx4_file_operations;
-extern struct file_operations qnx4_dir_operations;
+extern const struct file_operations qnx4_file_operations;
+extern const struct file_operations qnx4_dir_operations;
 extern int qnx4_is_free(struct super_block *sb, long block);
 extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
 extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 953b6df..78ecfa2 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -15,7 +15,7 @@ extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 #endif
 
-extern struct file_operations ramfs_file_operations;
+extern const struct file_operations ramfs_file_operations;
 extern struct vm_operations_struct generic_file_vm_ops;
 
 #endif
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 912f1b7..5676c42 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1960,7 +1960,7 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
 extern struct inode_operations reiserfs_dir_inode_operations;
 extern struct inode_operations reiserfs_symlink_inode_operations;
 extern struct inode_operations reiserfs_special_inode_operations;
-extern struct file_operations reiserfs_dir_operations;
+extern const struct file_operations reiserfs_dir_operations;
 
 /* tail_conversion.c */
 int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
@@ -1972,7 +1972,7 @@ void reiserfs_unmap_buffer(struct buffer_head *);
 
 /* file.c */
 extern struct inode_operations reiserfs_file_inode_operations;
-extern struct file_operations reiserfs_file_operations;
+extern const struct file_operations reiserfs_file_operations;
 extern struct address_space_operations reiserfs_address_space_operations;
 
 /* fix_nodes.c */
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index b0ffe43..843aeaa 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -895,7 +895,7 @@ extern void ufs_set_link(struct inode *, struct ufs_dir_entry *, struct buffer_h
 
 /* file.c */
 extern struct inode_operations ufs_file_inode_operations;
-extern struct file_operations ufs_file_operations;
+extern const struct file_operations ufs_file_operations;
 
 extern struct address_space_operations ufs_aops;
 
@@ -915,7 +915,7 @@ extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
 extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
 
 /* namei.c */
-extern struct file_operations ufs_dir_operations;
+extern const struct file_operations ufs_dir_operations;
         
 /* super.c */
 extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4)));
diff --git a/net/nonet.c b/net/nonet.c
index 1230f0a..92e7664 100644
--- a/net/nonet.c
+++ b/net/nonet.c
@@ -19,7 +19,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 	return -ENXIO;
 }
 
-struct file_operations bad_sock_fops = {
+const struct file_operations bad_sock_fops = {
 	.owner = THIS_MODULE,
 	.open = sock_no_open,
 };
diff --git a/net/socket.c b/net/socket.c
index 5211ba2..fcd77ea 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -539,7 +539,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 	return -ENXIO;
 }
 
-struct file_operations bad_sock_fops = {
+const struct file_operations bad_sock_fops = {
 	.owner = THIS_MODULE,
 	.open = sock_no_open,
 };
-- 
cgit v0.10.2


From 50fc9999ec27ad66ce6db31ebb03759f77962bc1 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:43 -0800
Subject: [PATCH] Docs update: missing files and descriptions for
 filesystems/00-INDEX

Add missing files and descriptions to
 Documentation/filesystems/00-INDEX

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 74052d2..66fdc07 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -1,27 +1,47 @@
 00-INDEX
 	- this file (info on some of the filesystems supported by linux).
+Exporting
+	- explanation of how to make filesystems exportable.
 Locking
 	- info on locking rules as they pertain to Linux VFS.
 adfs.txt
 	- info and mount options for the Acorn Advanced Disc Filing System.
+afs.txt
+	- info and examples for the distributed AFS (Andrew File System) fs.
 affs.txt
 	- info and mount options for the Amiga Fast File System.
+automount-support.txt
+	- information about filesystem automount support.
+befs.txt
+	- information about the BeOS filesystem for Linux.
 bfs.txt
 	- info for the SCO UnixWare Boot Filesystem (BFS).
 cifs.txt
-	- description of the CIFS filesystem
+	- description of the CIFS filesystem.
 coda.txt
 	- description of the CODA filesystem.
 configfs/
 	- directory containing configfs documentation and example code.
 cramfs.txt
-	- info on the cram filesystem for small storage (ROMs etc)
+	- info on the cram filesystem for small storage (ROMs etc).
+dentry-locking.txt
+	- info on the RCU-based dcache locking model.
 devfs/
 	- directory containing devfs documentation.
+directory-locking
+	- info about the locking scheme used for directory operations.
 dlmfs.txt
 	- info on the userspace interface to the OCFS2 DLM.
 ext2.txt
 	- info, mount options and specifications for the Ext2 filesystem.
+ext3.txt
+	- info, mount options and specifications for the Ext3 filesystem.
+files.txt
+	- info on file management in the Linux kernel.
+fuse.txt
+	- info on the Filesystem in User SpacE including mount options.
+hfs.txt
+	- info on the Macintosh HFS Filesystem for Linux.
 hpfs.txt
 	- info and mount options for the OS/2 HPFS.
 isofs.txt
@@ -32,23 +52,43 @@ ncpfs.txt
 	- info on Novell Netware(tm) filesystem using NCP protocol.
 ntfs.txt
 	- info and mount options for the NTFS filesystem (Windows NT).
-proc.txt
-	- info on Linux's /proc filesystem.
 ocfs2.txt
 	- info and mount options for the OCFS2 clustered filesystem.
+porting
+	- various information on filesystem porting.
+proc.txt
+	- info on Linux's /proc filesystem.
+ramfs-rootfs-initramfs.txt
+	- info on the 'in memory' filesystems ramfs, rootfs and initramfs.
+reiser4.txt
+	- info on the Reiser4 filesystem based on dancing tree algorithms.
+relayfs.txt
+	- info on relayfs, for efficient streaming from kernel to user space.
 romfs.txt
-	- Description of the ROMFS filesystem.
+	- description of the ROMFS filesystem.
 smbfs.txt
-	- info on using filesystems with the SMB protocol (Windows 3.11 and NT)
+	- info on using filesystems with the SMB protocol (Win 3.11 and NT).
+spufs.txt
+	- info and mount options for the SPU filesystem used on Cell.
+sysfs-pci.txt
+	- info on accessing PCI device resources through sysfs.
+sysfs.txt
+	- info on sysfs, a ram-based filesystem for exporting kernel objects.
 sysv-fs.txt
 	- info on the SystemV/V7/Xenix/Coherent filesystem.
+tmpfs.txt
+	- info on tmpfs, a filesystem that holds all files in virtual memory.
 udf.txt
 	- info and mount options for the UDF filesystem.
 ufs.txt
 	- info on the ufs filesystem.
+v9fs.txt
+	- v9fs is a Unix implementation of the Plan 9 9p remote fs protocol.
 vfat.txt
 	- info on using the VFAT filesystem used in Windows NT and Windows 95
 vfs.txt
-	- Overview of the Virtual File System
+	- overview of the Virtual File System
 xfs.txt
 	- info and mount options for the XFS filesystem.
+xip.txt
+	- info on execute-in-place for file mappings.
-- 
cgit v0.10.2


From f45e4656ac0609437267b242953c07d523649f8d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 28 Mar 2006 01:56:43 -0800
Subject: [PATCH] arch/i386/kernel/microcode.c: remove the obsolete
 microcode_ioctl

Nowadays, even Debian stable ships a microcode_ctl utility recent enough to no
longer use this ioctl.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Tigran Aivazian <tigran_aivazian@symantec.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index aa7ba00..171a44e 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -78,8 +78,6 @@ Code	Seq#	Include File		Comments
 '#'	00-3F	IEEE 1394 Subsystem	Block for the entire subsystem
 '1'	00-1F	<linux/timepps.h>	PPS kit from Ulrich Windl
 					<ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
-'6'	00-10	<asm-i386/processor.h>	Intel IA32 microcode update driver
-					<mailto:tigran@veritas.com>
 '8'	all				SNP8023 advanced NIC card
 					<mailto:mcr@solidum.com>
 'A'	00-1F	linux/apm_bios.h
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index dd780a0..e7c138f 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -459,26 +459,9 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
 	return ret;
 }
 
-static int microcode_ioctl (struct inode *inode, struct file *file, 
-		unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-		/* 
-		 *  XXX: will be removed after microcode_ctl 
-		 *  is updated to ignore failure of this ioctl()
-		 */
-		case MICROCODE_IOCFREE:
-			return 0;
-		default:
-			return -EINVAL;
-	}
-	return -EINVAL;
-}
-
 static struct file_operations microcode_fops = {
 	.owner		= THIS_MODULE,
 	.write		= microcode_write,
-	.ioctl		= microcode_ioctl,
 	.open		= microcode_open,
 };
 
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index af4bfd0..805f0dc 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -621,8 +621,6 @@ struct extended_sigtable {
 	unsigned int reserved[3];
 	struct extended_signature sigs[0];
 };
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE	_IO('6',0)
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 static inline void rep_nop(void)
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 1aa2cee..37a3ec4 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -358,9 +358,6 @@ struct extended_sigtable {
 	struct extended_signature sigs[0];
 };
 
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE	_IO('6',0)
-
 
 #define ASM_NOP1 K8_NOP1
 #define ASM_NOP2 K8_NOP2
-- 
cgit v0.10.2


From 50297cbf07427b47f0fff5ade8e21cdabf860249 Mon Sep 17 00:00:00 2001
From: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Date: Tue, 28 Mar 2006 01:56:44 -0800
Subject: [PATCH] drivers/block/*: use time_after() and friends

They deal with wrapping correctly and are nicer to read.

Signed-off-by: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 49c7cd5..45bcda5 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -41,6 +41,7 @@
 #include <linux/timer.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/random.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -3654,8 +3655,8 @@ static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command)
 	      (NewEnquiry->EventLogSequenceNumber !=
 	       OldEnquiry->EventLogSequenceNumber) ||
 	      Controller->MonitoringTimerCount == 0 ||
-	      (jiffies - Controller->SecondaryMonitoringTime
-	       >= DAC960_SecondaryMonitoringInterval))
+	      time_after_eq(jiffies, Controller->SecondaryMonitoringTime
+	       + DAC960_SecondaryMonitoringInterval))
 	    {
 	      Controller->V1.NeedLogicalDriveInformation = true;
 	      Controller->V1.NewEventLogSequenceNumber =
@@ -5640,8 +5641,8 @@ static void DAC960_MonitoringTimerFunction(unsigned long TimerData)
       unsigned int StatusChangeCounter =
 	Controller->V2.HealthStatusBuffer->StatusChangeCounter;
       boolean ForceMonitoringCommand = false;
-      if (jiffies - Controller->SecondaryMonitoringTime
-	  > DAC960_SecondaryMonitoringInterval)
+      if (time_after(jiffies, Controller->SecondaryMonitoringTime
+	  + DAC960_SecondaryMonitoringInterval))
 	{
 	  int LogicalDriveNumber;
 	  for (LogicalDriveNumber = 0;
@@ -5669,8 +5670,8 @@ static void DAC960_MonitoringTimerFunction(unsigned long TimerData)
 	   ControllerInfo->ConsistencyChecksActive +
 	   ControllerInfo->RebuildsActive +
 	   ControllerInfo->OnlineExpansionsActive == 0 ||
-	   jiffies - Controller->PrimaryMonitoringTime
-	   < DAC960_MonitoringTimerInterval) &&
+	   time_before(jiffies, Controller->PrimaryMonitoringTime
+	   + DAC960_MonitoringTimerInterval)) &&
 	  !ForceMonitoringCommand)
 	{
 	  Controller->MonitoringTimer.expires =
@@ -5807,8 +5808,8 @@ static void DAC960_Message(DAC960_MessageLevel_T MessageLevel,
       Controller->ProgressBufferLength = Length;
       if (Controller->EphemeralProgressMessage)
 	{
-	  if (jiffies - Controller->LastProgressReportTime
-	      >= DAC960_ProgressReportingInterval)
+	  if (time_after_eq(jiffies, Controller->LastProgressReportTime
+	      + DAC960_ProgressReportingInterval))
 	    {
 	      printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
 		     Controller->ControllerNumber, Buffer);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index d3ad908..bedb689 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -170,6 +170,7 @@ static int print_unex = 1;
 #include <linux/mm.h>
 #include <linux/bio.h>
 #include <linux/string.h>
+#include <linux/jiffies.h>
 #include <linux/fcntl.h>
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>	/* CMOS defines */
@@ -747,7 +748,7 @@ static int disk_change(int drive)
 {
 	int fdc = FDC(drive);
 #ifdef FLOPPY_SANITY_CHECK
-	if (jiffies - UDRS->select_date < UDP->select_delay)
+	if (time_before(jiffies, UDRS->select_date + UDP->select_delay))
 		DPRINT("WARNING disk change called early\n");
 	if (!(FDCS->dor & (0x10 << UNIT(drive))) ||
 	    (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
@@ -1075,7 +1076,7 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
 		return 1;
 	}
 
-	if ((signed)(jiffies - delay) < 0) {
+	if (time_before(jiffies, delay)) {
 		del_timer(&fd_timer);
 		fd_timer.function = function;
 		fd_timer.expires = delay;
@@ -1535,7 +1536,7 @@ static void setup_rw_floppy(void)
 		 * again just before spinup completion. Beware that
 		 * after scandrives, we must again wait for selection.
 		 */
-		if ((signed)(ready_date - jiffies) > DP->select_delay) {
+		if (time_after(ready_date, jiffies + DP->select_delay)) {
 			ready_date -= DP->select_delay;
 			function = (timeout_fn) floppy_start;
 		} else
@@ -3823,7 +3824,7 @@ static int check_floppy_change(struct gendisk *disk)
 	if (UTESTF(FD_DISK_CHANGED) || UTESTF(FD_VERIFY))
 		return 1;
 
-	if (UDP->checkfreq < (int)(jiffies - UDRS->last_checked)) {
+	if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
 		if (floppy_grab_irq_and_dma()) {
 			return 1;
 		}
-- 
cgit v0.10.2


From 05613bdd8647bfc4535b0dcc8f1b95c8b39be394 Mon Sep 17 00:00:00 2001
From: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Date: Tue, 28 Mar 2006 01:56:45 -0800
Subject: [PATCH] nvidia-agp: use time_before_eq()

It deals with wrapping correctly and is nicer to read.

Signed-off-by: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 70b8ed9..4c67135 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -11,6 +11,7 @@
 #include <linux/gfp.h>
 #include <linux/page-flags.h>
 #include <linux/mm.h>
+#include <linux/jiffies.h>
 #include "agp.h"
 
 /* NVIDIA registers */
@@ -256,7 +257,7 @@ static void nvidia_tlbflush(struct agp_memory *mem)
 		do {
 			pci_read_config_dword(nvidia_private.dev_1,
 					NVIDIA_1_WBC, &wbc_reg);
-			if ((signed)(end - jiffies) <= 0) {
+			if (time_before_eq(end, jiffies)) {
 				printk(KERN_ERR PFX
 				    "TLB flush took more than 3 seconds.\n");
 			}
-- 
cgit v0.10.2


From 9bae1ff3e7926fe5e92db2f3c6d8832f18f777bc Mon Sep 17 00:00:00 2001
From: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Date: Tue, 28 Mar 2006 01:56:46 -0800
Subject: [PATCH] ide-tape: use time_after(), time_after_eq()

They deal with wrapping correctly and are nicer to read.

Signed-off-by: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index ebc5906..f04791a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -433,6 +433,7 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/jiffies.h>
 #include <linux/major.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/errno.h>
@@ -2336,7 +2337,7 @@ static ide_startstop_t idetape_rw_callback (ide_drive_t *drive)
 	}
 	if (time_after(jiffies, tape->insert_time))
 		tape->insert_speed = tape->insert_size / 1024 * HZ / (jiffies - tape->insert_time);
-	if (jiffies - tape->avg_time >= HZ) {
+	if (time_after_eq(jiffies, tape->avg_time + HZ)) {
 		tape->avg_speed = tape->avg_size * HZ / (jiffies - tape->avg_time) / 1024;
 		tape->avg_size = 0;
 		tape->avg_time = jiffies;
@@ -2497,7 +2498,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			} else {
 				return ide_do_reset(drive);
 			}
-		} else if (jiffies - tape->dsc_polling_start > IDETAPE_DSC_MA_THRESHOLD)
+		} else if (time_after(jiffies, tape->dsc_polling_start + IDETAPE_DSC_MA_THRESHOLD))
 			tape->dsc_polling_frequency = IDETAPE_DSC_MA_SLOW;
 		idetape_postpone_request(drive);
 		return ide_stopped;
-- 
cgit v0.10.2


From 60c904ae5bded8bb71f7bff7d63f2a6959d2a8e4 Mon Sep 17 00:00:00 2001
From: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Date: Tue, 28 Mar 2006 01:56:47 -0800
Subject: [PATCH] drivers/scsi/*: use time_after() and friends

They deal with wrapping correctly and are nicer to read.

Signed-off-by: Marcelo Feitoza Parisi <marcelo@feitoza.com.br>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 1c45934..5bf83cb 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -41,6 +41,7 @@
 #include <linux/stat.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
+#include <linux/jiffies.h>
 #include <scsi/scsicam.h>
 
 #include <asm/dma.h>
@@ -2896,7 +2897,7 @@ static int BusLogic_QueueCommand(struct scsi_cmnd *Command, void (*CompletionRou
 		 */
 		if (HostAdapter->ActiveCommands[TargetID] == 0)
 			HostAdapter->LastSequencePoint[TargetID] = jiffies;
-		else if (jiffies - HostAdapter->LastSequencePoint[TargetID] > 4 * HZ) {
+		else if (time_after(jiffies, HostAdapter->LastSequencePoint[TargetID] + 4 * HZ)) {
 			HostAdapter->LastSequencePoint[TargetID] = jiffies;
 			QueueTag = BusLogic_OrderedQueueTag;
 		}
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 66ea47a..e3bd4bc 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -49,6 +49,7 @@ static const char * osst_version = "0.99.4";
 #include <linux/blkdev.h>
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <asm/uaccess.h>
 #include <asm/dma.h>
 #include <asm/system.h>
@@ -856,7 +857,7 @@ static int osst_wait_frame(struct osst_tape * STp, struct osst_request ** aSRpnt
 		    ) && result >= 0)
 		{
 #if DEBUG			
-			if (debugging || jiffies - startwait >= 2*HZ/OSST_POLL_PER_SEC)
+			if (debugging || time_after_eq(jiffies, startwait + 2*HZ/OSST_POLL_PER_SEC))
 				printk (OSST_DEB_MSG
 					"%s:D: Succ wait f fr %i (>%i): %i-%i %i (%i): %3li.%li s\n",
 					name, curr, curr+minlast, STp->first_frame_position,
@@ -867,7 +868,7 @@ static int osst_wait_frame(struct osst_tape * STp, struct osst_request ** aSRpnt
 			return 0;
 		}
 #if DEBUG
-		if (jiffies - startwait >= 2*HZ/OSST_POLL_PER_SEC && notyetprinted)
+		if (time_after_eq(jiffies, startwait + 2*HZ/OSST_POLL_PER_SEC) && notyetprinted)
 		{
 			printk (OSST_DEB_MSG "%s:D: Wait for frame %i (>%i): %i-%i %i (%i)\n",
 				name, curr, curr+minlast, STp->first_frame_position,
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 05347ee..fee843f 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -18,6 +18,7 @@
 #include <linux/parport.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <asm/io.h>
 
 #include <scsi/scsi.h>
@@ -726,7 +727,7 @@ static int ppa_engine(ppa_struct *dev, struct scsi_cmnd *cmd)
 				retv--;
 
 			if (retv) {
-				if ((jiffies - dev->jstart) > (1 * HZ)) {
+				if (time_after(jiffies, dev->jstart + (1 * HZ))) {
 					printk
 					    ("ppa: Parallel port cable is unplugged!!\n");
 					ppa_fail(dev, DID_BUS_BUSY);
diff --git a/drivers/scsi/qlogicfc.c b/drivers/scsi/qlogicfc.c
index 94ef3f0..5b15998 100644
--- a/drivers/scsi/qlogicfc.c
+++ b/drivers/scsi/qlogicfc.c
@@ -61,6 +61,7 @@
 #include <linux/unistd.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/jiffies.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include "scsi.h"
@@ -1325,7 +1326,7 @@ static int isp2x00_queuecommand(Scsi_Cmnd * Cmnd, void (*done) (Scsi_Cmnd *))
 		cmd->control_flags = cpu_to_le16(CFLAG_READ);
 
 	if (Cmnd->device->tagged_supported) {
-		if ((jiffies - hostdata->tag_ages[Cmnd->device->id]) > (2 * ISP_TIMEOUT)) {
+		if (time_after(jiffies, hostdata->tag_ages[Cmnd->device->id] + (2 * ISP_TIMEOUT))) {
 			cmd->control_flags |= cpu_to_le16(CFLAG_ORDERED_TAG);
 			hostdata->tag_ages[Cmnd->device->id] = jiffies;
 		} else
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 1fd5fc6..c7e78dc 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/jiffies.h>
 
 #include <asm/byteorder.h>
 
@@ -1017,7 +1018,7 @@ static inline void cmd_frob(struct Command_Entry *cmd, struct scsi_cmnd *Cmnd,
 	if (Cmnd->device->tagged_supported) {
 		if (qpti->cmd_count[Cmnd->device->id] == 0)
 			qpti->tag_ages[Cmnd->device->id] = jiffies;
-		if ((jiffies - qpti->tag_ages[Cmnd->device->id]) > (5*HZ)) {
+		if (time_after(jiffies, qpti->tag_ages[Cmnd->device->id] + (5*HZ))) {
 			cmd->control_flags = CFLAG_ORDERED_TAG;
 			qpti->tag_ages[Cmnd->device->id] = jiffies;
 		} else
-- 
cgit v0.10.2


From 910638ae7ed4be27d6af55f6c9b5bf54b838e78b Mon Sep 17 00:00:00 2001
From: Matthias Gehre <M.Gehre@gmx.de>
Date: Tue, 28 Mar 2006 01:56:48 -0800
Subject: [PATCH] Replace 0xff.. with correct DMA_xBIT_MASK

Replace all occurences of 0xff..  in calls to function pci_set_dma_mask()
and pci_set_consistant_dma_mask() with the corresponding DMA_xBIT_MASK from
linux/dma-mapping.h.

Signed-off-by: Matthias Gehre <M.Gehre@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index 6845574..ee4bb73 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -199,6 +199,8 @@ address during PCI bus mastering you might do something like:
 		       "mydev: 24-bit DMA addressing not available.\n");
 		goto ignore_this_device;
 	}
+[Better use DMA_24BIT_MASK instead of 0x00ffffff.
+See linux/include/dma-mapping.h for reference.]
 
 When pci_set_dma_mask() is successful, and returns zero, the PCI layer
 saves away this mask you have provided.  The PCI layer will use this
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 69f4c7c..cac09e3 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -1972,7 +1972,7 @@ static int __devinit lanai_pci_start(struct lanai_dev *lanai)
 		    "(itf %d): No suitable DMA available.\n", lanai->number);
 		return -EBUSY;
 	}
-	if (pci_set_consistent_dma_mask(pci, 0xFFFFFFFF) != 0) {
+	if (pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK) != 0) {
 		printk(KERN_WARNING DEV_LABEL
 		    "(itf %d): No suitable DMA available.\n", lanai->number);
 		return -EBUSY;
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c16e66b..f7d4c65 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -50,6 +50,7 @@
 #include <linux/timer.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 
 #include <linux/fcntl.h>        /* O_ACCMODE */
 #include <linux/hdreg.h>  /* HDIO_GETGEO */
@@ -881,8 +882,8 @@ static int __devinit mm_pci_probe(struct pci_dev *dev, const struct pci_device_i
 	printk(KERN_INFO "Micro Memory(tm) controller #%d found at %02x:%02x (PCI Mem Module (Battery Backup))\n",
 	       card->card_number, dev->bus->number, dev->devfn);
 
-	if (pci_set_dma_mask(dev, 0xffffffffffffffffLL) &&
-	    pci_set_dma_mask(dev, 0xffffffffLL)) {
+	if (pci_set_dma_mask(dev, DMA_64BIT_MASK) &&
+	    pci_set_dma_mask(dev, DMA_32BIT_MASK)) {
 		printk(KERN_WARNING "MM%d: NO suitable DMA found\n",num_cards);
 		return  -ENOMEM;
 	}
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e7fc28b..7627a75 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -134,6 +134,7 @@
 #include <linux/random.h>
 #include <linux/init.h>
 #include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/irq.h>
 #include <asm/io.h>
@@ -2932,7 +2933,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	if (id->driver_data & DEV_HAS_HIGH_DMA) {
 		/* packet format 3: supports 40-bit addressing */
 		np->desc_ver = DESC_VER_3;
-		if (pci_set_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
+		if (pci_set_dma_mask(pci_dev, DMA_39BIT_MASK)) {
 			printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
 					pci_name(pci_dev));
 		} else {
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 9b8295e..ae71ed5 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -44,6 +44,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
+#include <linux/dma-mapping.h>
 
 #ifdef CONFIG_SERIAL_8250
 #include <linux/serial_core.h>
@@ -1195,17 +1196,17 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	int err, pci_using_dac;
 
 	/* Configure DMA attributes. */
-	err = pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
+	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
 	if (!err) {
 		pci_using_dac = 1;
-		err = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL);
+		err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
 		if (err < 0) {
 			printk(KERN_ERR "%s: Unable to obtain 64 bit DMA "
 			       "for consistent allocations\n", pci_name(pdev));
 			goto out;
 		}
 	} else {
-		err = pci_set_dma_mask(pdev, 0xffffffffULL);
+		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
 		if (err) {
 			printk(KERN_ERR "%s: No usable DMA configuration, "
 			       "aborting.\n", pci_name(pdev));
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 0fede50a..8e9b1a5 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -1828,10 +1828,10 @@ static int __devinit ns83820_init_one(struct pci_dev *pci_dev, const struct pci_
 	int using_dac = 0;
 
 	/* See if we can set the dma mask early on; failure is fatal. */
-	if (sizeof(dma_addr_t) == 8 && 
-	 	!pci_set_dma_mask(pci_dev, 0xffffffffffffffffULL)) {
+	if (sizeof(dma_addr_t) == 8 &&
+	 	!pci_set_dma_mask(pci_dev, DMA_64BIT_MASK)) {
 		using_dac = 1;
-	} else if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+	} else if (!pci_set_dma_mask(pci_dev, DMA_32BIT_MASK)) {
 		using_dac = 0;
 	} else {
 		printk(KERN_WARNING "ns83820.c: pci_set_dma_mask failed!\n");
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 9d3b51c..29a756d 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -577,8 +577,8 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev,
 	   We set both dma_mask and consistent_dma_mask to 28 bits
 	   and pray pci_alloc_consistent() will use this info. It should
 	   work on most platforms */
-	if (pci_set_consistent_dma_mask(pdev, 0x0FFFFFFF) ||
-	    pci_set_dma_mask(pdev, 0x0FFFFFFF)) {
+	if (pci_set_consistent_dma_mask(pdev, DMA_28BIT_MASK) ||
+	    pci_set_dma_mask(pdev, DMA_28BIT_MASK)) {
 		printk(KERN_ERR "wanXL: No usable DMA configuration\n");
 		return -EIO;
 	}
diff --git a/drivers/net/wireless/prism54/islpci_hotplug.c b/drivers/net/wireless/prism54/islpci_hotplug.c
index b41d666..bfa0cc3 100644
--- a/drivers/net/wireless/prism54/islpci_hotplug.c
+++ b/drivers/net/wireless/prism54/islpci_hotplug.c
@@ -22,6 +22,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/init.h> /* For __init, __exit */
+#include <linux/dma-mapping.h>
 
 #include "prismcompat.h"
 #include "islpci_dev.h"
@@ -124,7 +125,7 @@ prism54_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* enable PCI DMA */
-	if (pci_set_dma_mask(pdev, 0xffffffff)) {
+	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
 		printk(KERN_ERR "%s: 32-bit PCI DMA not supported", DRV_NAME);
 		goto do_pci_disable_device;
         }
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 5bf83cb..bde3d58 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -42,6 +42,7 @@
 #include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
+#include <linux/dma-mapping.h>
 #include <scsi/scsicam.h>
 
 #include <asm/dma.h>
@@ -677,7 +678,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 		if (pci_enable_device(PCI_Device))
 			continue;
 
-		if (pci_set_dma_mask(PCI_Device, (u64) 0xffffffff))
+		if (pci_set_dma_mask(PCI_Device, DMA_32BIT_MASK ))
 			continue;
 
 		Bus = PCI_Device->bus->number;
@@ -832,7 +833,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 		if (pci_enable_device(PCI_Device))
 			continue;
 
-		if (pci_set_dma_mask(PCI_Device, (u64) 0xffffffff))
+		if (pci_set_dma_mask(PCI_Device, DMA_32BIT_MASK))
 			continue;
 
 		Bus = PCI_Device->bus->number;
@@ -886,7 +887,7 @@ static int __init BusLogic_InitializeFlashPointProbeInfo(struct BusLogic_HostAda
 		if (pci_enable_device(PCI_Device))
 			continue;
 
-		if (pci_set_dma_mask(PCI_Device, (u64) 0xffffffff))
+		if (pci_set_dma_mask(PCI_Device, DMA_32BIT_MASK))
 			continue;
 
 		Bus = PCI_Device->bus->number;
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 9f45ae1..3dce21c 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -89,6 +89,7 @@
 #include <linux/string.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -1052,7 +1053,7 @@ static int __devinit inia100_probe_one(struct pci_dev *pdev,
 
 	if (pci_enable_device(pdev))
 		goto out;
-	if (pci_set_dma_mask(pdev, 0xffffffffULL)) {
+	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
 		printk(KERN_WARNING "Unable to set 32bit DMA "
 				    "on inia100 adapter, ignoring.\n");
 		goto out_disable_device;
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index a16f8de..8df4a0e 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -32,6 +32,7 @@
 #include <linux/slab.h>
 #include <linux/completion.h>
 #include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index c259633..7203307 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -45,6 +45,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
 #include <linux/smp_lock.h>
@@ -806,8 +807,8 @@ static int __devinit aac_probe_one(struct pci_dev *pdev,
 	 * to driver communication memory to be allocated below 2gig
 	 */
 	if (aac_drivers[index].quirks & AAC_QUIRK_31BIT) 
-		if (pci_set_dma_mask(pdev, 0x7FFFFFFFULL) ||
-				pci_set_consistent_dma_mask(pdev, 0x7FFFFFFFULL))
+		if (pci_set_dma_mask(pdev, DMA_31BIT_MASK) ||
+				pci_set_consistent_dma_mask(pdev, DMA_31BIT_MASK))
 			goto out;
 	
 	pci_set_master(pdev);
diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index 5227a77..a198d86 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -28,6 +28,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
 #include <asm/system.h>
 #include <asm/io.h>
 
@@ -2631,7 +2632,7 @@ static int atp870u_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_enable_device(pdev))
 		return -EIO;
 
-        if (!pci_set_dma_mask(pdev, 0xFFFFFFFFUL)) {
+        if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
                 printk(KERN_INFO "atp870u: use 32bit DMA mask.\n");
         } else {
                 printk(KERN_ERR "atp870u: DMA mask required but not available.\n");
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index 6e6b293..b1b704a 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -57,6 +57,7 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver");
 #include <linux/reboot.h>
 #include <linux/spinlock.h>
 #include <linux/smp_lock.h>
+#include <linux/dma-mapping.h>
 
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -906,8 +907,8 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev
 	}
 
 	pci_set_master(pDev);
-	if (pci_set_dma_mask(pDev, 0xffffffffffffffffULL) &&
-	    pci_set_dma_mask(pDev, 0xffffffffULL))
+	if (pci_set_dma_mask(pDev, DMA_64BIT_MASK) &&
+	    pci_set_dma_mask(pDev, DMA_32BIT_MASK))
 		return -EINVAL;
 
 	base_addr0_phys = pci_resource_start(pDev,0);
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index b3f9de8..059eeee 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -490,6 +490,7 @@
 #include <linux/init.h>
 #include <linux/ctype.h>
 #include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
 #include <asm/byteorder.h>
 #include <asm/dma.h>
 #include <asm/io.h>
@@ -1426,7 +1427,7 @@ static int port_detect(unsigned long port_base, unsigned int j,
 
 	if (ha->pdev) {
 		pci_set_master(ha->pdev);
-		if (pci_set_dma_mask(ha->pdev, 0xffffffff))
+		if (pci_set_dma_mask(ha->pdev, DMA_32BIT_MASK))
 			printk("%s: warning, pci_set_dma_mask failed.\n",
 			       ha->board_name);
 	}
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 7f7013e..d5740bb 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -388,6 +388,7 @@
 #include <linux/proc_fs.h>
 #include <linux/time.h>
 #include <linux/timer.h>
+#include <linux/dma-mapping.h>
 #ifdef GDTH_RTC
 #include <linux/mc146818rtc.h>
 #endif
@@ -4527,15 +4528,15 @@ static int __init gdth_detect(struct scsi_host_template *shtp)
             if (!(ha->cache_feat & ha->raw_feat & ha->screen_feat &GDT_64BIT)||
                 /* 64-bit DMA only supported from FW >= x.43 */
                 (!ha->dma64_support)) {
-                if (pci_set_dma_mask(pcistr[ctr].pdev, 0xffffffff)) {
+                if (pci_set_dma_mask(pcistr[ctr].pdev, DMA_32BIT_MASK)) {
                     printk(KERN_WARNING "GDT-PCI %d: Unable to set 32-bit DMA\n", hanum);
                     err = TRUE;
                 }
             } else {
                 shp->max_cmd_len = 16;
-                if (!pci_set_dma_mask(pcistr[ctr].pdev, 0xffffffffffffffffULL)) {
+                if (!pci_set_dma_mask(pcistr[ctr].pdev, DMA_64BIT_MASK)) {
                     printk("GDT-PCI %d: 64-bit DMA enabled\n", hanum);
-                } else if (pci_set_dma_mask(pcistr[ctr].pdev, 0xffffffff)) {
+                } else if (pci_set_dma_mask(pcistr[ctr].pdev, DMA_32BIT_MASK)) {
                     printk(KERN_WARNING "GDT-PCI %d: Unable to set 64/32-bit DMA\n", hanum);
                     err = TRUE;
                 }
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index ea6f3c0..0cc7f65 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -127,6 +127,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 
 #include <scsi/scsi.h>
@@ -2780,7 +2781,7 @@ static int tul_NewReturnNumberOfAdapters(void)
 			if (((dRegValue & 0xFF00) >> 8) == 0xFF)
 				dRegValue = 0;
 			wBIOS = (wBIOS << 8) + ((UWORD) ((dRegValue & 0xFF00) >> 8));
-			if (pci_set_dma_mask(pDev, 0xffffffff)) {
+			if (pci_set_dma_mask(pDev, DMA_32BIT_MASK)) {
 				printk(KERN_WARNING 
 				       "i91u: Could not set 32 bit DMA mask\n");
 				continue;
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 481708d..a4c0b04 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -179,6 +179,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <scsi/sg.h>
 
@@ -7284,10 +7285,10 @@ ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr)
 	 * are guaranteed to be < 4G.
 	 */
 	if (IPS_ENABLE_DMA64 && IPS_HAS_ENH_SGLIST(ha) &&
-	    !pci_set_dma_mask(ha->pcidev, 0xffffffffffffffffULL)) {
+	    !pci_set_dma_mask(ha->pcidev, DMA_64BIT_MASK)) {
 		(ha)->flags |= IPS_HA_ENH_SG;
 	} else {
-		if (pci_set_dma_mask(ha->pcidev, 0xffffffffULL) != 0) {
+		if (pci_set_dma_mask(ha->pcidev, DMA_32BIT_MASK) != 0) {
 			printk(KERN_WARNING "Unable to set DMA Mask\n");
 			return ips_abort_init(ha, index);
 		}
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 7144674..80b68a2 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -45,6 +45,7 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/dma-mapping.h>
 #include <scsi/scsicam.h>
 
 #include "scsi.h"
@@ -2094,7 +2095,7 @@ make_local_pdev(adapter_t *adapter, struct pci_dev **pdev)
 
 	memcpy(*pdev, adapter->dev, sizeof(struct pci_dev));
 
-	if( pci_set_dma_mask(*pdev, 0xffffffff) != 0 ) {
+	if( pci_set_dma_mask(*pdev, DMA_32BIT_MASK) != 0 ) {
 		kfree(*pdev);
 		return -1;
 	}
@@ -4859,10 +4860,10 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Set the Mode of addressing to 64 bit if we can */
 	if ((adapter->flag & BOARD_64BIT) && (sizeof(dma_addr_t) == 8)) {
-		pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
+		pci_set_dma_mask(pdev, DMA_64BIT_MASK);
 		adapter->has_64bit_addr = 1;
 	} else  {
-		pci_set_dma_mask(pdev, 0xffffffff);
+		pci_set_dma_mask(pdev, DMA_32BIT_MASK);
 		adapter->has_64bit_addr = 0;
 	}
 		
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index a279ebb..30ee0ef 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -38,6 +38,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/dma.h>
 #include <asm/system.h>
@@ -2776,7 +2777,7 @@ static int nsp32_detect(struct scsi_host_template *sht)
 	/*
 	 * setup DMA 
 	 */
-	if (pci_set_dma_mask(PCIDEV, 0xffffffffUL) != 0) {
+	if (pci_set_dma_mask(PCIDEV, DMA_32BIT_MASK) != 0) {
 		nsp32_msg (KERN_ERR, "failed to set PCI DMA mask");
 		goto scsi_unregister;
 	}
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index e023024..5a48e55 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -350,6 +350,7 @@
 #include <linux/pci_ids.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -4321,7 +4322,7 @@ qla1280_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 #ifdef QLA_64BIT_PTR
 	if (pci_set_dma_mask(ha->pdev, (dma_addr_t) ~ 0ULL)) {
-		if (pci_set_dma_mask(ha->pdev, 0xffffffff)) {
+		if (pci_set_dma_mask(ha->pdev, DMA_32BIT_MASK)) {
 			printk(KERN_WARNING "scsi(%li): Unable to set a "
 			       "suitable DMA mask - aborting\n", ha->host_no);
 			error = -ENODEV;
@@ -4331,7 +4332,7 @@ qla1280_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		dprintk(2, "scsi(%li): 64 Bit PCI Addressing Enabled\n",
 			ha->host_no);
 #else
-	if (pci_set_dma_mask(ha->pdev, 0xffffffff)) {
+	if (pci_set_dma_mask(ha->pdev, DMA_32BIT_MASK)) {
 		printk(KERN_WARNING "scsi(%li): Unable to set a "
 		       "suitable DMA mask - aborting\n", ha->host_no);
 		error = -ENODEV;
diff --git a/drivers/scsi/qlogicfc.c b/drivers/scsi/qlogicfc.c
index 5b15998..52b224a 100644
--- a/drivers/scsi/qlogicfc.c
+++ b/drivers/scsi/qlogicfc.c
@@ -61,6 +61,7 @@
 #include <linux/unistd.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
 #include <linux/jiffies.h>
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -738,8 +739,8 @@ static int isp2x00_detect(struct scsi_host_template * tmpt)
 				continue;
 
 			/* Try to configure DMA attributes. */
-			if (pci_set_dma_mask(pdev, 0xffffffffffffffffULL) &&
-			    pci_set_dma_mask(pdev, 0xffffffffULL))
+			if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) &&
+			    pci_set_dma_mask(pdev, DMA_32BIT_MASK))
 					continue;
 
 		        host = scsi_register(tmpt, sizeof(struct isp2x00_hostdata));
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index a873106..9b4751a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -21,6 +21,7 @@ enum dma_data_direction {
 #define DMA_30BIT_MASK	0x000000003fffffffULL
 #define DMA_29BIT_MASK	0x000000001fffffffULL
 #define DMA_28BIT_MASK	0x000000000fffffffULL
+#define DMA_24BIT_MASK 0x0000000000ffffffULL
 
 #include <asm/dma-mapping.h>
 
diff --git a/sound/oss/esssolo1.c b/sound/oss/esssolo1.c
index 78d3e29..6861563 100644
--- a/sound/oss/esssolo1.c
+++ b/sound/oss/esssolo1.c
@@ -2348,7 +2348,7 @@ static int __devinit solo1_probe(struct pci_dev *pcidev, const struct pci_device
 	/* Recording requires 24-bit DMA, so attempt to set dma mask
 	 * to 24 bits first, then 32 bits (playback only) if that fails.
 	 */
-	if (pci_set_dma_mask(pcidev, 0x00ffffff) &&
+	if (pci_set_dma_mask(pcidev, DMA_24BIT_MASK) &&
 	    pci_set_dma_mask(pcidev, DMA_32BIT_MASK)) {
 		printk(KERN_WARNING "solo1: architecture does not support 24bit or 32bit PCI busmaster DMA\n");
 		return -ENODEV;
diff --git a/sound/oss/sonicvibes.c b/sound/oss/sonicvibes.c
index 4471757..42bd276 100644
--- a/sound/oss/sonicvibes.c
+++ b/sound/oss/sonicvibes.c
@@ -116,6 +116,7 @@
 #include <linux/spinlock.h>
 #include <linux/smp_lock.h>
 #include <linux/gameport.h>
+#include <linux/dma-mapping.h>
 #include <linux/mutex.h>
 
 
@@ -2535,7 +2536,7 @@ static int __devinit sv_probe(struct pci_dev *pcidev, const struct pci_device_id
 		return -ENODEV;
 	if (pcidev->irq == 0)
 		return -ENODEV;
-	if (pci_set_dma_mask(pcidev, 0x00ffffff)) {
+	if (pci_set_dma_mask(pcidev, DMA_24BIT_MASK)) {
 		printk(KERN_WARNING "sonicvibes: architecture does not support 24bit PCI busmaster DMA\n");
 		return -ENODEV;
 	}
diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c
index 2aa5a7f..c6c8333 100644
--- a/sound/pci/ad1889.c
+++ b/sound/pci/ad1889.c
@@ -39,6 +39,7 @@
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 
 #include <sound/driver.h>
 #include <sound/core.h>
diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c
index e264136..fc92b68 100644
--- a/sound/pci/ali5451/ali5451.c
+++ b/sound/pci/ali5451/ali5451.c
@@ -33,6 +33,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/info.h>
@@ -2220,8 +2221,8 @@ static int __devinit snd_ali_create(struct snd_card *card,
 	if ((err = pci_enable_device(pci)) < 0)
 		return err;
 	/* check, if we can restrict PCI DMA transfers to 31 bits */
-	if (pci_set_dma_mask(pci, 0x7fffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x7fffffff) < 0) {
+	if (pci_set_dma_mask(pci, DMA_31BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_31BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 31bit PCI busmaster DMA\n");
 		pci_disable_device(pci);
 		return -ENXIO;
diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c
index 7b2ff5f..100d812 100644
--- a/sound/pci/als4000.c
+++ b/sound/pci/als4000.c
@@ -70,6 +70,7 @@
 #include <linux/slab.h>
 #include <linux/gameport.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/rawmidi.h>
@@ -688,8 +689,8 @@ static int __devinit snd_card_als4000_probe(struct pci_dev *pci,
 		return err;
 	}
 	/* check, if we can restrict PCI DMA transfers to 24 bits */
-	if (pci_set_dma_mask(pci, 0x00ffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x00ffffff) < 0) {
+	if (pci_set_dma_mask(pci, DMA_24BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_24BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 24bit PCI busmaster DMA\n");
 		pci_disable_device(pci);
 		return -ENXIO;
diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c
index e077eb3..680077e 100644
--- a/sound/pci/azt3328.c
+++ b/sound/pci/azt3328.c
@@ -104,6 +104,7 @@
 #include <linux/slab.h>
 #include <linux/gameport.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/pcm.h>
@@ -1669,8 +1670,8 @@ snd_azf3328_create(struct snd_card *card,
 	chip->irq = -1;
 
 	/* check if we can restrict PCI DMA transfers to 24 bits */
-	if (pci_set_dma_mask(pci, 0x00ffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x00ffffff) < 0) {
+	if (pci_set_dma_mask(pci, DMA_24BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_24BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 24bit PCI busmaster DMA\n");
 		err = -ENXIO;
 		goto out_err;
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 2208dbd..3e332f3 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -36,6 +36,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/initval.h>
 #include <sound/pcm.h>
diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
index 0d556b0..4d62fe4 100644
--- a/sound/pci/es1938.c
+++ b/sound/pci/es1938.c
@@ -55,6 +55,7 @@
 #include <linux/gameport.h>
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/pcm.h>
@@ -1517,8 +1518,8 @@ static int __devinit snd_es1938_create(struct snd_card *card,
 	if ((err = pci_enable_device(pci)) < 0)
 		return err;
         /* check, if we can restrict PCI DMA transfers to 24 bits */
-	if (pci_set_dma_mask(pci, 0x00ffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x00ffffff) < 0) {
+	if (pci_set_dma_mask(pci, DMA_24BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_24BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 24bit PCI busmaster DMA\n");
 		pci_disable_device(pci);
                 return -ENXIO;
diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
index dd465a1..e3ad17f 100644
--- a/sound/pci/es1968.c
+++ b/sound/pci/es1968.c
@@ -104,6 +104,7 @@
 #include <linux/slab.h>
 #include <linux/gameport.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <linux/mutex.h>
 
 #include <sound/core.h>
diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index 672e198..b88eeba 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -56,7 +56,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <linux/mutex.h>
+
 #include <sound/core.h>
 #include <sound/cs8427.h>
 #include <sound/info.h>
diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c
index 8bc0849..44393e1 100644
--- a/sound/pci/maestro3.c
+++ b/sound/pci/maestro3.c
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 #include <sound/core.h>
 #include <sound/info.h>
 #include <sound/control.h>
diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c
index 43ee3b2..b5a0950 100644
--- a/sound/pci/mixart/mixart.c
+++ b/sound/pci/mixart/mixart.c
@@ -28,6 +28,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+
 #include <sound/core.h>
 #include <sound/initval.h>
 #include <sound/info.h>
diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c
index f679779..35875c8 100644
--- a/sound/pci/pcxhr/pcxhr.c
+++ b/sound/pci/pcxhr/pcxhr.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/dma-mapping.h>
 
 #include <sound/core.h>
 #include <sound/initval.h>
diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c
index 7bbea37..2d66a09 100644
--- a/sound/pci/sonicvibes.c
+++ b/sound/pci/sonicvibes.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/gameport.h>
 #include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
 
 #include <sound/core.h>
 #include <sound/pcm.h>
@@ -1227,8 +1228,8 @@ static int __devinit snd_sonicvibes_create(struct snd_card *card,
 	if ((err = pci_enable_device(pci)) < 0)
 		return err;
 	/* check, if we can restrict PCI DMA transfers to 24 bits */
-        if (pci_set_dma_mask(pci, 0x00ffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x00ffffff) < 0) {
+        if (pci_set_dma_mask(pci, DMA_24BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_24BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 24bit PCI busmaster DMA\n");
 		pci_disable_device(pci);
                 return -ENXIO;
diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c
index 83b7d8a..52178b8 100644
--- a/sound/pci/trident/trident_main.c
+++ b/sound/pci/trident/trident_main.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/gameport.h>
+#include <linux/dma-mapping.h>
 
 #include <sound/core.h>
 #include <sound/info.h>
@@ -3554,8 +3555,8 @@ int __devinit snd_trident_create(struct snd_card *card,
 	if ((err = pci_enable_device(pci)) < 0)
 		return err;
 	/* check, if we can restrict PCI DMA transfers to 30 bits */
-	if (pci_set_dma_mask(pci, 0x3fffffff) < 0 ||
-	    pci_set_consistent_dma_mask(pci, 0x3fffffff) < 0) {
+	if (pci_set_dma_mask(pci, DMA_30BIT_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, DMA_30BIT_MASK) < 0) {
 		snd_printk(KERN_ERR "architecture does not support 30bit PCI busmaster DMA\n");
 		pci_disable_device(pci);
 		return -ENXIO;
-- 
cgit v0.10.2


From 457d3d432bede99ed04d3bdeb5efb238a3627e8f Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:49 -0800
Subject: [PATCH] vfree NULL check fixup for sb_card

There's no need to check the vfree() argument for NULL.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/sb_card.c b/sound/oss/sb_card.c
index d38e88a..4708cbd 100644
--- a/sound/oss/sb_card.c
+++ b/sound/oss/sb_card.c
@@ -348,10 +348,8 @@ static void __exit sb_exit(void)
 
 	sb_unregister_all();
 
-	if (smw_free) {
-		vfree(smw_free);
-		smw_free = NULL;
-	}
+	vfree(smw_free);
+	smw_free = NULL;
 }
 
 module_init(sb_init);
-- 
cgit v0.10.2


From 07e0e93d9a3cdbe3cad450e0d0a2937585dffc55 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:49 -0800
Subject: [PATCH] maestro3 vfree NULL check fixup

vfree() checks for NULL, no need to do it explicitly.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Acked-by: Zach Brown <zab@zabbo.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/maestro3.c b/sound/oss/maestro3.c
index 66044af..4a5e423 100644
--- a/sound/oss/maestro3.c
+++ b/sound/oss/maestro3.c
@@ -2582,15 +2582,9 @@ static int alloc_dsp_suspendmem(struct m3_card *card)
 
     return 0;
 }
-static void free_dsp_suspendmem(struct m3_card *card)
-{
-   if(card->suspend_mem)
-       vfree(card->suspend_mem);
-}
 
 #else
 #define alloc_dsp_suspendmem(args...) 0
-#define free_dsp_suspendmem(args...) 
 #endif
 
 /*
@@ -2717,7 +2711,7 @@ out:
     if(ret) {
         if(card->iobase)
             release_region(pci_resource_start(pci_dev, 0), pci_resource_len(pci_dev, 0));
-        free_dsp_suspendmem(card);
+        vfree(card->suspend_mem);
         if(card->ac97) {
             unregister_sound_mixer(card->ac97->dev_mixer);
             kfree(card->ac97);
@@ -2760,7 +2754,7 @@ static void m3_remove(struct pci_dev *pci_dev)
         }
 
         release_region(card->iobase, 256);
-        free_dsp_suspendmem(card);
+        vfree(card->suspend_mem);
         kfree(card);
     }
     devs = NULL;
-- 
cgit v0.10.2


From 5a83fdddb8a104c021837637a59f93948b906819 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:50 -0800
Subject: [PATCH] no need to check vfree arg for null in oss/sequencer

There's no need to check pointers for NULL before handing them to vfree().

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c
index 347cd79..6815c30 100644
--- a/sound/oss/sequencer.c
+++ b/sound/oss/sequencer.c
@@ -1671,14 +1671,7 @@ void sequencer_init(void)
 
 void sequencer_unload(void)
 {
-	if(queue)
-	{
-		vfree(queue);
-		queue=NULL;
-	}
-	if(iqueue)
-	{
-		vfree(iqueue);
-		iqueue=NULL;
-	}
+	vfree(queue);
+	vfree(iqueue);
+	queue = iqueue = NULL;
 }
-- 
cgit v0.10.2


From ca43b317fc557bbcca845c684d93375286bf9e0b Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:51 -0800
Subject: [PATCH] vfree does its own NULL check, no need to be explicit in
 oss/msnd.c

vfree() does it's own NULL checking, no need for explicit check before
calling it.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/oss/msnd.c b/sound/oss/msnd.c
index a7ad2b0..5dbfc0f 100644
--- a/sound/oss/msnd.c
+++ b/sound/oss/msnd.c
@@ -95,10 +95,8 @@ void msnd_fifo_init(msnd_fifo *f)
 
 void msnd_fifo_free(msnd_fifo *f)
 {
-	if (f->data) {
-		vfree(f->data);
-		f->data = NULL;
-	}
+	vfree(f->data);
+	f->data = NULL;
 }
 
 int msnd_fifo_alloc(msnd_fifo *f, size_t n)
-- 
cgit v0.10.2


From b791ccef21129f9c7e4ab8274d5d7a0ec0a2fb7c Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:52 -0800
Subject: [PATCH] fix signed vs unsigned in nmi watchdog

Fix "signed vs unsigned" in nmi_watchdog_tick.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 5ad6a2c..d43b498 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -529,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 	 * always switch the stack NMI-atomically, it's safe to use
 	 * smp_processor_id().
 	 */
-	int sum, cpu = smp_processor_id();
+	unsigned int sum;
+	int cpu = smp_processor_id();
 
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
-- 
cgit v0.10.2


From ded23ac62776b4360d88e9b0330792d2c57fdfdf Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:52 -0800
Subject: [PATCH] trivial typos in Documentation/cputopology.txt

Fix a few trivial mistakes in Documentation/cputopology.txt

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index ff280e2..2b28e9e 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -1,5 +1,5 @@
 
-Export cpu topology info by sysfs. Items (attributes) are similar
+Export cpu topology info via sysfs. Items (attributes) are similar
 to /proc/cpuinfo.
 
 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
@@ -12,7 +12,7 @@ represent the thread siblings to cpu X in the same core;
 represent the thread siblings to cpu X in the same physical package;
 
 To implement it in an architecture-neutral way, a new source file,
-driver/base/topology.c, is to export the 5 attributes.
+drivers/base/topology.c, is to export the 4 attributes.
 
 If one architecture wants to support this feature, it just needs to
 implement 4 defines, typically in file include/asm-XXX/topology.h.
-- 
cgit v0.10.2


From 7f927fcc2fd1575d01efb4b76665975007945690 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 28 Mar 2006 01:56:53 -0800
Subject: [PATCH] Typo fixes

Fix a lot of typos.  Eyeballed by jmc@ in OpenBSD.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha
index bf802ff..ef484a7 100644
--- a/Documentation/m68k/README.buddha
+++ b/Documentation/m68k/README.buddha
@@ -29,7 +29,7 @@ address is written to $4a, then the whole Byte is written to
 $48, while it doesn't matter how often you're writing to $4a
 as  long as $48 is not touched.  After $48 has been written,
 the  whole card disappears from $e8 and is mapped to the new
-address just written.  Make shure $4a is written before $48,
+address just written.  Make sure $4a is written before $48,
 otherwise your chance is only 1:16 to find the board :-).
 
 The local memory-map is even active when mapped to $e8:
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 545447a..a120598 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -87,7 +87,7 @@
  *	   would fail and generate an error message in the system log.
  * 	 - For opt_c: slave should not be set to the master's setting
  *	   while it is running. It was already set during enslave. To
- *	   simplify things, it is now handeled separately.
+ *	   simplify things, it is now handled separately.
  *
  *    - 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
  *	 - Code cleanup and style changes
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 838e435..cab355c 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -236,7 +236,7 @@
 
 
 /*
- * Abort preanble and completion macros.
+ * Abort preamble and completion macros.
  * If a fixup handler is required then those macros must surround it.
  * It is assumed that the fixup code will handle the private part of
  * the exit macro.
diff --git a/drivers/char/mxser.h b/drivers/char/mxser.h
index e7fd0b0..7e188a4 100644
--- a/drivers/char/mxser.h
+++ b/drivers/char/mxser.h
@@ -118,7 +118,7 @@
 
 // enable CTS interrupt
 #define MOXA_MUST_IER_ECTSI		0x80
-// eanble RTS interrupt
+// enable RTS interrupt
 #define MOXA_MUST_IER_ERTSI		0x40
 // enable Xon/Xoff interrupt
 #define MOXA_MUST_IER_XINT		0x20
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index abb03e5..fee2aca 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -5996,7 +5996,7 @@ static void usc_set_async_mode( struct mgsl_struct *info )
 	 * <15..8>	?		RxFIFO IRQ Request Level
 	 *
 	 * Note: For async mode the receive FIFO level must be set
-	 * to 0 to aviod the situation where the FIFO contains fewer bytes
+	 * to 0 to avoid the situation where the FIFO contains fewer bytes
 	 * than the trigger level and no more data is expected.
 	 *
 	 * <7>		0		Exited Hunt IA (Interrupt Arm)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 905f58b..ea06e3a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -2044,7 +2044,7 @@ static int __init edac_mc_init(void)
 	 */
 	clear_pci_parity_errors();
 
-	/* Create the MC sysfs entires */
+	/* Create the MC sysfs entries */
 	if (edac_sysfs_memctrl_setup()) {
 		edac_printk(KERN_ERR, EDAC_MC,
 			"Error initializing sysfs code\n");
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 9aa074b..cc7ff8f 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -812,7 +812,7 @@ static int nsc_ircc_init_39x(nsc_chip_t *chip, chipio_t *info)
 	int cfg_base = info->cfg_base;
 	int enabled;
 
-	/* User is shure about his config... accept it. */
+	/* User is sure about his config... accept it. */
 	IRDA_DEBUG(2, "%s(): nsc_ircc_init_39x (user settings): "
 		   "io=0x%04x, irq=%d, dma=%d\n", 
 		   __FUNCTION__, info->fir_base, info->irq, info->dma);
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index 8429ceb..b82191d 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -2283,7 +2283,7 @@ static void set_rx_mode(struct net_device *net_dev)
 	int i, table_entries;
 	u32 rx_mode;
 
-	/* 635 Hash Table entires = 256(2^16) */
+	/* 635 Hash Table entries = 256(2^16) */
 	if((sis_priv->chipset_rev >= SIS635A_900_REV) ||
 			(sis_priv->chipset_rev == SIS900B_900_REV))
 		table_entries = 16;
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index ee48bfd..d1a86a0 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -513,7 +513,7 @@ struct mii_phy {
     u_char  *rst;           /* Start of reset sequence in SROM           */
     u_int mc;               /* Media Capabilities                        */
     u_int ana;              /* NWay Advertisement                        */
-    u_int fdx;              /* Full DupleX capabilites for each media    */
+    u_int fdx;              /* Full DupleX capabilities for each media   */
     u_int ttm;              /* Transmit Threshold Mode for each media    */
     u_int mci;              /* 21142 MII Connector Interrupt info        */
 };
diff --git a/drivers/net/tulip/pnic2.c b/drivers/net/tulip/pnic2.c
index 55f4a9a..ab98502 100644
--- a/drivers/net/tulip/pnic2.c
+++ b/drivers/net/tulip/pnic2.c
@@ -199,7 +199,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5)
 	               /* negotiation ended successfully */
 
 	               /* get the link partners reply and mask out all but
-                        * bits 24-21 which show the partners capabilites
+                        * bits 24-21 which show the partners capabilities
                         * and match those to what we advertised
                         *
                         * then begin to interpret the results of the negotiation.
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index cde35dd..c1ce87a 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -208,7 +208,7 @@ static const struct typhoon_card_info typhoon_card_info[] __devinitdata = {
 };
 
 /* Notes on the new subsystem numbering scheme:
- * bits 0-1 indicate crypto capabilites: (0) variable, (1) DES, or (2) 3DES
+ * bits 0-1 indicate crypto capabilities: (0) variable, (1) DES, or (2) 3DES
  * bit 4 indicates if this card has secured firmware (we don't support it)
  * bit 8 indicates if this is a (0) copper or (1) fiber card
  * bits 12-16 indicate card type: (0) client and (1) server
@@ -788,7 +788,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
 	/* we have two rings to choose from, but we only use txLo for now
 	 * If we start using the Hi ring as well, we'll need to update
 	 * typhoon_stop_runtime(), typhoon_interrupt(), typhoon_num_free_tx(),
-	 * and TXHI_ENTIRES to match, as well as update the TSO code below
+	 * and TXHI_ENTRIES to match, as well as update the TSO code below
 	 * to get the right DMA address
 	 */
 	txRing = &tp->txLoRing;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 6fd0bf7..8dfdfbd 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -3858,7 +3858,7 @@ static int orinoco_ioctl_setscan(struct net_device *dev,
 	unsigned long flags;
 
 	/* Note : you may have realised that, as this is a SET operation,
-	 * this is priviledged and therefore a normal user can't
+	 * this is privileged and therefore a normal user can't
 	 * perform scanning.
 	 * This is not an error, while the device perform scanning,
 	 * traffic doesn't flow, so it's a perfect DoS...
diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c
index e5bb9f5..989599a 100644
--- a/drivers/net/wireless/prism54/isl_ioctl.c
+++ b/drivers/net/wireless/prism54/isl_ioctl.c
@@ -747,7 +747,7 @@ prism54_get_essid(struct net_device *ndev, struct iw_request_info *info,
 
 	if (essid->length) {
 		dwrq->flags = 1;	/* set ESSID to ON for Wireless Extensions */
-		/* if it is to big, trunk it */
+		/* if it is too big, trunk it */
 		dwrq->length = min((u8)IW_ESSID_MAX_SIZE, essid->length);
 	} else {
 		dwrq->flags = 0;
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 0ab26d0..0d2b447 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1026,7 +1026,7 @@ static void twa_free_request_id(TW_Device_Extension *tw_dev, int request_id)
 	tw_dev->free_tail = (tw_dev->free_tail + 1) % TW_Q_LENGTH;
 } /* End twa_free_request_id() */
 
-/* This function will get parameter table entires from the firmware */
+/* This function will get parameter table entries from the firmware */
 static void *twa_get_param(TW_Device_Extension *tw_dev, int request_id, int table_id, int parameter_id, int parameter_size_bytes)
 {
 	TW_Command_Full *full_command_packet;
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 5996d3c..674b15c 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1528,7 +1528,7 @@ static int serial8250_startup(struct uart_port *port)
 
 	/*
 	 * Clear the FIFO buffers and disable them.
-	 * (they will be reeanbled in set_termios())
+	 * (they will be reenabled in set_termios())
 	 */
 	serial8250_clear_fifos(up);
 
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index b848b7d..3bdee64 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -483,7 +483,7 @@ static int serial_txx9_startup(struct uart_port *port)
 
 	/*
 	 * Clear the FIFO buffers and disable them.
-	 * (they will be reeanbled in set_termios())
+	 * (they will be reenabled in set_termios())
 	 */
 	sio_set(up, TXX9_SIFCR,
 		TXX9_SIFCR_TFRST | TXX9_SIFCR_RFRST | TXX9_SIFCR_FRSTE);
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 9fe2283..1c4396c 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -641,7 +641,7 @@ static int sunsu_startup(struct uart_port *port)
 
 	/*
 	 * Clear the FIFO buffers and disable them.
-	 * (they will be reeanbled in set_termios())
+	 * (they will be reenabled in set_termios())
 	 */
 	if (uart_config[up->port.type].flags & UART_CLEAR_FIFO) {
 		serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO);
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c
index 372527a..682bf22 100644
--- a/drivers/usb/host/ohci-s3c2410.c
+++ b/drivers/usb/host/ohci-s3c2410.c
@@ -158,7 +158,7 @@ static int ohci_s3c2410_hub_control (
 		"s3c2410_hub_control(%p,0x%04x,0x%04x,0x%04x,%p,%04x)\n",
 		hcd, typeReq, wValue, wIndex, buf, wLength);
 
-	/* if we are only an humble host without any special capabilites
+	/* if we are only an humble host without any special capabilities
 	 * process the request straight away and exit */
 
 	if (info == NULL) {
diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c
index 9c5ab25..f7ac9d6 100644
--- a/drivers/usb/net/zaurus.c
+++ b/drivers/usb/net/zaurus.c
@@ -217,7 +217,7 @@ static int blan_mdlm_bind(struct usbnet *dev, struct usb_interface *intf)
 			 * with devices that use it and those that don't.
 			 */
 			if ((detail->bDetailData[1] & ~0x02) != 0x01) {
-				/* bmDataCapabilites == 0 would be fine too,
+				/* bmDataCapabilities == 0 would be fine too,
 				 * but framing is minidriver-coupled for now.
 				 */
 bad_detail:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 73e754f..e4fde1a 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -311,7 +311,7 @@ fail:
 /*
  * mb_cache_shrink()
  *
- * Removes all cache entires of a device from the cache. All cache entries
+ * Removes all cache entries of a device from the cache. All cache entries
  * currently in use cannot be freed, and thus remain in the cache. All others
  * are freed.
  *
diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h
index 8e23e4c..0a3face 100644
--- a/include/asm-parisc/pdc.h
+++ b/include/asm-parisc/pdc.h
@@ -333,7 +333,7 @@ struct pdc_model {		/* for PDC_MODEL */
 	unsigned long curr_key;
 };
 
-/* Values for PDC_MODEL_CAPABILITES non-equivalent virtual aliasing support */
+/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */
 
 #define PDC_MODEL_IOPDIR_FDC            (1 << 2)        /* see sba_iommu.c */
 #define PDC_MODEL_NVA_MASK		(3 << 4)
diff --git a/include/asm-sh/addrspace.h b/include/asm-sh/addrspace.h
index dbb05d1..720afc1 100644
--- a/include/asm-sh/addrspace.h
+++ b/include/asm-sh/addrspace.h
@@ -13,7 +13,7 @@
 
 #include <asm/cpu/addrspace.h>
 
-/* Memory segments (32bit Priviledged mode addresses)  */
+/* Memory segments (32bit Privileged mode addresses)  */
 #define P0SEG		0x00000000
 #define P1SEG		0x80000000
 #define P2SEG		0xa0000000
diff --git a/include/asm-sparc64/floppy.h b/include/asm-sparc64/floppy.h
index 49d49a2..6a95d5d 100644
--- a/include/asm-sparc64/floppy.h
+++ b/include/asm-sparc64/floppy.h
@@ -738,7 +738,7 @@ static unsigned long __init sun_floppy_init(void)
 		if (!sun_floppy_types[0] && sun_floppy_types[1]) {
 			/*
 			 * Set the drive exchange bit in FCR on NS87303,
-			 * make shure other bits are sane before doing so.
+			 * make sure other bits are sane before doing so.
 			 */
 			ns87303_modify(config, FER, FER_EDM, 0);
 			ns87303_modify(config, ASC, ASC_DRV2_SEL, 0);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 2cb19e6..d03fadf 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -734,7 +734,7 @@ struct fb_tile_ops {
 
 /* A driver may set this flag to indicate that it does want a set_par to be
  * called every time when fbcon_switch is executed. The advantage is that with
- * this flag set you can really be shure that set_par is always called before
+ * this flag set you can really be sure that set_par is always called before
  * any of the functions dependant on the correct hardware state or altering
  * that state, even if you are using some broken X releases. The disadvantage
  * is that it introduces unwanted delays to every console switch if set_par
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4f71cfd..dec8249 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -912,7 +912,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	/*
 	 * Check if this process has the right to modify the specified
 	 * process. The right exists if the process has administrative
-	 * capabilities, superuser priviledges or the same
+	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
 	if ((current->euid != task->suid) && (current->euid != task->uid) &&
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 7594456..627b113 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1302,7 +1302,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return -ENOTCONN;
 
-	/* Check that we don't send out to big frames */
+	/* Check that we don't send out too big frames */
 	if (len > self->max_data_size) {
 		IRDA_DEBUG(2, "%s(), Chopping frame from %zd to %d bytes!\n",
 			   __FUNCTION__, len, self->max_data_size);
@@ -1546,7 +1546,7 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 	IRDA_ASSERT(self != NULL, return -1;);
 
 	/*
-	 * Check that we don't send out to big frames. This is an unreliable
+	 * Check that we don't send out too big frames. This is an unreliable
 	 * service, so we have no fragmentation and no coalescence
 	 */
 	if (len > self->max_data_size) {
@@ -1642,7 +1642,7 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 	}
 
 	/*
-	 * Check that we don't send out to big frames. This is an unreliable
+	 * Check that we don't send out too big frames. This is an unreliable
 	 * service, so we have no fragmentation and no coalescence
 	 */
 	if (len > self->max_data_size) {
diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c
index 0cbef5f..ab78544 100644
--- a/sound/pci/rme32.c
+++ b/sound/pci/rme32.c
@@ -313,7 +313,7 @@ static int snd_rme32_capture_copy(struct snd_pcm_substream *substream, int chann
 }
 
 /*
- * SPDIF I/O capabilites (half-duplex mode)
+ * SPDIF I/O capabilities (half-duplex mode)
  */
 static struct snd_pcm_hardware snd_rme32_spdif_info = {
 	.info =		(SNDRV_PCM_INFO_MMAP_IOMEM |
@@ -339,7 +339,7 @@ static struct snd_pcm_hardware snd_rme32_spdif_info = {
 };
 
 /*
- * ADAT I/O capabilites (half-duplex mode)
+ * ADAT I/O capabilities (half-duplex mode)
  */
 static struct snd_pcm_hardware snd_rme32_adat_info =
 {
@@ -364,7 +364,7 @@ static struct snd_pcm_hardware snd_rme32_adat_info =
 };
 
 /*
- * SPDIF I/O capabilites (full-duplex mode)
+ * SPDIF I/O capabilities (full-duplex mode)
  */
 static struct snd_pcm_hardware snd_rme32_spdif_fd_info = {
 	.info =		(SNDRV_PCM_INFO_MMAP |
@@ -390,7 +390,7 @@ static struct snd_pcm_hardware snd_rme32_spdif_fd_info = {
 };
 
 /*
- * ADAT I/O capabilites (full-duplex mode)
+ * ADAT I/O capabilities (full-duplex mode)
  */
 static struct snd_pcm_hardware snd_rme32_adat_fd_info =
 {
diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c
index 0e694b0..6c2a9f4 100644
--- a/sound/pci/rme96.c
+++ b/sound/pci/rme96.c
@@ -359,7 +359,7 @@ snd_rme96_capture_copy(struct snd_pcm_substream *substream,
 }
 
 /*
- * Digital output capabilites (S/PDIF)
+ * Digital output capabilities (S/PDIF)
  */
 static struct snd_pcm_hardware snd_rme96_playback_spdif_info =
 {
@@ -388,7 +388,7 @@ static struct snd_pcm_hardware snd_rme96_playback_spdif_info =
 };
 
 /*
- * Digital input capabilites (S/PDIF)
+ * Digital input capabilities (S/PDIF)
  */
 static struct snd_pcm_hardware snd_rme96_capture_spdif_info =
 {
@@ -417,7 +417,7 @@ static struct snd_pcm_hardware snd_rme96_capture_spdif_info =
 };
 
 /*
- * Digital output capabilites (ADAT)
+ * Digital output capabilities (ADAT)
  */
 static struct snd_pcm_hardware snd_rme96_playback_adat_info =
 {
@@ -442,7 +442,7 @@ static struct snd_pcm_hardware snd_rme96_playback_adat_info =
 };
 
 /*
- * Digital input capabilites (ADAT)
+ * Digital input capabilities (ADAT)
  */
 static struct snd_pcm_hardware snd_rme96_capture_adat_info =
 {
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 980b9cd..b5538ef 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -2256,7 +2256,7 @@ static int snd_hdspm_create_controls(struct snd_card *card, struct hdspm * hdspm
 	}
 
 	/* Channel playback mixer as default control 
-	   Note: the whole matrix would be 128*HDSPM_MIXER_CHANNELS Faders, thats to big for any alsamixer 
+	   Note: the whole matrix would be 128*HDSPM_MIXER_CHANNELS Faders, thats too big for any alsamixer
 	   they are accesible via special IOCTL on hwdep
 	   and the mixer 2dimensional mixer control */
 
diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c
index 3158550..fe67a92 100644
--- a/sound/usb/usx2y/usx2yhwdeppcm.c
+++ b/sound/usb/usx2y/usx2yhwdeppcm.c
@@ -404,7 +404,7 @@ static void usX2Y_usbpcm_subs_startup(struct snd_usX2Y_substream *subs)
 	struct usX2Ydev * usX2Y = subs->usX2Y;
 	usX2Y->prepare_subs = subs;
 	subs->urb[0]->start_frame = -1;
-	smp_wmb();	// Make shure above modifications are seen by i_usX2Y_subs_startup()
+	smp_wmb();	// Make sure above modifications are seen by i_usX2Y_subs_startup()
 	usX2Y_urbs_set_complete(usX2Y, i_usX2Y_usbpcm_subs_startup);
 }
 
-- 
cgit v0.10.2


From 4fa95ef639830ccf0ca1ad42ee9bed87ef642f32 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Tue, 28 Mar 2006 01:56:54 -0800
Subject: [PATCH] sound: Remove unneeded kmalloc() return value casts

Get rid of unnessesary casts of kmalloc() return value in sound/

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 6b7a367..87b47c9 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -631,7 +631,8 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		if ((newbuf = (char *) kmalloc(params->buffer_size, GFP_KERNEL)) == NULL)
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
+		if (!newbuf)
 			return -ENOMEM;
 		kfree(runtime->buffer);
 		runtime->buffer = newbuf;
@@ -657,7 +658,8 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream,
 		return -EINVAL;
 	}
 	if (params->buffer_size != runtime->buffer_size) {
-		if ((newbuf = (char *) kmalloc(params->buffer_size, GFP_KERNEL)) == NULL)
+		newbuf = kmalloc(params->buffer_size, GFP_KERNEL);
+		if (!newbuf)
 			return -ENOMEM;
 		kfree(runtime->buffer);
 		runtime->buffer = newbuf;
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index 6ba8d6f..3bbc810 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -2798,7 +2798,7 @@ __init setup_beep(void)
 			DBDMA_ALIGN(beep_dbdma_cmd_space);
 	/* set up emergency dbdma cmd */
 	emergency_dbdma_cmd = beep_dbdma_cmd+1 ;
-	beep_buf = (short *) kmalloc(BEEP_BUFLEN * 4, GFP_KERNEL);
+	beep_buf = kmalloc(BEEP_BUFLEN * 4, GFP_KERNEL);
 	if (beep_buf == NULL) {
 		printk(KERN_ERR "dmasound_pmac: no memory for beep buffer\n");
 		kfree(beep_dbdma_cmd_space) ;
diff --git a/sound/oss/emu10k1/midi.c b/sound/oss/emu10k1/midi.c
index 959a967..25ae8e4 100644
--- a/sound/oss/emu10k1/midi.c
+++ b/sound/oss/emu10k1/midi.c
@@ -65,7 +65,8 @@ static int midiin_add_buffer(struct emu10k1_mididevice *midi_dev, struct midi_hd
 
 	init_midi_hdr(midihdr);
 
-	if ((midihdr->data = (u8 *) kmalloc(MIDIIN_BUFLEN, GFP_KERNEL)) == NULL) {
+	midihdr->data = kmalloc(MIDIIN_BUFLEN, GFP_KERNEL);
+	if (!midihdr->data) {
 		ERROR();
 		kfree(midihdr);
 		return -1;
@@ -334,7 +335,8 @@ static ssize_t emu10k1_midi_write(struct file *file, const char __user *buffer,
 	midihdr->bytesrecorded = 0;
 	midihdr->flags = 0;
 
-	if ((midihdr->data = (u8 *) kmalloc(count, GFP_KERNEL)) == NULL) {
+	midihdr->data = kmalloc(count, GFP_KERNEL);
+	if (!midihdr->data) {
 		ERROR();
 		kfree(midihdr);
 		return -EINVAL;
@@ -545,7 +547,8 @@ int emu10k1_seq_midi_out(int dev, unsigned char midi_byte)
 	midihdr->bytesrecorded = 0;
 	midihdr->flags = 0;
 
-	if ((midihdr->data = (u8 *) kmalloc(1, GFP_KERNEL)) == NULL) {
+	midihdr->data = kmalloc(1, GFP_KERNEL);
+	if (!midihdr->data) {
 		ERROR();
 		kfree(midihdr);
 		return -EINVAL;
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index 8a9917c..3f7427c 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -289,7 +289,7 @@ static int __init dac_audio_init(void)
 
 	in_use = 0;
 
-	data_buffer = (char *)kmalloc(BUFFER_SIZE, GFP_KERNEL);
+	data_buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
 	if (data_buffer == NULL)
 		return -ENOMEM;
 
-- 
cgit v0.10.2


From ec7e15d6486e9d1da1c2f344b670b8388ba7019b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Mar 2006 01:56:55 -0800
Subject: [PATCH] compat_sys_futex() warning fix

kernel/futex_compat.c: In function `compat_sys_futex':
kernel/futex_compat.c:140: warning: passing arg 1 of `do_futex' makes integer from pointer without a cast
kernel/futex_compat.c:140: warning: passing arg 5 of `do_futex' makes integer from pointer without a cast

Not sure what Ingo was thinking of here.  Put the casts back in.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 9c077cf..54274fc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -137,5 +137,6 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 	if (op >= FUTEX_REQUEUE)
 		val2 = (int) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+	return do_futex((unsigned long)uaddr, op, val, timeout,
+			(unsigned long)uaddr2, val2, val3);
 }
-- 
cgit v0.10.2


From c326e27eb79e98050d855e371ac534ff4352e910 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Mon, 27 Mar 2006 22:55:55 +0200
Subject: [CPUFREQ] cpufreq_conservative: keep ignore_nice_load and freq_step
 values when reselected

Keep the value of ignore_nice_load and freq_step of the conservative
governor after the governor is deselected and reselected.

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index a152d2c..037f6bf 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -88,6 +88,8 @@ static struct dbs_tuners dbs_tuners_ins = {
 	.up_threshold 		= DEF_FREQUENCY_UP_THRESHOLD,
 	.down_threshold 	= DEF_FREQUENCY_DOWN_THRESHOLD,
 	.sampling_down_factor 	= DEF_SAMPLING_DOWN_FACTOR,
+	.ignore_nice		= 0,
+	.freq_step		= 5,
 };
 
 static inline unsigned int get_cpu_idle_time(unsigned int cpu)
@@ -490,8 +492,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				def_sampling_rate = MIN_STAT_SAMPLING_RATE;
 
 			dbs_tuners_ins.sampling_rate = def_sampling_rate;
-			dbs_tuners_ins.ignore_nice = 0;
-			dbs_tuners_ins.freq_step = 5;
 
 			dbs_timer_init();
 		}
-- 
cgit v0.10.2


From 23bdf86aa06ebe71bcbf6b7d25de9958c6ab33fa Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 28 Mar 2006 21:00:40 +0100
Subject: [ARM] 3377/2: add support for intel xsc3 core

Patch from Lennert Buytenhek

This patch adds support for the new XScale v3 core.  This is an
ARMv5 ISA core with the following additions:

- L2 cache
- I/O coherency support (on select chipsets)
- Low-Locality Reference cache attributes (replaces mini-cache)
- Supersections (v6 compatible)
- 36-bit addressing (v6 compatible)
- Single instruction cache line clean/invalidate
- LRU cache replacement (vs round-robin)

I attempted to merge the XSC3 support into proc-xscale.S, but XSC3
cores have separate errata and have to handle things like L2, so it
is simpler to keep it separate.

L2 cache support is currently a build option because the L2 enable
bit must be set before we enable the MMU and there is no easy way to
capture command line parameters at this point.

There are still optimizations that can be done such as using LLR for
copypage (in theory using the exisiting mini-cache code) but those
can be addressed down the road.

Signed-off-by: Deepak Saxena <dsaxena@plexity.net>
Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 99c0d32..0f571d3 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -57,6 +57,7 @@ tune-$(CONFIG_CPU_ARM926T)	:=-mtune=arm9tdmi
 tune-$(CONFIG_CPU_SA110)	:=-mtune=strongarm110
 tune-$(CONFIG_CPU_SA1100)	:=-mtune=strongarm1100
 tune-$(CONFIG_CPU_XSCALE)	:=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
+tune-$(CONFIG_CPU_XSC3)		:=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
 tune-$(CONFIG_CPU_V6)		:=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
 
 ifeq ($(CONFIG_AEABI),y)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index e680c5f..c55b739 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -239,6 +239,17 @@ config CPU_XSCALE
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WBI
 
+# XScale Core Version 3
+config CPU_XSC3
+	bool
+	depends on ARCH_IXP23XX
+	default y
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_CACHE_VIVT
+	select CPU_TLB_V4WBI
+	select IO_36
+
 # ARMv6
 config CPU_V6
 	bool "Support ARM V6 processor"
@@ -361,11 +372,17 @@ config CPU_TLB_V4WBI
 config CPU_TLB_V6
 	bool
 
+#
+# CPU supports 36-bit I/O
+#
+config IO_36
+	bool
+
 comment "Processor Features"
 
 config ARM_THUMB
 	bool "Support Thumb user binaries"
-	depends on CPU_ARM720T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_V6
+	depends on CPU_ARM720T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6
 	default y
 	help
 	  Say Y if you want to include kernel support for running user space
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index ffe73ba..07a5385 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_CPU_COPY_V4WB)	+= copypage-v4wb.o
 obj-$(CONFIG_CPU_COPY_V6)	+= copypage-v6.o mmu.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
+obj-$(CONFIG_CPU_XSC3)		+= copypage-xsc3.o
 
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
@@ -51,4 +52,5 @@ obj-$(CONFIG_CPU_ARM1026)	+= proc-arm1026.o
 obj-$(CONFIG_CPU_SA110)		+= proc-sa110.o
 obj-$(CONFIG_CPU_SA1100)	+= proc-sa1100.o
 obj-$(CONFIG_CPU_XSCALE)	+= proc-xscale.o
+obj-$(CONFIG_CPU_XSC3)		+= proc-xsc3.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
diff --git a/arch/arm/mm/copypage-xsc3.S b/arch/arm/mm/copypage-xsc3.S
new file mode 100644
index 0000000..9a2cb43
--- /dev/null
+++ b/arch/arm/mm/copypage-xsc3.S
@@ -0,0 +1,97 @@
+/*
+ *  linux/arch/arm/lib/copypage-xsc3.S
+ *
+ *  Copyright (C) 2004 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Adapted for 3rd gen XScale core, no more mini-dcache
+ * Author: Matt Gilbert (matthew.m.gilbert@intel.com)
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * General note:
+ *  We don't really want write-allocate cache behaviour for these functions
+ *  since that will just eat through 8K of the cache.
+ */
+
+	.text
+	.align	5
+/*
+ * XSC3 optimised copy_user_page
+ *  r0 = destination
+ *  r1 = source
+ *  r2 = virtual user address of ultimate destination page
+ *
+ * The source page may have some clean entries in the cache already, but we
+ * can safely ignore them - break_cow() will flush them out of the cache
+ * if we eventually end up using our copied page.
+ *
+ */
+ENTRY(xsc3_mc_copy_user_page)
+	stmfd	sp!, {r4, r5, lr}
+	mov	lr, #PAGE_SZ/64-1
+
+	pld	[r1, #0]
+	pld	[r1, #32]
+1:	pld	[r1, #64]
+	pld	[r1, #96]
+
+2:	ldrd	r2, [r1], #8
+	mov	ip, r0
+	ldrd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate
+	strd	r2, [r0], #8
+	ldrd	r2, [r1], #8
+	strd	r4, [r0], #8
+	ldrd	r4, [r1], #8
+	strd	r2, [r0], #8
+	strd	r4, [r0], #8
+	ldrd	r2, [r1], #8
+	mov	ip, r0
+	ldrd	r4, [r1], #8
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate
+	strd	r2, [r0], #8
+	ldrd	r2, [r1], #8
+	subs	lr, lr, #1
+	strd	r4, [r0], #8
+	ldrd	r4, [r1], #8
+	strd	r2, [r0], #8
+	strd	r4, [r0], #8
+	bgt	1b
+	beq	2b
+
+	ldmfd	sp!, {r4, r5, pc}
+
+	.align	5
+/*
+ * XScale optimised clear_user_page
+ *  r0 = destination
+ *  r1 = virtual user address of ultimate destination page
+ */
+ENTRY(xsc3_mc_clear_user_page)
+	mov	r1, #PAGE_SZ/32
+	mov	r2, #0
+	mov	r3, #0
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate line
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	strd	r2, [r0], #8
+	subs	r1, r1, #1
+	bne	1b
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	xsc3_mc_user_fns, #object
+ENTRY(xsc3_mc_user_fns)
+	.long	xsc3_mc_clear_user_page
+	.long	xsc3_mc_copy_user_page
+	.size	xsc3_mc_user_fns, . - xsc3_mc_user_fns
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index ef8d30a..5e5d05b 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -557,7 +557,8 @@ void __init create_mapping(struct map_desc *md)
 	 *	supersections are only allocated for domain 0 regardless
 	 *	of the actual domain assignments in use.
 	 */
-	if (cpu_architecture() >= CPU_ARCH_ARMv6 && domain == 0) {
+	if ((cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())
+		&& domain == 0) {
 		/*
 		 * Align to supersection boundary if !high pages.
 		 * High pages have already been checked for proper
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
new file mode 100644
index 0000000..f90513e
--- /dev/null
+++ b/arch/arm/mm/proc-xsc3.S
@@ -0,0 +1,498 @@
+/*
+ * linux/arch/arm/mm/proc-xsc3.S
+ *
+ * Original Author: Matthew Gilbert
+ * Current Maintainer: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2004 (C) Intel Corp.
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is an
+ * extension to Intel's original XScale core that adds the following
+ * features:
+ *
+ * - ARMv6 Supersections
+ * - Low Locality Reference pages (replaces mini-cache)
+ * - 36-bit addressing
+ * - L2 cache
+ * - Cache-coherency if chipset supports it
+ *
+ * Based on orignal XScale code by Nicolas Pitre
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/procinfo.h>
+#include <asm/hardware.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The cache line size of the I and D cache.
+ */
+#define CACHELINESIZE	32
+
+/*
+ * The size of the data cache.
+ */
+#define CACHESIZE	32768
+
+/*
+ * Run with L2 enabled.
+ */
+#define L2_CACHE_ENABLE	1
+
+/*
+ * Enable the Branch Target Buffer (can cause crashes, see erratum #42.)
+ */
+#define BTB_ENABLE	0
+
+/*
+ * This macro is used to wait for a CP15 write and is needed
+ * when we have to ensure that the last operation to the co-pro
+ * was completed before continuing with operation.
+ */
+	.macro	cpwait_ret, lr, rd
+	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
+	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
+						@ flush instruction pipeline
+	.endm
+
+/*
+ * This macro cleans & invalidates the entire xsc3 dcache by set & way.
+ */
+
+ 	.macro  clean_d_cache rd, rs
+	mov	\rd, #0x1f00
+	orr	\rd, \rd, #0x00e0
+1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/inv set/way
+	adds	\rd, \rd, #0x40000000
+	bcc	1b
+	subs	\rd, \rd, #0x20
+	bpl	1b
+	.endm
+
+	.text
+
+/*
+ * cpu_xsc3_proc_init()
+ *
+ * Nothing too exciting at the moment
+ */
+ENTRY(cpu_xsc3_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_xsc3_proc_fin()
+ */
+ENTRY(cpu_xsc3_proc_fin)
+	str	lr, [sp, #-4]!
+	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r0
+	bl	xsc3_flush_kern_cache_all	@ clean caches
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...IZ...........
+	bic	r0, r0, #0x0006			@ .............CA.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ldr	pc, [sp], #4
+
+/*
+ * cpu_xsc3_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+ENTRY(cpu_xsc3_reset)
+	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r1			@ reset CPSR
+	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
+	bic	r1, r1, #0x0086			@ ........B....CA.
+	bic	r1, r1, #0x3900			@ ..VIZ..S........
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches & BTB
+	bic	r1, r1, #0x0001			@ ...............M
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	@ CAUTION: MMU turned off from this point.  We count on the pipeline
+	@ already containing those two last instructions to survive.
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mov	pc, r0
+
+/*
+ * cpu_xsc3_do_idle()
+ *
+ * Cause the processor to idle
+ *
+ * For now we do nothing but go to idle mode for every case
+ *
+ * XScale supports clock switching, but using idle mode support
+ * allows external hardware to react to system state changes.
+
+ MMG: Come back to this one.
+ */
+	.align	5
+
+ENTRY(cpu_xsc3_do_idle)
+	mov	r0, #1
+	mcr	p14, 0, r0, c7, c0, 0		@ Go to IDLE
+	mov	pc, lr
+
+/* ================================= CACHE ================================ */
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(xsc3_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(xsc3_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	clean_d_cache r0, r1
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write Buffer
+	mcrne	p15, 0, ip, c7, c5, 4		@ Prefetch Flush
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_range(start, end, vm_flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end	- end address (exclusive, may not be aligned)
+ *	- vma	- vma_area_struct describing address space
+ */
+	.align	5
+ENTRY(xsc3_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #MAX_AREA_SIZE
+	bhs	__flush_whole_cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ Invalidate I cache line
+	mcr	p15, 0, r0, c7, c14, 1		@ Clean/invalidate D cache line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ Invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write Buffer
+	mcrne	p15, 0, ip, c7, c5, 4		@ Prefetch Flush
+	mov	pc, lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ *
+ *	Note: single I-cache line invalidation isn't used here since
+ *	it also trashes the mini I-cache used by JTAG debuggers.
+ */
+ENTRY(xsc3_coherent_kern_range)
+/* FALLTHROUGH */
+ENTRY(xsc3_coherent_user_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write Buffer
+	mcr	p15, 0, r0, c7, c5, 4		@ Prefetch Flush
+	mov	pc, lr
+
+/*
+ *	flush_kern_dcache_page(void *page)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- page aligned address
+ */
+ENTRY(xsc3_flush_kern_dcache_page)
+	add	r1, r0, #PAGE_SZ
+1:	mcr	p15, 0, r0, c7, c14, 1		@ Clean/Invalidate D Cache line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write Buffer
+	mcr	p15, 0, r0, c7, c5, 4		@ Prefetch Flush
+	mov	pc, lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xsc3_dma_inv_range)
+	tst	r0, #CACHELINESIZE - 1
+	bic	r0, r0, #CACHELINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D entry
+	mcrne	p15, 1, r0, c7, c11, 1		@ clean L2 D entry
+	tst	r1, #CACHELINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D entry
+	mcrne	p15, 1, r1, c7, c11, 1		@ clean L2 D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D entry
+	mcr	p15, 1, r0, c7, c7, 1		@ Invalidate L2 D cache line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write Buffer
+	mov	pc, lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xsc3_dma_clean_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D entry
+	mcr	p15, 1, r0, c7, c11, 1		@ clean L2 D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write Buffer
+	mov	pc, lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xsc3_dma_flush_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1	@ Clean/invalidate L1 D cache line
+	mcr	p15, 1, r0, c7, c11, 1	@ Clean L2 D cache line
+	mcr	p15, 1, r0, c7, c7, 1	@ Invalidate L2 D cache line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write Buffer
+	mov	pc, lr
+
+ENTRY(xsc3_cache_fns)
+	.long	xsc3_flush_kern_cache_all
+	.long	xsc3_flush_user_cache_all
+	.long	xsc3_flush_user_cache_range
+	.long	xsc3_coherent_kern_range
+	.long	xsc3_coherent_user_range
+	.long	xsc3_flush_kern_dcache_page
+	.long	xsc3_dma_inv_range
+	.long	xsc3_dma_clean_range
+	.long	xsc3_dma_flush_range
+
+ENTRY(cpu_xsc3_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHELINESIZE
+	subs	r1, r1, #CACHELINESIZE
+	bhi	1b
+	mov	pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_xsc3_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_xsc3_switch_mm)
+	clean_d_cache r1, r2
+	mcr	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write Buffer
+	mcr	p15, 0, ip, c7, c5, 4		@ Prefetch Flush
+#ifdef L2_CACHE_ENABLE
+	orr	r0, r0, #0x18			@ cache the page table in L2
+#endif
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	cpwait_ret lr, ip
+
+/*
+ * cpu_xsc3_set_pte(ptep, pte)
+ *
+ * Set a PTE and flush it out
+ *
+ */
+	.align	5
+ENTRY(cpu_xsc3_set_pte)
+	str	r1, [r0], #-2048		@ linux version
+
+	bic	r2, r1, #0xff0
+	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
+
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+
+	tst	r3, #L_PTE_USER			@ User?
+	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
+
+	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
+	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
+						@ combined with user -> user r/w
+
+#if L2_CACHE_ENABLE
+	@ If its cacheable it needs to be in L2 also.
+	eor	ip, r1, #L_PTE_CACHEABLE
+	tst	ip, #L_PTE_CACHEABLE
+	orreq	r2, r2, #PTE_EXT_TEX(0x5)
+#endif
+
+	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
+	movne	r2, #0				@ no -> fault
+
+	str	r2, [r0]			@ hardware version
+	mov	ip, #0
+	mcr	p15, 0, r0, c7, c10, 1		@ Clean D cache line mcr
+	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write Buffer
+	mov	pc, lr
+
+	.ltorg
+
+	.align
+
+	__INIT
+
+	.type	__xsc3_setup, #function
+__xsc3_setup:
+	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I, D caches & BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write Buffer
+	mcr	p15, 0, ip, c7, c5, 4		@ Prefetch Flush
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I, D TLBs
+#if L2_CACHE_ENABLE
+	orr	r4, r4, #0x18			@ cache the page table in L2
+#endif
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+	mov	r0, #1				@ Allow access to CP0 and CP13
+	orr	r0, r0, #1 << 13		@ Its undefined whether this
+	mcr	p15, 0, r0, c15, c1, 0		@ affects USR or SVC modes
+	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
+	and	r0, r0, #2			@ preserve bit P bit setting
+#if L2_CACHE_ENABLE
+	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
+#endif
+	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg
+	mrc	p15, 0, r0, c1, c0, 0		@ get control register
+	bic	r0, r0, #0x0200			@ .... ..R. .... ....
+	bic	r0, r0, #0x0002			@ .... .... .... ..A.
+	orr	r0, r0, #0x0005			@ .... .... .... .C.M
+#if BTB_ENABLE
+	orr	r0, r0, #0x3900			@ ..VI Z..S .... ....
+#else
+	orr	r0, r0, #0x3100			@ ..VI ...S .... ....
+#endif
+#if L2_CACHE_ENABLE
+	orr 	r0, r0, #0x4000000		@ L2 enable
+#endif
+	mov	pc, lr
+
+	.size	__xsc3_setup, . - __xsc3_setup
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+
+	.type	xsc3_processor_functions, #object
+ENTRY(xsc3_processor_functions)
+	.word	v5t_early_abort
+	.word	cpu_xsc3_proc_init
+	.word	cpu_xsc3_proc_fin
+	.word	cpu_xsc3_reset
+	.word	cpu_xsc3_do_idle
+	.word	cpu_xsc3_dcache_clean_area
+	.word	cpu_xsc3_switch_mm
+	.word	cpu_xsc3_set_pte
+	.size	xsc3_processor_functions, . - xsc3_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv5te"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v5"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	.type	cpu_xsc3_name, #object
+cpu_xsc3_name:
+	.asciz	"XScale-Core3"
+	.size	cpu_xsc3_name, . - cpu_xsc3_name
+
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	.type	__xsc3_proc_info,#object
+__xsc3_proc_info:
+	.long	0x69056000
+	.long	0xffffe000
+	.long	0x00000c0e
+	b	__xsc3_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_xsc3_name
+	.long	xsc3_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	xsc3_mc_user_fns
+	.long	xsc3_cache_fns
+	.size	__xsc3_proc_info, . - __xsc3_proc_info
diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h
index 09e19a7..746be56 100644
--- a/include/asm-arm/cacheflush.h
+++ b/include/asm-arm/cacheflush.h
@@ -71,6 +71,14 @@
 # endif
 #endif
 
+#if defined(CONFIG_CPU_XSC3)
+# ifdef _CACHE
+#  define MULTI_CACHE 1
+# else
+#  define _CACHE xsc3
+# endif
+#endif
+
 #if defined(CONFIG_CPU_V6)
 //# ifdef _CACHE
 #  define MULTI_CACHE 1
diff --git a/include/asm-arm/domain.h b/include/asm-arm/domain.h
index da1d960..f8ea2de 100644
--- a/include/asm-arm/domain.h
+++ b/include/asm-arm/domain.h
@@ -16,11 +16,29 @@
  *  DOMAIN_IO     - domain 2 includes all IO only
  *  DOMAIN_USER   - domain 1 includes all user memory only
  *  DOMAIN_KERNEL - domain 0 includes all kernel memory only
+ *
+ * The domain numbering depends on whether we support 36 physical
+ * address for I/O or not.  Addresses above the 32 bit boundary can
+ * only be mapped using supersections and supersections can only
+ * be set for domain 0.  We could just default to DOMAIN_IO as zero,
+ * but there may be systems with supersection support and no 36-bit
+ * addressing.  In such cases, we want to map system memory with
+ * supersections to reduce TLB misses and footprint.
+ *
+ * 36-bit addressing and supersections are only available on
+ * CPUs based on ARMv6+ or the Intel XSC3 core.
  */
+#ifndef CONFIG_IO_36
 #define DOMAIN_KERNEL	0
 #define DOMAIN_TABLE	0
 #define DOMAIN_USER	1
 #define DOMAIN_IO	2
+#else
+#define DOMAIN_KERNEL	2
+#define DOMAIN_TABLE	2
+#define DOMAIN_USER	1
+#define DOMAIN_IO	0
+#endif
 
 /*
  * Domain types
diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h
index 416320d..a404d2b 100644
--- a/include/asm-arm/page.h
+++ b/include/asm-arm/page.h
@@ -40,6 +40,7 @@
  *	  v4wb		- ARMv4 with writeback cache, without minicache
  *	  v4_mc		- ARMv4 with minicache
  *	  xscale	- Xscale
+ *	  xsc3		- XScalev3
  */
 #undef _USER
 #undef MULTI_USER
@@ -84,6 +85,14 @@
 # endif
 #endif
 
+#ifdef CONFIG_CPU_XSC3
+# ifdef _USER
+#  define MULTI_USER 1
+# else
+#  define _USER xsc3_mc
+# endif
+#endif
+
 #ifdef CONFIG_CPU_COPY_V6
 # define MULTI_USER 1
 #endif
diff --git a/include/asm-arm/proc-fns.h b/include/asm-arm/proc-fns.h
index 7bef2bf..106045e 100644
--- a/include/asm-arm/proc-fns.h
+++ b/include/asm-arm/proc-fns.h
@@ -138,6 +138,14 @@
 #   define CPU_NAME cpu_xscale
 #  endif
 # endif
+# ifdef CONFIG_CPU_XSC3
+#  ifdef CPU_NAME
+#   undef  MULTI_CPU
+#   define MULTI_CPU
+#  else
+#   define CPU_NAME cpu_xsc3
+#  endif
+# endif
 # ifdef CONFIG_CPU_V6
 #  ifdef CPU_NAME
 #   undef  MULTI_CPU
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index ec91d1f..95b3abf 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -108,6 +108,25 @@ extern void __show_regs(struct pt_regs *);
 extern int cpu_architecture(void);
 extern void cpu_init(void);
 
+/*
+ * Intel's XScale3 core supports some v6 features (supersections, L2)
+ * but advertises itself as v5 as it does not support the v6 ISA.  For
+ * this reason, we need a way to explicitly test for this type of CPU.
+ */
+#ifndef CONFIG_CPU_XSC3
+#define cpu_is_xsc3()	0
+#else
+static inline int cpu_is_xsc3(void)
+{
+	extern unsigned int processor_id;
+
+	if ((processor_id & 0xffffe000) == 0x69056000)
+		return 1;
+
+	return 0;
+}
+#endif
+
 #define set_cr(x)					\
 	__asm__ __volatile__(				\
 	"mcr	p15, 0, %0, c1, c0, 0	@ set CR"	\
-- 
cgit v0.10.2


From fa5ebfccf30741dc432cb81c25bb591c8018eb18 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 28 Mar 2006 21:02:26 +0100
Subject: [ARM] 3387/1: ixp23xx: add defconfig

Patch from Lennert Buytenhek

Add ixp23xx defconfig.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/configs/ixp23xx_defconfig b/arch/arm/configs/ixp23xx_defconfig
new file mode 100644
index 0000000..1a2751e
--- /dev/null
+++ b/arch/arm/configs/ixp23xx_defconfig
@@ -0,0 +1,1302 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.16
+# Tue Mar 21 03:27:20 2006
+#
+CONFIG_ARM=y
+CONFIG_MMU=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_UID16=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+CONFIG_OBSOLETE_INTERMODULE=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+
+#
+# Block layer
+#
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
+#
+# System Type
+#
+# CONFIG_ARCH_CLPS7500 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CO285 is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_IOP3XX is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+CONFIG_ARCH_IXP23XX=y
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_OMAP is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_AT91RM9200 is not set
+CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y
+
+#
+# Intel IXP23xx Implementation Options
+#
+
+#
+# IXP23xx Platforms
+#
+CONFIG_MACH_ESPRESSO=y
+CONFIG_MACH_IXDP2351=y
+CONFIG_MACH_ROADRUNNER=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_XSC3=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5T=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_IO_36=y
+
+#
+# Processor Features
+#
+# CONFIG_ARM_THUMB is not set
+CONFIG_CPU_BIG_ENDIAN=y
+
+#
+# Bus support
+#
+CONFIG_PCI=y
+CONFIG_PCI_LEGACY_PROC=y
+# CONFIG_PCI_DEBUG is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+# CONFIG_PREEMPT is not set
+# CONFIG_NO_IDLE_HZ is not set
+CONFIG_HZ=100
+# CONFIG_AEABI is not set
+# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="console=ttyS0,115200 root=/dev/nfs ip=bootp"
+# CONFIG_XIP_KERNEL is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+CONFIG_FPE_NWFPE_XP=y
+# CONFIG_FPE_FASTFPE is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_ARTHUR is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+# CONFIG_APM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
+CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
+CONFIG_MTD_REDBOOT_PARTS_READONLY=y
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AFS_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+# CONFIG_MTD_CFI_AMDSTD is not set
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+# CONFIG_MTD_OBSOLETE_CHIPS is not set
+
+#
+# Mapping drivers for chip access
+#
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_START=0x0
+CONFIG_MTD_PHYSMAP_LEN=0x0
+CONFIG_MTD_PHYSMAP_BANKWIDTH=1
+# CONFIG_MTD_ARM_INTEGRATOR is not set
+# CONFIG_MTD_PCI is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLKMTD is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+
+#
+# NAND Flash Device Drivers
+#
+# CONFIG_MTD_NAND is not set
+
+#
+# OneNAND Flash Device Drivers
+#
+# CONFIG_MTD_ONENAND is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+# CONFIG_BLK_DEV_IDECD is not set
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_GENERIC is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_SL82C105 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+# CONFIG_IDEDMA_PCI_AUTO is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+CONFIG_BLK_DEV_SIIMAGE=y
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_IDEDMA_AUTO is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_SMC91X is not set
+# CONFIG_DM9000 is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+CONFIG_E100=y
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+CONFIG_E1000_NAPI=y
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
+# CONFIG_SKGE is not set
+# CONFIG_SKY2 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+CONFIG_WAN=y
+# CONFIG_DSCC4 is not set
+# CONFIG_LANMEDIA is not set
+# CONFIG_SYNCLINK_SYNCPPP is not set
+CONFIG_HDLC=y
+CONFIG_HDLC_RAW=y
+# CONFIG_HDLC_RAW_ETH is not set
+CONFIG_HDLC_CISCO=y
+CONFIG_HDLC_FR=y
+CONFIG_HDLC_PPP=y
+
+#
+# X.25/LAPB support is disabled
+#
+# CONFIG_PCI200SYN is not set
+# CONFIG_WANXL is not set
+# CONFIG_PC300 is not set
+# CONFIG_FARSYNC is not set
+CONFIG_DLCI=y
+CONFIG_DLCI_COUNT=24
+CONFIG_DLCI_MAX=8
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=y
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+CONFIG_SENSORS_EEPROM=y
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_RTC8564 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_RTC_X1205_I2C is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+# CONFIG_USB_HID is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+# CONFIG_EXT2_FS_SECURITY is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+# CONFIG_VFAT_FS is not set
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+CONFIG_FRAME_POINTER=y
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_DEBUG_USER=y
+# CONFIG_DEBUG_WAITQ is not set
+CONFIG_DEBUG_ERRORS=y
+CONFIG_DEBUG_LL=y
+# CONFIG_DEBUG_ICEDCC is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
-- 
cgit v0.10.2


From e9937d4b0a9382c4c78411d1c53e62be396ee9a9 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 28 Mar 2006 21:08:13 +0100
Subject: [ARM] 3417/1: add support for logicpd pxa270 card engine

Patch from Lennert Buytenhek

Add support for the LogicPD PXA270 Card Engine.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index c1d77f5..0104fd1 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -10,6 +10,11 @@ config ARCH_LUBBOCK
 	select PXA25x
 	select SA1111
 
+config MACH_LOGICPD_PXA270
+	bool "LogicPD PXA270 Card Engine Development Platform"
+	select PXA27x
+	select IWMMXT
+
 config MACH_MAINSTONE
 	bool "Intel HCDDBBVA0 Development Platform"
 	select PXA27x
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index 3826444..4e8a983 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_PXA27x) += pxa27x.o
 
 # Specific board support
 obj-$(CONFIG_ARCH_LUBBOCK) += lubbock.o
+obj-$(CONFIG_MACH_LOGICPD_PXA270) += lpd270.o
 obj-$(CONFIG_MACH_MAINSTONE) += mainstone.o
 obj-$(CONFIG_ARCH_PXA_IDP) += idp.o
 obj-$(CONFIG_PXA_SHARP_C7xx)	+= corgi.o corgi_ssp.o corgi_lcd.o sharpsl_pm.o corgi_pm.o
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
new file mode 100644
index 0000000..ec0f43a
--- /dev/null
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -0,0 +1,393 @@
+/*
+ * linux/arch/arm/mach-pxa/lpd270.c
+ *
+ * Support for the LogicPD PXA270 Card Engine.
+ * Derived from the mainstone code, which carries these notices:
+ *
+ * Author:	Nicolas Pitre
+ * Created:	Nov 05, 2002
+ * Copyright:	MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/fb.h>
+#include <linux/ioport.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/mach-types.h>
+#include <asm/hardware.h>
+#include <asm/irq.h>
+#include <asm/sizes.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/flash.h>
+
+#include <asm/arch/pxa-regs.h>
+#include <asm/arch/lpd270.h>
+#include <asm/arch/audio.h>
+#include <asm/arch/pxafb.h>
+#include <asm/arch/mmc.h>
+#include <asm/arch/irda.h>
+#include <asm/arch/ohci.h>
+
+#include "generic.h"
+
+
+static unsigned int lpd270_irq_enabled;
+
+static void lpd270_mask_irq(unsigned int irq)
+{
+	int lpd270_irq = irq - LPD270_IRQ(0);
+
+	__raw_writew(~(1 << lpd270_irq), LPD270_INT_STATUS);
+
+	lpd270_irq_enabled &= ~(1 << lpd270_irq);
+	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
+}
+
+static void lpd270_unmask_irq(unsigned int irq)
+{
+	int lpd270_irq = irq - LPD270_IRQ(0);
+
+	lpd270_irq_enabled |= 1 << lpd270_irq;
+	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
+}
+
+static struct irqchip lpd270_irq_chip = {
+	.ack		= lpd270_mask_irq,
+	.mask		= lpd270_mask_irq,
+	.unmask		= lpd270_unmask_irq,
+};
+
+static void lpd270_irq_handler(unsigned int irq, struct irqdesc *desc,
+				  struct pt_regs *regs)
+{
+	unsigned long pending;
+
+	pending = __raw_readw(LPD270_INT_STATUS) & lpd270_irq_enabled;
+	do {
+		GEDR(0) = GPIO_bit(0);  /* clear useless edge notification */
+		if (likely(pending)) {
+			irq = LPD270_IRQ(0) + __ffs(pending);
+			desc = irq_desc + irq;
+			desc_handle_irq(irq, desc, regs);
+
+			pending = __raw_readw(LPD270_INT_STATUS) &
+						lpd270_irq_enabled;
+		}
+	} while (pending);
+}
+
+static void __init lpd270_init_irq(void)
+{
+	int irq;
+
+	pxa_init_irq();
+
+	__raw_writew(0, LPD270_INT_MASK);
+	__raw_writew(0, LPD270_INT_STATUS);
+
+	/* setup extra LogicPD PXA270 irqs */
+	for (irq = LPD270_IRQ(2); irq <= LPD270_IRQ(4); irq++) {
+		set_irq_chip(irq, &lpd270_irq_chip);
+		set_irq_handler(irq, do_level_IRQ);
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+	}
+	set_irq_chained_handler(IRQ_GPIO(0), lpd270_irq_handler);
+	set_irq_type(IRQ_GPIO(0), IRQT_FALLING);
+}
+
+
+#ifdef CONFIG_PM
+static int lpd270_irq_resume(struct sys_device *dev)
+{
+	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
+	return 0;
+}
+
+static struct sysdev_class lpd270_irq_sysclass = {
+	set_kset_name("cpld_irq"),
+	.resume = lpd270_irq_resume,
+};
+
+static struct sys_device lpd270_irq_device = {
+	.cls = &lpd270_irq_sysclass,
+};
+
+static int __init lpd270_irq_device_init(void)
+{
+	int ret = sysdev_class_register(&lpd270_irq_sysclass);
+	if (ret == 0)
+		ret = sysdev_register(&lpd270_irq_device);
+	return ret;
+}
+
+device_initcall(lpd270_irq_device_init);
+#endif
+
+
+static struct resource smc91x_resources[] = {
+	[0] = {
+		.start	= LPD270_ETH_PHYS,
+		.end	= (LPD270_ETH_PHYS + 0xfffff),
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= LPD270_ETHERNET_IRQ,
+		.end	= LPD270_ETHERNET_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device smc91x_device = {
+	.name		= "smc91x",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(smc91x_resources),
+	.resource	= smc91x_resources,
+};
+
+static struct platform_device lpd270_audio_device = {
+	.name		= "pxa2xx-ac97",
+	.id		= -1,
+};
+
+static struct resource lpd270_flash_resources[] = {
+	[0] = {
+		.start	= PXA_CS0_PHYS,
+		.end	= PXA_CS0_PHYS + SZ_64M - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= PXA_CS1_PHYS,
+		.end	= PXA_CS1_PHYS + SZ_64M - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct mtd_partition lpd270_flash0_partitions[] = {
+	{
+		.name =		"Bootloader",
+		.size =		0x00040000,
+		.offset =	0,
+		.mask_flags =	MTD_WRITEABLE  /* force read-only */
+	}, {
+		.name =		"Kernel",
+		.size =		0x00400000,
+		.offset =	0x00040000,
+	}, {
+		.name =		"Filesystem",
+		.size =		MTDPART_SIZ_FULL,
+		.offset =	0x00440000
+	},
+};
+
+static struct flash_platform_data lpd270_flash_data[2] = {
+	{
+		.name		= "processor-flash",
+		.map_name	= "cfi_probe",
+		.parts		= lpd270_flash0_partitions,
+		.nr_parts	= ARRAY_SIZE(lpd270_flash0_partitions),
+	}, {
+		.name		= "mainboard-flash",
+		.map_name	= "cfi_probe",
+		.parts		= NULL,
+		.nr_parts	= 0,
+	}
+};
+
+static struct platform_device lpd270_flash_device[2] = {
+	{
+		.name		= "pxa2xx-flash",
+		.id		= 0,
+		.dev = {
+			.platform_data	= &lpd270_flash_data[0],
+		},
+		.resource	= &lpd270_flash_resources[0],
+		.num_resources	= 1,
+	}, {
+		.name		= "pxa2xx-flash",
+		.id		= 1,
+		.dev = {
+			.platform_data	= &lpd270_flash_data[1],
+		},
+		.resource	= &lpd270_flash_resources[1],
+		.num_resources	= 1,
+	},
+};
+
+static void lpd270_backlight_power(int on)
+{
+	if (on) {
+		pxa_gpio_mode(GPIO16_PWM0_MD);
+		pxa_set_cken(CKEN0_PWM0, 1);
+		PWM_CTRL0 = 0;
+		PWM_PWDUTY0 = 0x3ff;
+		PWM_PERVAL0 = 0x3ff;
+	} else {
+		PWM_CTRL0 = 0;
+		PWM_PWDUTY0 = 0x0;
+		PWM_PERVAL0 = 0x3FF;
+		pxa_set_cken(CKEN0_PWM0, 0);
+	}
+}
+
+/* 5.7" TFT QVGA (LoLo display number 1) */
+static struct pxafb_mach_info sharp_lq057q3dc02 __initdata = {
+	.pixclock		= 100000,
+	.xres			= 240,
+	.yres			= 320,
+	.bpp			= 16,
+	.hsync_len		= 64,
+	.left_margin		= 0x27,
+	.right_margin		= 0x09,
+	.vsync_len		= 0x04,
+	.upper_margin		= 0x08,
+	.lower_margin		= 0x14,
+	.sync			= 0,
+	.lccr0			= 0x07800080,
+	.lccr3			= 0x04400007,
+	.pxafb_backlight_power	= lpd270_backlight_power,
+};
+
+/* 6.4" TFT VGA (LoLo display number 5) */
+static struct pxafb_mach_info sharp_lq64d343 __initdata = {
+	.pixclock		= 20000,
+	.xres			= 640,
+	.yres			= 480,
+	.bpp			= 16,
+	.hsync_len		= 49,
+	.left_margin		= 0x89,
+	.right_margin		= 0x19,
+	.vsync_len		= 18,
+	.upper_margin		= 0x22,
+	.lower_margin		= 0,
+	.sync			= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	.lccr0			= 0x07800080,
+	.lccr3			= 0x04400001,
+	.pxafb_backlight_power	= lpd270_backlight_power,
+};
+
+/* 3.5" TFT QVGA (LoLo display number 8) */
+static struct pxafb_mach_info sharp_lq035q7db02_20 __initdata = {
+	.pixclock		= 100000,
+	.xres			= 240,
+	.yres			= 320,
+	.bpp			= 16,
+	.hsync_len		= 0x34,
+	.left_margin		= 0x09,
+	.right_margin		= 0x09,
+	.vsync_len		= 0x08,
+	.upper_margin		= 0x05,
+	.lower_margin		= 0x14,
+	.sync			= 0,
+	.lccr0			= 0x07800080,
+	.lccr3			= 0x04400007,
+	.pxafb_backlight_power	= lpd270_backlight_power,
+};
+
+static struct platform_device *platform_devices[] __initdata = {
+	&smc91x_device,
+	&lpd270_audio_device,
+	&lpd270_flash_device[0],
+	&lpd270_flash_device[1],
+};
+
+static int lpd270_ohci_init(struct device *dev)
+{
+	/* setup Port1 GPIO pin. */
+	pxa_gpio_mode(88 | GPIO_ALT_FN_1_IN);	/* USBHPWR1 */
+	pxa_gpio_mode(89 | GPIO_ALT_FN_2_OUT);	/* USBHPEN1 */
+
+	/* Set the Power Control Polarity Low and Power Sense
+	   Polarity Low to active low. */
+	UHCHR = (UHCHR | UHCHR_PCPL | UHCHR_PSPL) &
+		~(UHCHR_SSEP1 | UHCHR_SSEP2 | UHCHR_SSEP3 | UHCHR_SSE);
+
+	return 0;
+}
+
+static struct pxaohci_platform_data lpd270_ohci_platform_data = {
+	.port_mode	= PMM_PERPORT_MODE,
+	.init		= lpd270_ohci_init,
+};
+
+static void __init lpd270_init(void)
+{
+	lpd270_flash_data[0].width = (BOOT_DEF & 1) ? 2 : 4;
+	lpd270_flash_data[1].width = 4;
+
+	/*
+	 * System bus arbiter setting:
+	 * - Core_Park
+	 * - LCD_wt:DMA_wt:CORE_Wt = 2:3:4
+	 */
+	ARB_CNTRL = ARB_CORE_PARK | 0x234;
+
+	/*
+	 * On LogicPD PXA270, we route AC97_SYSCLK via GPIO45.
+	 */
+	pxa_gpio_mode(GPIO45_SYSCLK_AC97_MD);
+
+	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
+
+	// set_pxa_fb_info(&sharp_lq057q3dc02);
+	set_pxa_fb_info(&sharp_lq64d343);
+	// set_pxa_fb_info(&sharp_lq035q7db02_20);
+
+	pxa_set_ohci_info(&lpd270_ohci_platform_data);
+}
+
+
+static struct map_desc lpd270_io_desc[] __initdata = {
+	{
+		.virtual	= LPD270_CPLD_VIRT,
+		.pfn		= __phys_to_pfn(LPD270_CPLD_PHYS),
+		.length		= LPD270_CPLD_SIZE,
+		.type		= MT_DEVICE,
+	},
+};
+
+static void __init lpd270_map_io(void)
+{
+	pxa_map_io();
+	iotable_init(lpd270_io_desc, ARRAY_SIZE(lpd270_io_desc));
+
+	/* initialize sleep mode regs (wake-up sources, etc) */
+	PGSR0 = 0x00008800;
+	PGSR1 = 0x00000002;
+	PGSR2 = 0x0001FC00;
+	PGSR3 = 0x00001F81;
+	PWER  = 0xC0000002;
+	PRER  = 0x00000002;
+	PFER  = 0x00000002;
+
+	/* for use I SRAM as framebuffer.  */
+	PSLR |= 0x00000F04;
+	PCFR  = 0x00000066;
+}
+
+MACHINE_START(LOGICPD_PXA270, "LogicPD PXA270 Card Engine")
+	/* Maintainer: Peter Barada */
+	.phys_io	= 0x40000000,
+	.io_pg_offst	= (io_p2v(0x40000000) >> 18) & 0xfffc,
+	.boot_params	= 0xa0000100,
+	.map_io		= lpd270_map_io,
+	.init_irq	= lpd270_init_irq,
+	.timer		= &pxa_timer,
+	.init_machine	= lpd270_init,
+MACHINE_END
diff --git a/include/asm-arm/arch-pxa/irqs.h b/include/asm-arm/arch-pxa/irqs.h
index 05c4b70..67af238 100644
--- a/include/asm-arm/arch-pxa/irqs.h
+++ b/include/asm-arm/arch-pxa/irqs.h
@@ -176,6 +176,7 @@
 #elif defined(CONFIG_SHARP_LOCOMO)
 #define NR_IRQS			(IRQ_LOCOMO_SPI_TEND + 1)
 #elif defined(CONFIG_ARCH_LUBBOCK) || \
+      defined(CONFIG_MACH_LOGICPD_PXA270) || \
       defined(CONFIG_MACH_MAINSTONE)
 #define NR_IRQS			(IRQ_BOARD_END)
 #else
@@ -196,6 +197,11 @@
 #define LUBBOCK_USB_DISC_IRQ	LUBBOCK_IRQ(6)  /* usb disconnect */
 #define LUBBOCK_LAST_IRQ	LUBBOCK_IRQ(6)
 
+#define LPD270_IRQ(x)		(IRQ_BOARD_START + (x))
+#define LPD270_USBC_IRQ		LPD270_IRQ(2)
+#define LPD270_ETHERNET_IRQ	LPD270_IRQ(3)
+#define LPD270_AC97_IRQ		LPD270_IRQ(4)
+
 #define MAINSTONE_IRQ(x)	(IRQ_BOARD_START + (x))
 #define MAINSTONE_MMC_IRQ	MAINSTONE_IRQ(0)
 #define MAINSTONE_USIM_IRQ	MAINSTONE_IRQ(1)
diff --git a/include/asm-arm/arch-pxa/lpd270.h b/include/asm-arm/arch-pxa/lpd270.h
new file mode 100644
index 0000000..501d240
--- /dev/null
+++ b/include/asm-arm/arch-pxa/lpd270.h
@@ -0,0 +1,38 @@
+/*
+ * include/asm-arm/arch-pxa/lpd270.h
+ *
+ * Author:	Lennert Buytenhek
+ * Created:	Feb 10, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_LPD270_H
+#define __ASM_ARCH_LPD270_H
+
+#define LPD270_CPLD_PHYS	PXA_CS2_PHYS
+#define LPD270_CPLD_VIRT	0xf0000000
+#define LPD270_CPLD_SIZE	0x00100000
+
+#define LPD270_ETH_PHYS		(PXA_CS2_PHYS + 0x01000000)
+
+/* CPLD registers  */
+#define LPD270_CPLD_REG(x)	((unsigned long)(LPD270_CPLD_VIRT + (x)))
+#define LPD270_CONTROL		LPD270_CPLD_REG(0x00)
+#define LPD270_PERIPHERAL0	LPD270_CPLD_REG(0x04)
+#define LPD270_PERIPHERAL1	LPD270_CPLD_REG(0x08)
+#define LPD270_CPLD_REVISION	LPD270_CPLD_REG(0x14)
+#define LPD270_EEPROM_SPI_ITF	LPD270_CPLD_REG(0x20)
+#define LPD270_MODE_PINS	LPD270_CPLD_REG(0x24)
+#define LPD270_EGPIO		LPD270_CPLD_REG(0x30)
+#define LPD270_INT_MASK		LPD270_CPLD_REG(0x40)
+#define LPD270_INT_STATUS	LPD270_CPLD_REG(0x50)
+
+#define LPD270_INT_AC97		(1 << 4)  /* AC'97 CODEC IRQ */
+#define LPD270_INT_ETHERNET	(1 << 3)  /* Ethernet controller IRQ */
+#define LPD270_INT_USBC		(1 << 2)  /* USB client cable detection IRQ */
+
+
+#endif
-- 
cgit v0.10.2


From c4713074375c61f939310b04e92090afe29810dc Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 28 Mar 2006 21:18:54 +0100
Subject: [ARM] 3388/1: ixp23xx: add core ixp23xx support

Patch from Lennert Buytenhek

This patch adds support for the Intel ixp23xx series of CPUs.  The
ixp23xx is an XSC3 based CPU with 512K of L2 cache, a 64bit 66MHz PCI
interface, two DDR RAM interfaces, QDR RAM interfaces, two gigabit
MACs, two 10/100 MACs, expansion bus, four microengines, a Media and
Switch Fabric unit almost identical to the one on the ixp2400, two
xscale (8250ish) UARTs and a bunch of other stuff.

This patch adds the core ixp23xx support code, and support for the
ADI Engineering Roadrunner, Intel IXDP2351, and IP Fabrics Double
Espresso platforms.

Signed-off-by: Deepak Saxena <dsaxena@plexity.net>
Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0dd24eb..427c721 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -148,6 +148,12 @@ config ARCH_IXP2000
 	help
 	  Support for Intel's IXP2400/2800 (XScale) family of processors.
 
+config ARCH_IXP23XX
+ 	bool "IXP23XX-based"
+ 	select PCI
+	help
+	  Support for Intel's IXP23xx (XScale) family of processors.
+
 config ARCH_L7200
 	bool "LinkUp-L7200"
 	select FIQ
@@ -269,6 +275,8 @@ source "arch/arm/mach-ixp4xx/Kconfig"
 
 source "arch/arm/mach-ixp2000/Kconfig"
 
+source "arch/arm/mach-ixp23xx/Kconfig"
+
 source "arch/arm/mach-pxa/Kconfig"
 
 source "arch/arm/mach-sa1100/Kconfig"
@@ -787,7 +795,8 @@ source "drivers/acorn/block/Kconfig"
 
 if PCMCIA || ARCH_CLPS7500 || ARCH_IOP3XX || ARCH_IXP4XX \
 	|| ARCH_L7200 || ARCH_LH7A40X || ARCH_PXA || ARCH_RPC \
-	|| ARCH_S3C2410 || ARCH_SA1100 || ARCH_SHARK || FOOTBRIDGE
+	|| ARCH_S3C2410 || ARCH_SA1100 || ARCH_SHARK || FOOTBRIDGE \
+	|| ARCH_IXP23XX
 source "drivers/ide/Kconfig"
 endif
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 0f571d3..ce3e804 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -98,6 +98,7 @@ endif
  machine-$(CONFIG_ARCH_IOP3XX)	   := iop3xx
  machine-$(CONFIG_ARCH_IXP4XX)	   := ixp4xx
  machine-$(CONFIG_ARCH_IXP2000)    := ixp2000
+ machine-$(CONFIG_ARCH_IXP23XX)    := ixp23xx
  machine-$(CONFIG_ARCH_OMAP1)	   := omap1
  machine-$(CONFIG_ARCH_OMAP2)	   := omap2
   incdir-$(CONFIG_ARCH_OMAP)	   := omap
diff --git a/arch/arm/mach-ixp23xx/Kconfig b/arch/arm/mach-ixp23xx/Kconfig
new file mode 100644
index 0000000..982670e
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/Kconfig
@@ -0,0 +1,25 @@
+if ARCH_IXP23XX
+
+config ARCH_SUPPORTS_BIG_ENDIAN
+	bool
+	default y
+
+menu "Intel IXP23xx Implementation Options"
+
+comment "IXP23xx Platforms"
+
+config MACH_ESPRESSO
+	bool "Support IP Fabrics Double Espresso platform"
+	help
+
+config MACH_IXDP2351
+	bool "Support Intel IXDP2351 platform"
+	help
+
+config MACH_ROADRUNNER
+	bool "Support ADI RoadRunner platform"
+	help
+
+endmenu
+
+endif
diff --git a/arch/arm/mach-ixp23xx/Makefile b/arch/arm/mach-ixp23xx/Makefile
new file mode 100644
index 0000000..288b371
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the linux kernel.
+#
+obj-y			:= core.o pci.o
+obj-m			:=
+obj-n			:=
+obj-			:=
+
+obj-$(CONFIG_MACH_ESPRESSO)	+= espresso.o
+obj-$(CONFIG_MACH_IXDP2351)	+= ixdp2351.o
+obj-$(CONFIG_MACH_ROADRUNNER)	+= roadrunner.o
diff --git a/arch/arm/mach-ixp23xx/Makefile.boot b/arch/arm/mach-ixp23xx/Makefile.boot
new file mode 100644
index 0000000..d5561ad
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/Makefile.boot
@@ -0,0 +1,2 @@
+   zreladdr-y	:= 0x00008000
+params_phys-y	:= 0x00000100
diff --git a/arch/arm/mach-ixp23xx/core.c b/arch/arm/mach-ixp23xx/core.c
new file mode 100644
index 0000000..092ee12
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/core.c
@@ -0,0 +1,431 @@
+/*
+ * arch/arm/mach-ixp23xx/core.c
+ *
+ * Core routines for IXP23xx chips
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * Based on 2.4 code Copyright 2004 (c) Intel Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/bitops.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <asm/mach/map.h>
+#include <asm/mach/time.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/arch.h>
+
+
+/*************************************************************************
+ * Chip specific mappings shared by all IXP23xx systems
+ *************************************************************************/
+static struct map_desc ixp23xx_io_desc[] __initdata = {
+	{ /* XSI-CPP CSRs */
+	 	.virtual	= IXP23XX_XSI2CPP_CSR_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_XSI2CPP_CSR_PHYS),
+	 	.length		= IXP23XX_XSI2CPP_CSR_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* Expansion Bus Config */
+	 	.virtual	= IXP23XX_EXP_CFG_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_EXP_CFG_PHYS),
+	 	.length		= IXP23XX_EXP_CFG_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* UART, Interrupt ctrl, GPIO, timers, NPEs, MACS,.... */
+	 	.virtual	= IXP23XX_PERIPHERAL_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_PERIPHERAL_PHYS),
+	 	.length		= IXP23XX_PERIPHERAL_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* CAP CSRs */
+	 	.virtual	= IXP23XX_CAP_CSR_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_CAP_CSR_PHYS),
+	 	.length		= IXP23XX_CAP_CSR_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* MSF CSRs */
+	 	.virtual	= IXP23XX_MSF_CSR_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_MSF_CSR_PHYS),
+	 	.length		= IXP23XX_MSF_CSR_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* PCI I/O Space */
+	 	.virtual	= IXP23XX_PCI_IO_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_PCI_IO_PHYS),
+	 	.length		= IXP23XX_PCI_IO_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* PCI Config Space */
+	 	.virtual	= IXP23XX_PCI_CFG_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_PCI_CFG_PHYS),
+	 	.length		= IXP23XX_PCI_CFG_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* PCI local CFG CSRs */
+	 	.virtual	= IXP23XX_PCI_CREG_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_PCI_CREG_PHYS),
+	 	.length		= IXP23XX_PCI_CREG_SIZE,
+		.type		= MT_DEVICE,
+	}, { /* PCI MEM Space */
+	 	.virtual	= IXP23XX_PCI_MEM_VIRT,
+	 	.pfn		= __phys_to_pfn(IXP23XX_PCI_MEM_PHYS),
+	 	.length		= IXP23XX_PCI_MEM_SIZE,
+		.type		= MT_DEVICE,
+	}
+};
+
+void __init ixp23xx_map_io(void)
+{
+	iotable_init(ixp23xx_io_desc, ARRAY_SIZE(ixp23xx_io_desc));
+}
+
+
+/***************************************************************************
+ * IXP23xx Interrupt Handling
+ ***************************************************************************/
+enum ixp23xx_irq_type {
+	IXP23XX_IRQ_LEVEL, IXP23XX_IRQ_EDGE
+};
+
+static void ixp23xx_config_irq(unsigned int, enum ixp23xx_irq_type);
+
+static int ixp23xx_irq_set_type(unsigned int irq, unsigned int type)
+{
+	int line = irq - IRQ_IXP23XX_GPIO6 + 6;
+	u32 int_style;
+	enum ixp23xx_irq_type irq_type;
+	volatile u32 *int_reg;
+
+	/*
+	 * Only GPIOs 6-15 are wired to interrupts on IXP23xx
+	 */
+	if (line < 6 || line > 15)
+		return -EINVAL;
+
+	switch (type) {
+	case IRQT_BOTHEDGE:
+		int_style = IXP23XX_GPIO_STYLE_TRANSITIONAL;
+		irq_type = IXP23XX_IRQ_EDGE;
+		break;
+	case IRQT_RISING:
+		int_style = IXP23XX_GPIO_STYLE_RISING_EDGE;
+		irq_type = IXP23XX_IRQ_EDGE;
+		break;
+	case IRQT_FALLING:
+		int_style = IXP23XX_GPIO_STYLE_FALLING_EDGE;
+		irq_type = IXP23XX_IRQ_EDGE;
+		break;
+	case IRQT_HIGH:
+		int_style = IXP23XX_GPIO_STYLE_ACTIVE_HIGH;
+		irq_type = IXP23XX_IRQ_LEVEL;
+		break;
+	case IRQT_LOW:
+		int_style = IXP23XX_GPIO_STYLE_ACTIVE_LOW;
+		irq_type = IXP23XX_IRQ_LEVEL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ixp23xx_config_irq(irq, irq_type);
+
+	if (line >= 8) {	/* pins 8-15 */
+		line -= 8;
+		int_reg = (volatile u32 *)IXP23XX_GPIO_GPIT2R;
+	} else {		/* pins 0-7 */
+		int_reg = (volatile u32 *)IXP23XX_GPIO_GPIT1R;
+	}
+
+	/*
+	 * Clear pending interrupts
+	 */
+	*IXP23XX_GPIO_GPISR = (1 << line);
+
+	/* Clear the style for the appropriate pin */
+	*int_reg &= ~(IXP23XX_GPIO_STYLE_MASK <<
+			(line * IXP23XX_GPIO_STYLE_SIZE));
+
+	/* Set the new style */
+	*int_reg |= (int_style << (line * IXP23XX_GPIO_STYLE_SIZE));
+
+	return 0;
+}
+
+static void ixp23xx_irq_mask(unsigned int irq)
+{
+	volatile unsigned long *intr_reg = IXP23XX_INTR_EN1 + (irq / 32);
+
+	*intr_reg &= ~(1 << (irq % 32));
+}
+
+static void ixp23xx_irq_ack(unsigned int irq)
+{
+	int line = irq - IRQ_IXP23XX_GPIO6 + 6;
+
+	if ((line < 6) || (line > 15))
+		return;
+
+	*IXP23XX_GPIO_GPISR = (1 << line);
+}
+
+/*
+ * Level triggered interrupts on GPIO lines can only be cleared when the
+ * interrupt condition disappears.
+ */
+static void ixp23xx_irq_level_unmask(unsigned int irq)
+{
+	volatile unsigned long *intr_reg = IXP23XX_INTR_EN1 + (irq / 32);
+
+	ixp23xx_irq_ack(irq);
+
+	*intr_reg |= (1 << (irq % 32));
+}
+
+static void ixp23xx_irq_edge_unmask(unsigned int irq)
+{
+	volatile unsigned long *intr_reg = IXP23XX_INTR_EN1 + (irq / 32);
+
+	*intr_reg |= (1 << (irq % 32));
+}
+
+static struct irqchip ixp23xx_irq_level_chip = {
+	.ack		= ixp23xx_irq_mask,
+	.mask		= ixp23xx_irq_mask,
+	.unmask		= ixp23xx_irq_level_unmask,
+	.set_type	= ixp23xx_irq_set_type
+};
+
+static struct irqchip ixp23xx_irq_edge_chip = {
+	.ack		= ixp23xx_irq_ack,
+	.mask		= ixp23xx_irq_mask,
+	.unmask		= ixp23xx_irq_edge_unmask,
+	.set_type	= ixp23xx_irq_set_type
+};
+
+static void ixp23xx_pci_irq_mask(unsigned int irq)
+{
+	*IXP23XX_PCI_XSCALE_INT_ENABLE &= ~(1 << (IRQ_IXP23XX_INTA + 27 - irq));
+}
+
+static void ixp23xx_pci_irq_unmask(unsigned int irq)
+{
+	*IXP23XX_PCI_XSCALE_INT_ENABLE |= (1 << (IRQ_IXP23XX_INTA + 27 - irq));
+}
+
+/*
+ * TODO: Should this just be done at ASM level?
+ */
+static void pci_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	u32 pci_interrupt;
+	unsigned int irqno;
+	struct irqdesc *int_desc;
+
+	pci_interrupt = *IXP23XX_PCI_XSCALE_INT_STATUS;
+
+	desc->chip->ack(irq);
+
+	/* See which PCI_INTA, or PCI_INTB interrupted */
+	if (pci_interrupt & (1 << 26)) {
+		irqno = IRQ_IXP23XX_INTB;
+	} else if (pci_interrupt & (1 << 27)) {
+		irqno = IRQ_IXP23XX_INTA;
+	} else {
+		BUG();
+	}
+
+	int_desc = irq_desc + irqno;
+	int_desc->handle(irqno, int_desc, regs);
+
+	desc->chip->unmask(irq);
+}
+
+static struct irqchip ixp23xx_pci_irq_chip = {
+	.ack	= ixp23xx_pci_irq_mask,
+	.mask	= ixp23xx_pci_irq_mask,
+	.unmask	= ixp23xx_pci_irq_unmask
+};
+
+static void ixp23xx_config_irq(unsigned int irq, enum ixp23xx_irq_type type)
+{
+	switch (type) {
+	case IXP23XX_IRQ_LEVEL:
+		set_irq_chip(irq, &ixp23xx_irq_level_chip);
+		set_irq_handler(irq, do_level_IRQ);
+		break;
+	case IXP23XX_IRQ_EDGE:
+		set_irq_chip(irq, &ixp23xx_irq_edge_chip);
+		set_irq_handler(irq, do_edge_IRQ);
+		break;
+	}
+	set_irq_flags(irq, IRQF_VALID);
+}
+
+void __init ixp23xx_init_irq(void)
+{
+	int irq;
+
+	/* Route everything to IRQ */
+	*IXP23XX_INTR_SEL1 = 0x0;
+	*IXP23XX_INTR_SEL2 = 0x0;
+	*IXP23XX_INTR_SEL3 = 0x0;
+	*IXP23XX_INTR_SEL4 = 0x0;
+
+	/* Mask all sources */
+	*IXP23XX_INTR_EN1 = 0x0;
+	*IXP23XX_INTR_EN2 = 0x0;
+	*IXP23XX_INTR_EN3 = 0x0;
+	*IXP23XX_INTR_EN4 = 0x0;
+
+	/*
+	 * Configure all IRQs for level-sensitive operation
+	 */
+	for (irq = 0; irq <= NUM_IXP23XX_RAW_IRQS; irq++) {
+		ixp23xx_config_irq(irq, IXP23XX_IRQ_LEVEL);
+	}
+
+	for (irq = IRQ_IXP23XX_INTA; irq <= IRQ_IXP23XX_INTB; irq++) {
+		set_irq_chip(irq, &ixp23xx_pci_irq_chip);
+		set_irq_handler(irq, do_level_IRQ);
+		set_irq_flags(irq, IRQF_VALID);
+	}
+
+	set_irq_chained_handler(IRQ_IXP23XX_PCI_INT_RPH, pci_handler);
+}
+
+
+/*************************************************************************
+ * Timer-tick functions for IXP23xx
+ *************************************************************************/
+#define CLOCK_TICKS_PER_USEC	CLOCK_TICK_RATE / (USEC_PER_SEC)
+
+static unsigned long next_jiffy_time;
+
+static unsigned long
+ixp23xx_gettimeoffset(void)
+{
+	unsigned long elapsed;
+
+	elapsed = *IXP23XX_TIMER_CONT - (next_jiffy_time - LATCH);
+
+	return elapsed / CLOCK_TICKS_PER_USEC;
+}
+
+static irqreturn_t
+ixp23xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	/* Clear Pending Interrupt by writing '1' to it */
+	*IXP23XX_TIMER_STATUS = IXP23XX_TIMER1_INT_PEND;
+	while ((*IXP23XX_TIMER_CONT - next_jiffy_time) > LATCH) {
+		timer_tick(regs);
+		next_jiffy_time += LATCH;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction ixp23xx_timer_irq = {
+	.name		= "IXP23xx Timer Tick",
+	.handler	= ixp23xx_timer_interrupt,
+	.flags		= SA_INTERRUPT | SA_TIMER,
+};
+
+void __init ixp23xx_init_timer(void)
+{
+	/* Clear Pending Interrupt by writing '1' to it */
+	*IXP23XX_TIMER_STATUS = IXP23XX_TIMER1_INT_PEND;
+
+	/* Setup the Timer counter value */
+	*IXP23XX_TIMER1_RELOAD =
+		(LATCH & ~IXP23XX_TIMER_RELOAD_MASK) | IXP23XX_TIMER_ENABLE;
+
+	*IXP23XX_TIMER_CONT = 0;
+	next_jiffy_time = LATCH;
+
+	/* Connect the interrupt handler and enable the interrupt */
+	setup_irq(IRQ_IXP23XX_TIMER1, &ixp23xx_timer_irq);
+}
+
+struct sys_timer ixp23xx_timer = {
+	.init		= ixp23xx_init_timer,
+	.offset		= ixp23xx_gettimeoffset,
+};
+
+
+/*************************************************************************
+ * IXP23xx Platform Initializaion
+ *************************************************************************/
+static struct resource ixp23xx_uart_resources[] = {
+	{
+		.start		= IXP23XX_UART1_PHYS,
+		.end		= IXP23XX_UART1_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM
+	}, {
+		.start		= IXP23XX_UART2_PHYS,
+		.end		= IXP23XX_UART2_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM
+	}
+};
+
+static struct plat_serial8250_port ixp23xx_uart_data[] = {
+	{
+		.mapbase	= IXP23XX_UART1_PHYS,
+		.membase	= (char *)(IXP23XX_UART1_VIRT + 3),
+		.irq		= IRQ_IXP23XX_UART1,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+		.uartclk	= IXP23XX_UART_XTAL,
+	}, {
+		.mapbase	= IXP23XX_UART2_PHYS,
+		.membase	= (char *)(IXP23XX_UART2_VIRT + 3),
+		.irq		= IRQ_IXP23XX_UART2,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+		.uartclk	= IXP23XX_UART_XTAL,
+	},
+	{ },
+};
+
+static struct platform_device ixp23xx_uart = {
+	.name			= "serial8250",
+	.id			= 0,
+	.dev.platform_data	= ixp23xx_uart_data,
+	.num_resources		= 2,
+	.resource		= ixp23xx_uart_resources,
+};
+
+static struct platform_device *ixp23xx_devices[] __initdata = {
+	&ixp23xx_uart,
+};
+
+void __init ixp23xx_sys_init(void)
+{
+	platform_add_devices(ixp23xx_devices, ARRAY_SIZE(ixp23xx_devices));
+}
diff --git a/arch/arm/mach-ixp23xx/espresso.c b/arch/arm/mach-ixp23xx/espresso.c
new file mode 100644
index 0000000..2327c97
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/espresso.c
@@ -0,0 +1,69 @@
+/*
+ * arch/arm/mach-ixp23xx/espresso.c
+ *
+ * Double Espresso-specific routines
+ *
+ * Author: Lennert Buytenhek <buytenh@wantstofly.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/bitops.h>
+#include <linux/ioport.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/pci.h>
+
+static void __init espresso_init(void)
+{
+	physmap_configure(0x90000000, 0x02000000, 2, NULL);
+
+	/*
+	 * Mark flash as writeable.
+	 */
+	IXP23XX_EXP_CS0[0] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[1] |= IXP23XX_FLASH_WRITABLE;
+
+	ixp23xx_sys_init();
+}
+
+MACHINE_START(ESPRESSO, "IP Fabrics Double Espresso")
+	/* Maintainer: Lennert Buytenhek */
+	.phys_io	= IXP23XX_PERIPHERAL_PHYS,
+	.io_pg_offst	= ((IXP23XX_PERIPHERAL_VIRT >> 18)) & 0xfffc,
+	.map_io		= ixp23xx_map_io,
+	.init_irq	= ixp23xx_init_irq,
+	.timer		= &ixp23xx_timer,
+	.boot_params	= 0x00000100,
+	.init_machine	= espresso_init,
+MACHINE_END
diff --git a/arch/arm/mach-ixp23xx/ixdp2351.c b/arch/arm/mach-ixp23xx/ixdp2351.c
new file mode 100644
index 0000000..00146c3
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/ixdp2351.c
@@ -0,0 +1,325 @@
+/*
+ * arch/arm/mach-ixp23xx/ixdp2351.c
+ *
+ * IXDP2351 board-specific routines
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * Based on 2.4 code Copyright 2004 (c) Intel Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/bitops.h>
+#include <linux/ioport.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/pci.h>
+
+/*
+ * IXDP2351 Interrupt Handling
+ */
+static void ixdp2351_inta_mask(unsigned int irq)
+{
+	*IXDP2351_CPLD_INTA_MASK_SET_REG = IXDP2351_INTA_IRQ_MASK(irq);
+}
+
+static void ixdp2351_inta_unmask(unsigned int irq)
+{
+	*IXDP2351_CPLD_INTA_MASK_CLR_REG = IXDP2351_INTA_IRQ_MASK(irq);
+}
+
+static void ixdp2351_inta_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	u16 ex_interrupt =
+		*IXDP2351_CPLD_INTA_STAT_REG & IXDP2351_INTA_IRQ_VALID;
+	int i;
+
+	desc->chip->mask(irq);
+
+	for (i = 0; i < IXDP2351_INTA_IRQ_NUM; i++) {
+		if (ex_interrupt & (1 << i)) {
+			struct irqdesc *cpld_desc;
+			int cpld_irq =
+				IXP23XX_MACH_IRQ(IXDP2351_INTA_IRQ_BASE + i);
+			cpld_desc = irq_desc + cpld_irq;
+			cpld_desc->handle(cpld_irq, cpld_desc, regs);
+		}
+	}
+
+	desc->chip->unmask(irq);
+}
+
+static struct irqchip ixdp2351_inta_chip = {
+	.ack	= ixdp2351_inta_mask,
+	.mask	= ixdp2351_inta_mask,
+	.unmask	= ixdp2351_inta_unmask
+};
+
+static void ixdp2351_intb_mask(unsigned int irq)
+{
+	*IXDP2351_CPLD_INTB_MASK_SET_REG = IXDP2351_INTB_IRQ_MASK(irq);
+}
+
+static void ixdp2351_intb_unmask(unsigned int irq)
+{
+	*IXDP2351_CPLD_INTB_MASK_CLR_REG = IXDP2351_INTB_IRQ_MASK(irq);
+}
+
+static void ixdp2351_intb_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	u16 ex_interrupt =
+		*IXDP2351_CPLD_INTB_STAT_REG & IXDP2351_INTB_IRQ_VALID;
+	int i;
+
+	desc->chip->ack(irq);
+
+	for (i = 0; i < IXDP2351_INTB_IRQ_NUM; i++) {
+		if (ex_interrupt & (1 << i)) {
+			struct irqdesc *cpld_desc;
+			int cpld_irq =
+				IXP23XX_MACH_IRQ(IXDP2351_INTB_IRQ_BASE + i);
+			cpld_desc = irq_desc + cpld_irq;
+			cpld_desc->handle(cpld_irq, cpld_desc, regs);
+		}
+	}
+
+	desc->chip->unmask(irq);
+}
+
+static struct irqchip ixdp2351_intb_chip = {
+	.ack	= ixdp2351_intb_mask,
+	.mask	= ixdp2351_intb_mask,
+	.unmask	= ixdp2351_intb_unmask
+};
+
+void ixdp2351_init_irq(void)
+{
+	int irq;
+
+	/* Mask all interrupts from CPLD, disable simulation */
+	*IXDP2351_CPLD_INTA_MASK_SET_REG = (u16) -1;
+	*IXDP2351_CPLD_INTB_MASK_SET_REG = (u16) -1;
+	*IXDP2351_CPLD_INTA_SIM_REG = 0;
+	*IXDP2351_CPLD_INTB_SIM_REG = 0;
+
+	ixp23xx_init_irq();
+
+	for (irq = IXP23XX_MACH_IRQ(IXDP2351_INTA_IRQ_BASE);
+	     irq <
+	     IXP23XX_MACH_IRQ(IXDP2351_INTA_IRQ_BASE + IXDP2351_INTA_IRQ_NUM);
+	     irq++) {
+		if (IXDP2351_INTA_IRQ_MASK(irq) & IXDP2351_INTA_IRQ_VALID) {
+			set_irq_flags(irq, IRQF_VALID);
+			set_irq_handler(irq, do_level_IRQ);
+			set_irq_chip(irq, &ixdp2351_inta_chip);
+		}
+	}
+
+	for (irq = IXP23XX_MACH_IRQ(IXDP2351_INTB_IRQ_BASE);
+	     irq <
+	     IXP23XX_MACH_IRQ(IXDP2351_INTB_IRQ_BASE + IXDP2351_INTB_IRQ_NUM);
+	     irq++) {
+		if (IXDP2351_INTB_IRQ_MASK(irq) & IXDP2351_INTB_IRQ_VALID) {
+			set_irq_flags(irq, IRQF_VALID);
+			set_irq_handler(irq, do_level_IRQ);
+			set_irq_chip(irq, &ixdp2351_intb_chip);
+		}
+	}
+
+	set_irq_chained_handler(IRQ_IXP23XX_INTA, &ixdp2351_inta_handler);
+	set_irq_chained_handler(IRQ_IXP23XX_INTB, &ixdp2351_intb_handler);
+}
+
+/*
+ * IXDP2351 PCI
+ */
+
+/*
+ * This board does not do normal PCI IRQ routing, or any
+ * sort of swizzling, so we just need to check where on the
+ * bus the device is and figure out what CPLD pin it is
+ * being routed to.
+ */
+#define DEVPIN(dev, pin) ((pin) | ((dev) << 3))
+
+static int __init ixdp2351_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	u8 bus = dev->bus->number;
+	u32 devpin = DEVPIN(PCI_SLOT(dev->devfn), pin);
+	struct pci_bus *tmp_bus = dev->bus;
+
+	/* Primary bus, no interrupts here */
+	if (!bus)
+		return -1;
+
+	/* Lookup first leaf in bus tree */
+	while ((tmp_bus->parent != NULL) && (tmp_bus->parent->parent != NULL))
+		tmp_bus = tmp_bus->parent;
+
+	/* Select between known bridges */
+	switch (tmp_bus->self->devfn | (tmp_bus->self->bus->number << 8)) {
+		/* Device is located after first bridge */
+	case 0x0008:
+		if (tmp_bus == dev->bus) {
+			/* Device is located directy after first bridge */
+			switch (devpin) {
+				/* Onboard 82546 */
+			case DEVPIN(1, 1):	/* Onboard 82546 ch 0 */
+				return IRQ_IXDP2351_INTA_82546;
+			case DEVPIN(1, 2):	/* Onboard 82546 ch 1 */
+				return IRQ_IXDP2351_INTB_82546;
+				/* PMC SLOT */
+			case DEVPIN(0, 1):	/* PMCP INTA# */
+			case DEVPIN(2, 4):	/* PMCS INTD# */
+				return IRQ_IXDP2351_SPCI_PMC_INTA;
+			case DEVPIN(0, 2):	/* PMCP INTB# */
+			case DEVPIN(2, 1):	/* PMCS INTA# */
+				return IRQ_IXDP2351_SPCI_PMC_INTB;
+			case DEVPIN(0, 3):	/* PMCP INTC# */
+			case DEVPIN(2, 2):	/* PMCS INTB# */
+				return IRQ_IXDP2351_SPCI_PMC_INTC;
+			case DEVPIN(0, 4):	/* PMCP INTD# */
+			case DEVPIN(2, 3):	/* PMCS INTC# */
+				return IRQ_IXDP2351_SPCI_PMC_INTD;
+			}
+		} else {
+			/* Device is located indirectly after first bridge */
+			/* Not supported now */
+			return -1;
+		}
+		break;
+	case 0x0010:
+		if (tmp_bus == dev->bus) {
+			/* Device is located directy after second bridge */
+			/* Secondary bus of second bridge */
+			switch (devpin) {
+			case DEVPIN(0, 1):	/* DB#0 */
+			case DEVPIN(0, 2):
+			case DEVPIN(0, 3):
+			case DEVPIN(0, 4):
+				return IRQ_IXDP2351_SPCI_DB_0;
+			case DEVPIN(1, 1):	/* DB#1 */
+			case DEVPIN(1, 2):
+			case DEVPIN(1, 3):
+			case DEVPIN(1, 4):
+				return IRQ_IXDP2351_SPCI_DB_1;
+			case DEVPIN(2, 1):	/* FIC1 */
+			case DEVPIN(2, 2):
+			case DEVPIN(2, 3):
+			case DEVPIN(2, 4):
+			case DEVPIN(3, 1):	/* FIC2 */
+			case DEVPIN(3, 2):
+			case DEVPIN(3, 3):
+			case DEVPIN(3, 4):
+				return IRQ_IXDP2351_SPCI_FIC;
+			}
+		} else {
+			/* Device is located indirectly after second bridge */
+			/* Not supported now */
+			return -1;
+		}
+		break;
+	}
+
+	return -1;
+}
+
+struct hw_pci ixdp2351_pci __initdata = {
+	.nr_controllers	= 1,
+	.preinit	= ixp23xx_pci_preinit,
+	.setup		= ixp23xx_pci_setup,
+	.scan		= ixp23xx_pci_scan_bus,
+	.map_irq	= ixdp2351_map_irq,
+};
+
+int __init ixdp2351_pci_init(void)
+{
+	if (machine_is_ixdp2351())
+		pci_common_init(&ixdp2351_pci);
+
+	return 0;
+}
+
+subsys_initcall(ixdp2351_pci_init);
+
+/*
+ * IXDP2351 Static Mapped I/O
+ */
+static struct map_desc ixdp2351_io_desc[] __initdata = {
+	{
+		.virtual	= IXDP2351_NP_VIRT_BASE,
+		.pfn		= __phys_to_pfn((u64)IXDP2351_NP_PHYS_BASE),
+		.length		= IXDP2351_NP_PHYS_SIZE,
+		.type		= MT_DEVICE
+	}, {
+		.virtual	= IXDP2351_BB_BASE_VIRT,
+		.pfn		= __phys_to_pfn((u64)IXDP2351_BB_BASE_PHYS),
+		.length		= IXDP2351_BB_SIZE,
+		.type		= MT_DEVICE
+	}
+};
+
+static void __init ixdp2351_map_io(void)
+{
+	ixp23xx_map_io();
+	iotable_init(ixdp2351_io_desc, ARRAY_SIZE(ixdp2351_io_desc));
+}
+
+static void __init ixdp2351_init(void)
+{
+	physmap_configure(0x90000000, 0x04000000, 1, NULL);
+
+	/*
+	 * Mark flash as writeable
+	 */
+	IXP23XX_EXP_CS0[0] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[1] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[2] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[3] |= IXP23XX_FLASH_WRITABLE;
+
+	ixp23xx_sys_init();
+}
+
+MACHINE_START(IXDP2351, "Intel IXDP2351 Development Platform")
+	/* Maintainer: MontaVista Software, Inc. */
+	.phys_io	= IXP23XX_PERIPHERAL_PHYS,
+	.io_pg_offst	= ((IXP23XX_PERIPHERAL_VIRT >> 18)) & 0xfffc,
+	.map_io		= ixdp2351_map_io,
+	.init_irq	= ixdp2351_init_irq,
+	.timer		= &ixp23xx_timer,
+	.boot_params	= 0x00000100,
+	.init_machine	= ixdp2351_init,
+MACHINE_END
diff --git a/arch/arm/mach-ixp23xx/pci.c b/arch/arm/mach-ixp23xx/pci.c
new file mode 100644
index 0000000..5330ad7
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/pci.c
@@ -0,0 +1,275 @@
+/*
+ * arch/arm/mach-ixp23xx/pci.c
+ *
+ * PCI routines for IXP23XX based systems
+ *
+ * Copyright (c) 2005 MontaVista Software, Inc.
+ *
+ * based on original code:
+ *
+ * Author: Naeem Afzal <naeem.m.afzal@intel.com>
+ * Copyright 2002-2005 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/sizes.h>
+#include <asm/system.h>
+#include <asm/mach/pci.h>
+#include <asm/mach-types.h>
+#include <asm/hardware.h>
+
+extern int (*external_fault) (unsigned long, struct pt_regs *);
+
+static int pci_master_aborts = 0;
+
+#ifdef DEBUG
+#define DBG(x...)	printk(x)
+#else
+#define DBG(x...)
+#endif
+
+int clear_master_aborts(void);
+
+static u32
+*ixp23xx_pci_config_addr(unsigned int bus_nr, unsigned int devfn, int where)
+{
+	u32 *paddress;
+
+	/*
+	 * Must be dword aligned
+	 */
+	where &= ~3;
+
+	/*
+	 * For top bus, generate type 0, else type 1
+	 */
+	if (!bus_nr) {
+		if (PCI_SLOT(devfn) >= 8)
+			return 0;
+
+		paddress = (u32 *) (IXP23XX_PCI_CFG0_VIRT
+				    | (1 << (PCI_SLOT(devfn) + 16))
+				    | (PCI_FUNC(devfn) << 8) | where);
+	} else {
+		paddress = (u32 *) (IXP23XX_PCI_CFG1_VIRT
+				    | (bus_nr << 16)
+				    | (PCI_SLOT(devfn) << 11)
+				    | (PCI_FUNC(devfn) << 8) | where);
+	}
+
+	return paddress;
+}
+
+/*
+ * Mask table, bits to mask for quantity of size 1, 2 or 4 bytes.
+ * 0 and 3 are not valid indexes...
+ */
+static u32 bytemask[] = {
+	/*0*/	0,
+	/*1*/	0xff,
+	/*2*/	0xffff,
+	/*3*/	0,
+	/*4*/	0xffffffff,
+};
+
+static int ixp23xx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *value)
+{
+	u32 n;
+	u32 *addr;
+
+	n = where % 4;
+
+	DBG("In config_read(%d) %d from dev %d:%d:%d\n", size, where,
+		bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+	addr = ixp23xx_pci_config_addr(bus->number, devfn, where);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	pci_master_aborts = 0;
+	*value = (*addr >> (8*n)) & bytemask[size];
+	if (pci_master_aborts) {
+			pci_master_aborts = 0;
+			*value = 0xffffffff;
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * We don't do error checking on the address for writes.
+ * It's assumed that the user checked for the device existing first
+ * by doing a read first.
+ */
+static int ixp23xx_pci_write_config(struct pci_bus *bus, unsigned int devfn,
+					int where, int size, u32 value)
+{
+	u32 mask;
+	u32 *addr;
+	u32 temp;
+
+	mask = ~(bytemask[size] << ((where % 0x4) * 8));
+	addr = ixp23xx_pci_config_addr(bus->number, devfn, where);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	temp = (u32) (value) << ((where % 0x4) * 8);
+	*addr = (*addr & mask) | temp;
+
+	clear_master_aborts();
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops ixp23xx_pci_ops = {
+	.read	= ixp23xx_pci_read_config,
+	.write	= ixp23xx_pci_write_config,
+};
+
+struct pci_bus *ixp23xx_pci_scan_bus(int nr, struct pci_sys_data *sysdata)
+{
+	return pci_scan_bus(sysdata->busnr, &ixp23xx_pci_ops, sysdata);
+}
+
+int ixp23xx_pci_abort_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	volatile unsigned long temp;
+	unsigned long flags;
+
+	pci_master_aborts = 1;
+
+	local_irq_save(flags);
+	temp = *IXP23XX_PCI_CONTROL;
+
+	/*
+	 * master abort and cmd tgt err
+	 */
+	if (temp & ((1 << 8) | (1 << 5)))
+		*IXP23XX_PCI_CONTROL = temp;
+
+	temp = *IXP23XX_PCI_CMDSTAT;
+
+	if (temp & (1 << 29))
+		*IXP23XX_PCI_CMDSTAT = temp;
+	local_irq_restore(flags);
+
+	/*
+	 * If it was an imprecise abort, then we need to correct the
+	 * return address to be _after_ the instruction.
+	 */
+	if (fsr & (1 << 10))
+		regs->ARM_pc += 4;
+
+	return 0;
+}
+
+int clear_master_aborts(void)
+{
+	volatile u32 temp;
+
+	temp = *IXP23XX_PCI_CONTROL;
+
+	/*
+	 * master abort and cmd tgt err
+	 */
+	if (temp & ((1 << 8) | (1 << 5)))
+		*IXP23XX_PCI_CONTROL = temp;
+
+	temp = *IXP23XX_PCI_CMDSTAT;
+
+	if (temp & (1 << 29))
+		*IXP23XX_PCI_CMDSTAT = temp;
+
+	return 0;
+}
+
+void __init ixp23xx_pci_preinit(void)
+{
+#ifdef __ARMEB__
+	*IXP23XX_PCI_CONTROL |= 0x20000;	/* set I/O swapping */
+#endif
+	/*
+	 * ADDR_31 needs to be clear for PCI memory access to CPP memory
+	 */
+	*IXP23XX_CPP2XSI_CURR_XFER_REG3 &= ~IXP23XX_CPP2XSI_ADDR_31;
+	*IXP23XX_CPP2XSI_CURR_XFER_REG3 |= IXP23XX_CPP2XSI_PSH_OFF;
+
+	/*
+	 * Select correct memory for PCI inbound transactions
+	 */
+	if (ixp23xx_cpp_boot()) {
+		*IXP23XX_PCI_CPP_ADDR_BITS &= ~(1 << 1);
+	} else {
+		*IXP23XX_PCI_CPP_ADDR_BITS |= (1 << 1);
+	}
+
+	hook_fault_code(16+6, ixp23xx_pci_abort_handler, SIGBUS,
+			"PCI config cycle to non-existent device");
+
+	*IXP23XX_PCI_ADDR_EXT = 0x0000e000;
+}
+
+/*
+ * Prevent PCI layer from seeing the inbound host-bridge resources
+ */
+static void __devinit pci_fixup_ixp23xx(struct pci_dev *dev)
+{
+	int i;
+
+	dev->class &= 0xff;
+	dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		dev->resource[i].start = 0;
+		dev->resource[i].end   = 0;
+		dev->resource[i].flags = 0;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9002, pci_fixup_ixp23xx);
+
+/*
+ * IXP2300 systems often have large resource requirements, so we just
+ * use our own resource space.
+ */
+static struct resource ixp23xx_pci_mem_space = {
+	.start	= IXP23XX_PCI_MEM_START,
+	.end	= IXP23XX_PCI_MEM_START + IXP23XX_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+	.name	= "PCI Mem Space"
+};
+
+static struct resource ixp23xx_pci_io_space = {
+	.start	= 0x00000100,
+	.end	= 0x01ffffff,
+	.flags	= IORESOURCE_IO,
+	.name	= "PCI I/O Space"
+};
+
+int ixp23xx_pci_setup(int nr, struct pci_sys_data *sys)
+{
+	if (nr >= 1)
+		return 0;
+
+	sys->resource[0] = &ixp23xx_pci_io_space;
+	sys->resource[1] = &ixp23xx_pci_mem_space;
+	sys->resource[2] = NULL;
+
+	return 1;
+}
diff --git a/arch/arm/mach-ixp23xx/roadrunner.c b/arch/arm/mach-ixp23xx/roadrunner.c
new file mode 100644
index 0000000..43c14e7
--- /dev/null
+++ b/arch/arm/mach-ixp23xx/roadrunner.c
@@ -0,0 +1,164 @@
+/*
+ * arch/arm/mach-ixp23xx/roadrunner.c
+ *
+ * RoadRunner board-specific routines
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * Based on 2.4 code Copyright 2005 (c) ADI Engineering Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/bitops.h>
+#include <linux/ioport.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/pci.h>
+
+/*
+ * Interrupt mapping
+ */
+#define INTA		IRQ_ROADRUNNER_PCI_INTA
+#define INTB		IRQ_ROADRUNNER_PCI_INTB
+#define INTC		IRQ_ROADRUNNER_PCI_INTC
+#define INTD		IRQ_ROADRUNNER_PCI_INTD
+
+#define INTC_PIN	IXP23XX_GPIO_PIN_11
+#define INTD_PIN	IXP23XX_GPIO_PIN_12
+
+static int __init roadrunner_map_irq(struct pci_dev *dev, u8 idsel, u8 pin)
+{
+	static int pci_card_slot_irq[] = {INTB, INTC, INTD, INTA};
+	static int pmc_card_slot_irq[] = {INTA, INTB, INTC, INTD};
+	static int usb_irq[] = {INTB, INTC, INTD, -1};
+	static int mini_pci_1_irq[] = {INTB, INTC, -1, -1};
+	static int mini_pci_2_irq[] = {INTC, INTD, -1, -1};
+
+	switch(dev->bus->number) {
+		case 0:
+			switch(dev->devfn) {
+			case 0x0: // PCI-PCI bridge
+				break;
+			case 0x8: // PCI Card Slot
+				return pci_card_slot_irq[pin - 1];
+			case 0x10: // PMC Slot
+				return pmc_card_slot_irq[pin - 1];
+			case 0x18: // PMC Slot Secondary Agent
+				break;
+			case 0x20: // IXP Processor
+				break;
+			default:
+				return NO_IRQ;
+			}
+			break;
+
+		case 1:
+			switch(dev->devfn) {
+			case 0x0: // IDE Controller
+				return (pin == 1) ? INTC : -1;
+			case 0x8: // USB fun 0
+			case 0x9: // USB fun 1
+			case 0xa: // USB fun 2
+				return usb_irq[pin - 1];
+			case 0x10: // Mini PCI 1
+				return mini_pci_1_irq[pin-1];
+			case 0x18: // Mini PCI 2
+				return mini_pci_2_irq[pin-1];
+			case 0x20: // MEM slot
+				return (pin == 1) ? INTA : -1;
+			default:
+				return NO_IRQ;
+			}
+			break;
+
+		default:
+			return NO_IRQ;
+	}
+
+	return NO_IRQ;
+}
+
+static void roadrunner_pci_preinit(void)
+{
+	set_irq_type(IRQ_ROADRUNNER_PCI_INTC, IRQT_LOW);
+	set_irq_type(IRQ_ROADRUNNER_PCI_INTD, IRQT_LOW);
+
+	ixp23xx_pci_preinit();
+}
+
+static struct hw_pci roadrunner_pci __initdata = {
+	.nr_controllers	= 1,
+	.preinit	= roadrunner_pci_preinit,
+	.setup		= ixp23xx_pci_setup,
+	.scan		= ixp23xx_pci_scan_bus,
+	.map_irq	= roadrunner_map_irq,
+};
+
+static int __init roadrunner_pci_init(void)
+{
+	if (machine_is_roadrunner())
+		pci_common_init(&roadrunner_pci);
+
+	return 0;
+};
+
+subsys_initcall(roadrunner_pci_init);
+
+static void __init roadrunner_init(void)
+{
+	physmap_configure(0x90000000, 0x04000000, 2, NULL);
+
+	/*
+	 * Mark flash as writeable
+	 */
+	IXP23XX_EXP_CS0[0] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[1] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[2] |= IXP23XX_FLASH_WRITABLE;
+	IXP23XX_EXP_CS0[3] |= IXP23XX_FLASH_WRITABLE;
+
+	ixp23xx_sys_init();
+}
+
+MACHINE_START(ROADRUNNER, "ADI Engineering RoadRunner Development Platform")
+	/* Maintainer: Deepak Saxena */
+	.phys_io	= IXP23XX_PERIPHERAL_PHYS,
+	.io_pg_offst	= ((IXP23XX_PERIPHERAL_VIRT >> 18)) & 0xfffc,
+	.map_io		= ixp23xx_map_io,
+	.init_irq	= ixp23xx_init_irq,
+	.timer		= &ixp23xx_timer,
+	.boot_params	= 0x00000100,
+	.init_machine	= roadrunner_init,
+MACHINE_END
diff --git a/include/asm-arm/arch-ixp23xx/debug-macro.S b/include/asm-arm/arch-ixp23xx/debug-macro.S
new file mode 100644
index 0000000..eb99fd6
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/debug-macro.S
@@ -0,0 +1,23 @@
+/*
+ * include/asm-arm/arch-ixp23xx/debug-macro.S
+ *
+ * Debugging macro include header
+ *
+ * Copyright (C) 1994-1999 Russell King
+ * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/arch/ixp23xx.h>
+
+		.macro	addruart,rx
+		mrc	p15, 0, \rx, c1, c0
+		tst	\rx, #1				@ mmu enabled?
+		ldreq	\rx, =IXP23XX_PERIPHERAL_PHYS 	@ physical
+		ldrne	\rx, =IXP23XX_PERIPHERAL_VIRT	@ virtual
+		.endm
+
+#define UART_SHIFT	2
+#include <asm/hardware/debug-8250.S>
diff --git a/include/asm-arm/arch-ixp23xx/dma.h b/include/asm-arm/arch-ixp23xx/dma.h
new file mode 100644
index 0000000..2f4335e
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/dma.h
@@ -0,0 +1,3 @@
+/*
+ * include/asm-arm/arch-ixp23xx/dma.h
+ */
diff --git a/include/asm-arm/arch-ixp23xx/entry-macro.S b/include/asm-arm/arch-ixp23xx/entry-macro.S
new file mode 100644
index 0000000..0ef4e60
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/entry-macro.S
@@ -0,0 +1,31 @@
+/*
+ * include/asm-arm/arch-ixp23xx/entry-macro.S
+ */
+
+		.macro	disable_fiq
+		.endm
+
+		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
+		ldr	\irqnr, =(IXP23XX_INTC_VIRT + IXP23XX_INTR_IRQ_ENC_ST_OFFSET)
+		ldr	\irqnr, [\irqnr]	@ get interrupt number
+		cmp	\irqnr, #0x0		@ suprious interrupt ?
+		movne	\irqnr, \irqnr, lsr #2	@ skip unwanted low order bits
+		subne	\irqnr, \irqnr, #1	@ convert to 0 based
+
+#if 0
+		cmp	\irqnr, #IRQ_IXP23XX_PCI_INT_RPH
+		bne	1001f
+		mov	\irqnr, #IRQ_IXP23XX_INTA
+
+		ldr	\irqnr, =0xf5000030
+
+		mov	\tmp, #(1<<26)
+		tst	\irqnr, \tmp
+		movne	\irqnr, #IRQ_IXP23XX_INTB
+
+		mov	\tmp, #(1<<27)
+		tst	\irqnr, \tmp
+		movne	\irqnr, #IRQ_IXP23XX_INTA
+1001:
+#endif
+		.endm
diff --git a/include/asm-arm/arch-ixp23xx/hardware.h b/include/asm-arm/arch-ixp23xx/hardware.h
new file mode 100644
index 0000000..c0010d2
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/hardware.h
@@ -0,0 +1,37 @@
+/*
+ * include/asm-arm/arch-ixp23xx/hardware.h
+ *
+ * Copyright (C) 2002-2004 Intel Corporation.
+ * Copyricht (C) 2005 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Hardware definitions for IXP23XX based systems
+ */
+
+#ifndef __ASM_ARCH_HARDWARE_H
+#define __ASM_ARCH_HARDWARE_H
+
+/* PCI IO info */
+#define PCIO_BASE		IXP23XX_PCI_IO_VIRT
+#define PCIBIOS_MIN_IO		0x00000000
+#define PCIBIOS_MIN_MEM		0xe0000000
+
+#include "ixp23xx.h"
+
+#define pcibios_assign_all_busses()	0
+
+/*
+ * Platform helper functions
+ */
+#include "platform.h"
+
+/*
+ * Platform-specific headers
+ */
+#include "ixdp2351.h"
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/io.h b/include/asm-arm/arch-ixp23xx/io.h
new file mode 100644
index 0000000..18415a8
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/io.h
@@ -0,0 +1,54 @@
+/*
+ * include/asm-arm/arch-ixp23xx/io.h
+ *
+ * Original Author: Naeem M Afzal <naeem.m.afzal@intel.com>
+ * Maintainer: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright (C) 2003-2005 Intel Corp.
+ * Copyright (C) 2005 MontaVista Software, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_IO_H
+#define __ASM_ARCH_IO_H
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#define __io(p)		((void __iomem*)((p) + IXP23XX_PCI_IO_VIRT))
+#define __mem_pci(a)	(a)
+
+#include <linux/kernel.h>	/* For BUG */
+
+static inline void __iomem *
+ixp23xx_ioremap(unsigned long addr, unsigned long size, unsigned long flags)
+{
+	if (addr >= IXP23XX_PCI_MEM_START &&
+		addr <= IXP23XX_PCI_MEM_START + IXP23XX_PCI_MEM_SIZE) {
+		if (addr + size > IXP23XX_PCI_MEM_START + IXP23XX_PCI_MEM_SIZE)
+			return NULL;
+
+		return (void __iomem *)
+ 			((addr - IXP23XX_PCI_MEM_START) + IXP23XX_PCI_MEM_VIRT);
+	}
+
+	return __ioremap(addr, size, flags);
+}
+
+static inline void
+ixp23xx_iounmap(void __iomem *addr)
+{
+	if ((((u32)addr) >= IXP23XX_PCI_MEM_VIRT) &&
+	    (((u32)addr) < IXP23XX_PCI_MEM_VIRT + IXP23XX_PCI_MEM_SIZE))
+		return;
+
+	__iounmap(addr);
+}
+
+#define __arch_ioremap(a,s,f)	ixp23xx_ioremap(a,s,f)
+#define __arch_iounmap(a)	ixp23xx_iounmap(a)
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/irqs.h b/include/asm-arm/arch-ixp23xx/irqs.h
new file mode 100644
index 0000000..e696395
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/irqs.h
@@ -0,0 +1,223 @@
+/*
+ * include/asm-arm/arch-ixp23xx/irqs.h
+ *
+ * IRQ definitions for IXP23XX based systems
+ *
+ * Author: Naeem Afzal <naeem.m.afzal@intel.com>
+ *
+ * Copyright (C) 2003-2004 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_IRQS_H
+#define __ASM_ARCH_IRQS_H
+
+#define NR_IXP23XX_IRQS			IRQ_IXP23XX_INTB+1
+#define IRQ_IXP23XX_EXTIRQS		NR_IXP23XX_IRQS
+
+
+#define IRQ_IXP23XX_DBG0		0	/* Debug/Execution/MBox */
+#define IRQ_IXP23XX_DBG1		1	/* Debug/Execution/MBox */
+#define IRQ_IXP23XX_NPE_TRG		2	/* npe_trigger */
+#define IRQ_IXP23XX_TIMER1		3	/* Timer[0] */
+#define IRQ_IXP23XX_TIMER2		4	/* Timer[1] */
+#define IRQ_IXP23XX_TIMESTAMP		5	/* Timer[2], Time-stamp */
+#define IRQ_IXP23XX_WDOG		6	/* Time[3], Watchdog Timer */
+#define IRQ_IXP23XX_PCI_DBELL		7	/* PCI Doorbell */
+#define IRQ_IXP23XX_PCI_DMA1		8	/* PCI DMA Channel 1 */
+#define IRQ_IXP23XX_PCI_DMA2		9	/* PCI DMA Channel 2 */
+#define IRQ_IXP23XX_PCI_DMA3		10	/* PCI DMA Channel 3 */
+#define IRQ_IXP23XX_PCI_INT_RPH		11	/* pcxg_pci_int_rph */
+#define IRQ_IXP23XX_CPP_PMU		12	/* xpxg_pm_int_rpl */
+#define IRQ_IXP23XX_SWINT0		13	/* S/W Interrupt0 */
+#define IRQ_IXP23XX_SWINT1		14	/* S/W Interrupt1 */
+#define IRQ_IXP23XX_UART2		15	/* UART1 Interrupt */
+#define IRQ_IXP23XX_UART1		16	/* UART0 Interrupt */
+#define IRQ_IXP23XX_XSI_PMU_ROLLOVER	17	/* AHB Performance M. Unit counter rollover */
+#define IRQ_IXP23XX_XSI_AHB_PM0		18	/* intr_pm_o */
+#define IRQ_IXP23XX_XSI_AHB_ECE0	19	/* intr_ece_o */
+#define IRQ_IXP23XX_XSI_AHB_GASKET	20	/* gas_intr_o */
+#define IRQ_IXP23XX_XSI_CPP		21	/* xsi2cpp_int */
+#define IRQ_IXP23XX_CPP_XSI		22	/* cpp2xsi_int */
+#define IRQ_IXP23XX_ME_ATTN0		23	/* ME_ATTN */
+#define IRQ_IXP23XX_ME_ATTN1		24	/* ME_ATTN */
+#define IRQ_IXP23XX_ME_ATTN2		25	/* ME_ATTN */
+#define IRQ_IXP23XX_ME_ATTN3		26	/* ME_ATTN */
+#define IRQ_IXP23XX_PCI_ERR_RPH		27	/* PCXG_PCI_ERR_RPH */
+#define IRQ_IXP23XX_D0XG_ECC_CORR	28	/* D0XG_DRAM_ECC_CORR */
+#define IRQ_IXP23XX_D0XG_ECC_UNCORR	29	/* D0XG_DRAM_ECC_UNCORR */
+#define IRQ_IXP23XX_SRAM_ERR1		30	/* SRAM1_ERR */
+#define IRQ_IXP23XX_SRAM_ERR0		31	/* SRAM0_ERR */
+#define IRQ_IXP23XX_MEDIA_ERR		32	/* MEDIA_ERR */
+#define IRQ_IXP23XX_STH_DRAM_ECC_MAJ	33	/* STH_DRAM0_ECC_MAJ */
+#define IRQ_IXP23XX_GPIO6		34	/* GPIO0 interrupts */
+#define IRQ_IXP23XX_GPIO7		35	/* GPIO1 interrupts */
+#define IRQ_IXP23XX_GPIO8		36	/* GPIO2 interrupts */
+#define IRQ_IXP23XX_GPIO9		37	/* GPIO3 interrupts */
+#define IRQ_IXP23XX_GPIO10		38	/* GPIO4 interrupts */
+#define IRQ_IXP23XX_GPIO11		39	/* GPIO5 interrupts */
+#define IRQ_IXP23XX_GPIO12		40	/* GPIO6 interrupts */
+#define IRQ_IXP23XX_GPIO13		41	/* GPIO7 interrupts */
+#define IRQ_IXP23XX_GPIO14		42	/* GPIO8 interrupts */
+#define IRQ_IXP23XX_GPIO15		43	/* GPIO9 interrupts */
+#define IRQ_IXP23XX_SHAC_RING0		44	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING1		45	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING2		46	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING3		47	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING4		48	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING5		49	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING6		50	/* SHAC RING Full */
+#define IRQ_IXP23XX_SHAC_RING7		51	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING8		52	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING9		53	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING10		54	/* SHAC Ring Full */
+#define IRQ_IXP23XX_SHAC_RING11		55	/* SHAC Ring Full */
+#define IRQ_IXP23XX_ME_THREAD_A0_ME0	56	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A1_ME0	57	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A2_ME0	58	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A3_ME0	59	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A4_ME0	60	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A5_ME0	61	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A6_ME0	62	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A7_ME0	63	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A8_ME1	64	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A9_ME1	65	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A10_ME1	66	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A11_ME1	67	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A12_ME1	68	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A13_ME1	69	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A14_ME1	70	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A15_ME1	71	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A16_ME2	72	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A17_ME2	73	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A18_ME2	74	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A19_ME2	75	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A20_ME2	76	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A21_ME2	77	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A22_ME2	78	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A23_ME2	79	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A24_ME3	80	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A25_ME3	81	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A26_ME3	82	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A27_ME3	83	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A28_ME3	84	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A29_ME3	85	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A30_ME3	86	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_A31_ME3	87	/* ME_THREAD_A */
+#define IRQ_IXP23XX_ME_THREAD_B0_ME0	88	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B1_ME0	89	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B2_ME0	90	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B3_ME0	91	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B4_ME0	92	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B5_ME0	93	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B6_ME0	94	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B7_ME0	95	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B8_ME1	96	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B9_ME1	97	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B10_ME1	98	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B11_ME1	99	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B12_ME1	100	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B13_ME1	101	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B14_ME1	102	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B15_ME1	103	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B16_ME2	104	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B17_ME2	105	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B18_ME2	106	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B19_ME2	107	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B20_ME2	108	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B21_ME2	109	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B22_ME2	110	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B23_ME2	111	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B24_ME3	112	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B25_ME3	113	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B26_ME3	114	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B27_ME3	115	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B28_ME3	116	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B29_ME3	117	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B30_ME3	118	/* ME_THREAD_B */
+#define IRQ_IXP23XX_ME_THREAD_B31_ME3	119	/* ME_THREAD_B */
+
+#define NUM_IXP23XX_RAW_IRQS		120
+
+#define IRQ_IXP23XX_INTA		120	/* Indirect pcxg_pci_int_rph */
+#define IRQ_IXP23XX_INTB		121	/* Indirect pcxg_pci_int_rph */
+
+#define NR_IXP23XX_IRQ			(IRQ_IXP23XX_INTB + 1)
+
+/*
+ * We default to 32 per-board IRQs. Increase this number if you need
+ * more, but keep it realistic.
+ */
+#define NR_IXP23XX_MACH_IRQS 		32
+
+#define NR_IRQS				NR_IXP23XX_IRQS + NR_IXP23XX_MACH_IRQS
+
+#define IXP23XX_MACH_IRQ(irq) 		(NR_IXP23XX_IRQ + (irq))
+
+
+/*
+ * IXDP2351-specific interrupts
+ */
+
+/*
+ * External PCI interrupts signaled through INTB
+ *
+ */
+#define IXDP2351_INTB_IRQ_BASE 		0
+#define IRQ_IXDP2351_INTA_82546		IXP23XX_MACH_IRQ(0)
+#define IRQ_IXDP2351_INTB_82546		IXP23XX_MACH_IRQ(1)
+#define IRQ_IXDP2351_SPCI_DB_0		IXP23XX_MACH_IRQ(2)
+#define IRQ_IXDP2351_SPCI_DB_1		IXP23XX_MACH_IRQ(3)
+#define IRQ_IXDP2351_SPCI_PMC_INTA	IXP23XX_MACH_IRQ(4)
+#define IRQ_IXDP2351_SPCI_PMC_INTB	IXP23XX_MACH_IRQ(5)
+#define IRQ_IXDP2351_SPCI_PMC_INTC	IXP23XX_MACH_IRQ(6)
+#define IRQ_IXDP2351_SPCI_PMC_INTD	IXP23XX_MACH_IRQ(7)
+#define IRQ_IXDP2351_SPCI_FIC		IXP23XX_MACH_IRQ(8)
+
+#define IXDP2351_INTB_IRQ_BIT(irq)	(irq - IXP23XX_MACH_IRQ(0))
+#define IXDP2351_INTB_IRQ_MASK(irq)	(1 << IXDP2351_INTB_IRQ_BIT(irq))
+#define IXDP2351_INTB_IRQ_VALID		0x01FF
+#define IXDP2351_INTB_IRQ_NUM 		16
+
+/*
+ * Other external interrupts signaled through INTA
+ */
+#define IXDP2351_INTA_IRQ_BASE 		16
+#define IRQ_IXDP2351_IPMI_FROM		IXP23XX_MACH_IRQ(16)
+#define IRQ_IXDP2351_125US		IXP23XX_MACH_IRQ(17)
+#define IRQ_IXDP2351_DB_0_ADD		IXP23XX_MACH_IRQ(18)
+#define IRQ_IXDP2351_DB_1_ADD		IXP23XX_MACH_IRQ(19)
+#define IRQ_IXDP2351_DEBUG1		IXP23XX_MACH_IRQ(20)
+#define IRQ_IXDP2351_ADD_UART		IXP23XX_MACH_IRQ(21)
+#define IRQ_IXDP2351_FIC_ADD		IXP23XX_MACH_IRQ(24)
+#define IRQ_IXDP2351_CS8900		IXP23XX_MACH_IRQ(25)
+#define IRQ_IXDP2351_BBSRAM		IXP23XX_MACH_IRQ(26)
+#define IRQ_IXDP2351_CONFIG_MEDIA	IXP23XX_MACH_IRQ(27)
+#define IRQ_IXDP2351_CLOCK_REF		IXP23XX_MACH_IRQ(28)
+#define IRQ_IXDP2351_A10_NP		IXP23XX_MACH_IRQ(29)
+#define IRQ_IXDP2351_A11_NP		IXP23XX_MACH_IRQ(30)
+#define IRQ_IXDP2351_DEBUG_NP		IXP23XX_MACH_IRQ(31)
+
+#define IXDP2351_INTA_IRQ_BIT(irq) 	(irq - IXP23XX_MACH_IRQ(16))
+#define IXDP2351_INTA_IRQ_MASK(irq) 	(1 << IXDP2351_INTA_IRQ_BIT(irq))
+#define IXDP2351_INTA_IRQ_VALID 	0xFF3F
+#define IXDP2351_INTA_IRQ_NUM 		16
+
+
+/*
+ * ADI RoadRunner IRQs
+ */
+#define IRQ_ROADRUNNER_PCI_INTA 	IRQ_IXP23XX_INTA
+#define IRQ_ROADRUNNER_PCI_INTB 	IRQ_IXP23XX_INTB
+#define IRQ_ROADRUNNER_PCI_INTC 	IRQ_IXP23XX_GPIO11
+#define IRQ_ROADRUNNER_PCI_INTD 	IRQ_IXP23XX_GPIO12
+
+/*
+ * Put new board definitions here
+ */
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/ixdp2351.h b/include/asm-arm/arch-ixp23xx/ixdp2351.h
new file mode 100644
index 0000000..4a24f8f
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/ixdp2351.h
@@ -0,0 +1,89 @@
+/*
+ * include/asm-arm/arch-ixp23xx/ixdp2351.h
+ *
+ * Register and other defines for IXDP2351
+ *
+ * Copyright (c) 2002-2004 Intel Corp.
+ * Copytight (c) 2005 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_ARCH_IXDP2351_H
+#define __ASM_ARCH_IXDP2351_H
+
+/*
+ * NP module memory map
+ */
+#define IXDP2351_NP_PHYS_BASE		(IXP23XX_EXP_BUS_CS4_BASE)
+#define IXDP2351_NP_PHYS_SIZE		0x00100000
+#define IXDP2351_NP_VIRT_BASE		0xeff00000
+
+#define IXDP2351_VIRT_CS8900_BASE	(IXDP2351_NP_VIRT_BASE)
+#define IXDP2351_VIRT_CS8900_END	(IXDP2351_VIRT_CS8900_BASE + 16)
+
+#define IXDP2351_VIRT_NP_CPLD_BASE 	(IXP23XX_EXP_BUS_CS4_BASE_VIRT + 0x00010000)
+
+#define IXDP2351_NP_CPLD_REG(reg) ((volatile u16 *)(IXDP2351_VIRT_NP_CPLD_BASE + reg))
+
+#define IXDP2351_NP_CPLD_RESET1_REG	IXDP2351_NP_CPLD_REG(0x00)
+#define IXDP2351_NP_CPLD_LED_REG	IXDP2351_NP_CPLD_REG(0x02)
+#define IXDP2351_NP_CPLD_VERSION_REG	IXDP2351_NP_CPLD_REG(0x04)
+
+/*
+ * Base board module memory map
+ */
+
+#define IXDP2351_BB_BASE_PHYS		(IXP23XX_EXP_BUS_CS5_BASE)
+#define IXDP2351_BB_SIZE		0x01000000
+#define IXDP2351_BB_BASE_VIRT		(0xee000000)
+
+#define IXDP2351_BB_AREA_BASE(offset)	(IXDP2351_BB_BASE_VIRT + offset)
+
+#define IXDP2351_VIRT_NVRAM_BASE	IXDP2351_BB_AREA_BASE(0x0)
+#define IXDP2351_NVRAM_SIZE		(0x20000)
+
+#define IXDP2351_VIRT_MB_IXF1104_BASE	IXDP3251_BB_AREA_BASE(0x00020000)
+#define IXDP2351_VIRT_ADD_UART_BASE	IXDP2351_BB_AREA_BASE(0x000240C0)
+#define IXDP2351_VIRT_FIC_BASE		IXDP2351_BB_AREA_BASE(0x00200000)
+#define IXDP2351_VIRT_DB0_BASE		IXDP2351_BB_AREA_BASE(0x00400000)
+#define IXDP2351_VIRT_DB1_BASE		IXDP2351_BB_AREA_BASE(0x00600000)
+#define IXDP2351_VIRT_CPLD_BASE		IXDP2351_BB_AREA_BASE(0x00024000)
+
+/*
+ * On board CPLD registers
+ */
+#define IXDP2351_CPLD_BB_REG(reg) ((volatile u16 *)(IXDP2351_VIRT_CPLD_BASE + reg))
+
+#define IXDP2351_CPLD_RESET0_REG	IXDP2351_CPLD_BB_REG(0x00)
+#define IXDP2351_CPLD_RESET1_REG	IXDP2351_CPLD_BB_REG(0x04)
+
+#define IXDP2351_CPLD_RESET1_MAGIC 	0x55AA
+#define IXDP2351_CPLD_RESET1_ENABLE 	0x8000
+
+#define IXDP2351_CPLD_FPGA_CONFIG_REG	IXDP2351_CPLD_BB_REG(0x08)
+#define IXDP2351_CPLD_INTB_MASK_SET_REG	IXDP2351_CPLD_BB_REG(0x10)
+#define IXDP2351_CPLD_INTA_MASK_SET_REG	IXDP2351_CPLD_BB_REG(0x14)
+#define IXDP2351_CPLD_INTB_STAT_REG	IXDP2351_CPLD_BB_REG(0x18)
+#define IXDP2351_CPLD_INTA_STAT_REG	IXDP2351_CPLD_BB_REG(0x1C)
+#define IXDP2351_CPLD_INTB_RAW_REG	IXDP2351_CPLD_BB_REG(0x20)	/* read */
+#define IXDP2351_CPLD_INTA_RAW_REG	IXDP2351_CPLD_BB_REG(0x24)	/* read */
+#define IXDP2351_CPLD_INTB_MASK_CLR_REG	IXDP2351_CPLD_INTB_RAW_REG	/* write */
+#define IXDP2351_CPLD_INTA_MASK_CLR_REG	IXDP2351_CPLD_INTA_RAW_REG	/* write */
+#define IXDP2351_CPLD_INTB_SIM_REG	IXDP2351_CPLD_BB_REG(0x28)
+#define IXDP2351_CPLD_INTA_SIM_REG	IXDP2351_CPLD_BB_REG(0x2C)
+	/* Interrupt bits are defined in irqs.h */
+#define IXDP2351_CPLD_BB_GBE0_REG	IXDP2351_CPLD_BB_REG(0x30)
+#define IXDP2351_CPLD_BB_GBE1_REG	IXDP2351_CPLD_BB_REG(0x34)
+
+/* #define IXDP2351_CPLD_BB_MISC_REG	IXDP2351_CPLD_REG(0x1C) */
+/* #define IXDP2351_CPLD_BB_MISC_REV_MASK	0xFF		*/
+/* #define IXDP2351_CPLD_BB_GDXCS0_REG	IXDP2351_CPLD_REG(0x24) */
+/* #define IXDP2351_CPLD_BB_GDXCS1_REG	IXDP2351_CPLD_REG(0x28) */
+/* #define IXDP2351_CPLD_BB_CLOCK_REG	IXDP2351_CPLD_REG(0x04) */
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/ixp23xx.h b/include/asm-arm/arch-ixp23xx/ixp23xx.h
new file mode 100644
index 0000000..e49e1ca
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/ixp23xx.h
@@ -0,0 +1,306 @@
+/*
+ * include/asm-arm/arch-ixp23xx/ixp23xx.h
+ *
+ * Register definitions for IXP23XX
+ *
+ * Copyright (C) 2003-2005 Intel Corporation.
+ * Copyright (C) 2005 MontaVista Software, Inc.
+ *
+ * Maintainer: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_IXP23XX_H
+#define __ASM_ARCH_IXP23XX_H
+
+/*
+ * IXP2300 linux memory map:
+ *
+ * virt		phys		size
+ * fffd0000	a0000000	64K		XSI2CPP_CSR
+ * fffc0000	c4000000	4K		EXP_CFG
+ * fff00000	c8000000	64K		PERIPHERAL
+ * fe000000	1c0000000	16M		CAP_CSR
+ * fd000000	1c8000000	16M		MSF_CSR
+ * fb000000			16M		---
+ * fa000000	1d8000000	32M		PCI_IO
+ * f8000000	1da000000	32M		PCI_CFG
+ * f6000000	1de000000	32M		PCI_CREG
+ * f4000000			32M		---
+ * f0000000	1e0000000	64M		PCI_MEM
+ * e[c-f]000000					per-platform mappings
+ */
+
+
+/****************************************************************************
+ * Static mappings.
+ ****************************************************************************/
+#define IXP23XX_XSI2CPP_CSR_PHYS	0xa0000000
+#define IXP23XX_XSI2CPP_CSR_VIRT	0xfffd0000
+#define IXP23XX_XSI2CPP_CSR_SIZE	0x00010000
+
+#define IXP23XX_EXP_CFG_PHYS		0xc4000000
+#define IXP23XX_EXP_CFG_VIRT		0xfffc0000
+#define IXP23XX_EXP_CFG_SIZE		0x00001000
+
+#define IXP23XX_PERIPHERAL_PHYS		0xc8000000
+#define IXP23XX_PERIPHERAL_VIRT		0xfff00000
+#define IXP23XX_PERIPHERAL_SIZE		0x00010000
+
+#define IXP23XX_CAP_CSR_PHYS		0x1c0000000ULL
+#define IXP23XX_CAP_CSR_VIRT		0xfe000000
+#define IXP23XX_CAP_CSR_SIZE		0x01000000
+
+#define IXP23XX_MSF_CSR_PHYS		0x1c8000000ULL
+#define IXP23XX_MSF_CSR_VIRT		0xfd000000
+#define IXP23XX_MSF_CSR_SIZE		0x01000000
+
+#define IXP23XX_PCI_IO_PHYS		0x1d8000000ULL
+#define IXP23XX_PCI_IO_VIRT		0xfa000000
+#define IXP23XX_PCI_IO_SIZE		0x02000000
+
+#define IXP23XX_PCI_CFG_PHYS		0x1da000000ULL
+#define IXP23XX_PCI_CFG_VIRT		0xf8000000
+#define IXP23XX_PCI_CFG_SIZE		0x02000000
+#define IXP23XX_PCI_CFG0_VIRT		IXP23XX_PCI_CFG_VIRT
+#define IXP23XX_PCI_CFG1_VIRT		(IXP23XX_PCI_CFG_VIRT + 0x01000000)
+
+#define IXP23XX_PCI_CREG_PHYS		0x1de000000ULL
+#define IXP23XX_PCI_CREG_VIRT		0xf6000000
+#define IXP23XX_PCI_CREG_SIZE		0x02000000
+#define IXP23XX_PCI_CSR_VIRT		(IXP23XX_PCI_CREG_VIRT + 0x01000000)
+
+#define IXP23XX_PCI_MEM_START		0xe0000000
+#define IXP23XX_PCI_MEM_PHYS		0x1e0000000ULL
+#define IXP23XX_PCI_MEM_VIRT		0xf0000000
+#define IXP23XX_PCI_MEM_SIZE		0x04000000
+
+
+/****************************************************************************
+ * XSI2CPP CSRs.
+ ****************************************************************************/
+#define IXP23XX_XSI2CPP_REG(x)		((volatile unsigned long *)(IXP23XX_XSI2CPP_CSR_VIRT + (x)))
+#define IXP23XX_CPP2XSI_CURR_XFER_REG3	IXP23XX_XSI2CPP_REG(0xf8)
+#define IXP23XX_CPP2XSI_ADDR_31		(1 << 19)
+#define IXP23XX_CPP2XSI_PSH_OFF		(1 << 20)
+#define IXP23XX_CPP2XSI_COH_OFF		(1 << 21)
+
+
+/****************************************************************************
+ * Expansion Bus Config.
+ ****************************************************************************/
+#define IXP23XX_EXP_CFG_REG(x)		((volatile unsigned long *)(IXP23XX_EXP_CFG_VIRT + (x)))
+#define IXP23XX_EXP_CS0			IXP23XX_EXP_CFG_REG(0x00)
+#define IXP23XX_EXP_CS1			IXP23XX_EXP_CFG_REG(0x04)
+#define IXP23XX_EXP_CS2			IXP23XX_EXP_CFG_REG(0x08)
+#define IXP23XX_EXP_CS3			IXP23XX_EXP_CFG_REG(0x0c)
+#define IXP23XX_EXP_CS4			IXP23XX_EXP_CFG_REG(0x10)
+#define IXP23XX_EXP_CS5			IXP23XX_EXP_CFG_REG(0x14)
+#define IXP23XX_EXP_CS6			IXP23XX_EXP_CFG_REG(0x18)
+#define IXP23XX_EXP_CS7			IXP23XX_EXP_CFG_REG(0x1c)
+#define IXP23XX_FLASH_WRITABLE		(0x2)
+#define IXP23XX_FLASH_BUS8		(0x1)
+
+#define IXP23XX_EXP_CFG0		IXP23XX_EXP_CFG_REG(0x20)
+#define IXP23XX_EXP_CFG1		IXP23XX_EXP_CFG_REG(0x24)
+#define IXP23XX_EXP_CFG0_MEM_MAP		(1 << 31)
+#define IXP23XX_EXP_CFG0_XSCALE_SPEED_SEL 	(3 << 22)
+#define IXP23XX_EXP_CFG0_XSCALE_SPEED_EN	(1 << 21)
+#define IXP23XX_EXP_CFG0_CPP_SPEED_SEL		(3 << 19)
+#define IXP23XX_EXP_CFG0_CPP_SPEED_EN		(1 << 18)
+#define IXP23XX_EXP_CFG0_PCI_SWIN		(3 << 16)
+#define IXP23XX_EXP_CFG0_PCI_DWIN		(3 << 14)
+#define IXP23XX_EXP_CFG0_PCI33_MODE		(1 << 13)
+#define IXP23XX_EXP_CFG0_QDR_SPEED_SEL		(1 << 12)
+#define IXP23XX_EXP_CFG0_CPP_DIV_SEL		(1 << 5)
+#define IXP23XX_EXP_CFG0_XSI_NOT_PRES		(1 << 4)
+#define IXP23XX_EXP_CFG0_PROM_BOOT		(1 << 3)
+#define IXP23XX_EXP_CFG0_PCI_ARB		(1 << 2)
+#define IXP23XX_EXP_CFG0_PCI_HOST		(1 << 1)
+#define IXP23XX_EXP_CFG0_FLASH_WIDTH		(1 << 0)
+
+#define IXP23XX_EXP_UNIT_FUSE		IXP23XX_EXP_CFG_REG(0x28)
+#define IXP23XX_EXP_MSF_MUX		IXP23XX_EXP_CFG_REG(0x30)
+
+#define IXP23XX_EXP_BUS_PHYS		0x90000000
+#define IXP23XX_EXP_BUS_WINDOW_SIZE	0x01000000
+
+#define IXP23XX_EXP_BUS_CS0_BASE	(IXP23XX_EXP_BUS_PHYS + 0x00000000)
+#define IXP23XX_EXP_BUS_CS1_BASE	(IXP23XX_EXP_BUS_PHYS + 0x01000000)
+#define IXP23XX_EXP_BUS_CS2_BASE	(IXP23XX_EXP_BUS_PHYS + 0x02000000)
+#define IXP23XX_EXP_BUS_CS3_BASE	(IXP23XX_EXP_BUS_PHYS + 0x03000000)
+#define IXP23XX_EXP_BUS_CS4_BASE	(IXP23XX_EXP_BUS_PHYS + 0x04000000)
+#define IXP23XX_EXP_BUS_CS5_BASE	(IXP23XX_EXP_BUS_PHYS + 0x05000000)
+#define IXP23XX_EXP_BUS_CS6_BASE	(IXP23XX_EXP_BUS_PHYS + 0x06000000)
+#define IXP23XX_EXP_BUS_CS7_BASE	(IXP23XX_EXP_BUS_PHYS + 0x07000000)
+
+
+/****************************************************************************
+ * Peripherals.
+ ****************************************************************************/
+#define IXP23XX_UART1_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x0000)
+#define IXP23XX_UART2_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x1000)
+#define IXP23XX_PMU_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x2000)
+#define IXP23XX_INTC_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x3000)
+#define IXP23XX_GPIO_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x4000)
+#define IXP23XX_TIMER_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x5000)
+#define IXP23XX_NPE0_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x6000)
+#define IXP23XX_DSR_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x7000)
+#define IXP23XX_NPE1_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x8000)
+#define IXP23XX_ETH0_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0x9000)
+#define IXP23XX_ETH1_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0xA000)
+#define IXP23XX_GIG0_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0xB000)
+#define IXP23XX_GIG1_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0xC000)
+#define IXP23XX_DDRS_VIRT		(IXP23XX_PERIPHERAL_VIRT + 0xD000)
+
+#define IXP23XX_UART1_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x0000)
+#define IXP23XX_UART2_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x1000)
+#define IXP23XX_PMU_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x2000)
+#define IXP23XX_INTC_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x3000)
+#define IXP23XX_GPIO_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x4000)
+#define IXP23XX_TIMER_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x5000)
+#define IXP23XX_NPE0_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x6000)
+#define IXP23XX_DSR_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x7000)
+#define IXP23XX_NPE1_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x8000)
+#define IXP23XX_ETH0_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0x9000)
+#define IXP23XX_ETH1_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0xA000)
+#define IXP23XX_GIG0_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0xB000)
+#define IXP23XX_GIG1_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0xC000)
+#define IXP23XX_DDRS_PHYS		(IXP23XX_PERIPHERAL_PHYS + 0xD000)
+
+
+/****************************************************************************
+ * Interrupt controller.
+ ****************************************************************************/
+#define IXP23XX_INTC_REG(x)		 ((volatile unsigned long *)(IXP23XX_INTC_VIRT + (x)))
+#define IXP23XX_INTR_ST1		IXP23XX_INTC_REG(0x00)
+#define IXP23XX_INTR_ST2		IXP23XX_INTC_REG(0x04)
+#define IXP23XX_INTR_ST3		IXP23XX_INTC_REG(0x08)
+#define IXP23XX_INTR_ST4		IXP23XX_INTC_REG(0x0c)
+#define IXP23XX_INTR_EN1		IXP23XX_INTC_REG(0x10)
+#define IXP23XX_INTR_EN2		IXP23XX_INTC_REG(0x14)
+#define IXP23XX_INTR_EN3		IXP23XX_INTC_REG(0x18)
+#define IXP23XX_INTR_EN4		IXP23XX_INTC_REG(0x1c)
+#define IXP23XX_INTR_SEL1		IXP23XX_INTC_REG(0x20)
+#define IXP23XX_INTR_SEL2		IXP23XX_INTC_REG(0x24)
+#define IXP23XX_INTR_SEL3		IXP23XX_INTC_REG(0x28)
+#define IXP23XX_INTR_SEL4		IXP23XX_INTC_REG(0x2c)
+#define IXP23XX_INTR_IRQ_ST1		IXP23XX_INTC_REG(0x30)
+#define IXP23XX_INTR_IRQ_ST2		IXP23XX_INTC_REG(0x34)
+#define IXP23XX_INTR_IRQ_ST3		IXP23XX_INTC_REG(0x38)
+#define IXP23XX_INTR_IRQ_ST4		IXP23XX_INTC_REG(0x3c)
+#define IXP23XX_INTR_IRQ_ENC_ST_OFFSET	0x54
+
+
+/****************************************************************************
+ * GPIO.
+ ****************************************************************************/
+#define IXP23XX_GPIO_REG(x)		((volatile unsigned long *)(IXP23XX_GPIO_VIRT + (x)))
+#define IXP23XX_GPIO_GPOUTR		IXP23XX_GPIO_REG(0x00)
+#define IXP23XX_GPIO_GPOER		IXP23XX_GPIO_REG(0x04)
+#define IXP23XX_GPIO_GPINR		IXP23XX_GPIO_REG(0x08)
+#define IXP23XX_GPIO_GPISR		IXP23XX_GPIO_REG(0x0c)
+#define IXP23XX_GPIO_GPIT1R		IXP23XX_GPIO_REG(0x10)
+#define IXP23XX_GPIO_GPIT2R		IXP23XX_GPIO_REG(0x14)
+#define IXP23XX_GPIO_GPCLKR		IXP23XX_GPIO_REG(0x18)
+#define IXP23XX_GPIO_GPDBSELR 		IXP23XX_GPIO_REG(0x1c)
+
+#define IXP23XX_GPIO_STYLE_MASK		0x7
+#define IXP23XX_GPIO_STYLE_ACTIVE_HIGH	0x0
+#define IXP23XX_GPIO_STYLE_ACTIVE_LOW	0x1
+#define IXP23XX_GPIO_STYLE_RISING_EDGE	0x2
+#define IXP23XX_GPIO_STYLE_FALLING_EDGE	0x3
+#define IXP23XX_GPIO_STYLE_TRANSITIONAL	0x4
+
+#define IXP23XX_GPIO_STYLE_SIZE		3
+
+
+/****************************************************************************
+ * Timer.
+ ****************************************************************************/
+#define IXP23XX_TIMER_REG(x)		((volatile unsigned long *)(IXP23XX_TIMER_VIRT + (x)))
+#define IXP23XX_TIMER_CONT		IXP23XX_TIMER_REG(0x00)
+#define IXP23XX_TIMER1_TIMESTAMP	IXP23XX_TIMER_REG(0x04)
+#define IXP23XX_TIMER1_RELOAD		IXP23XX_TIMER_REG(0x08)
+#define IXP23XX_TIMER2_TIMESTAMP	IXP23XX_TIMER_REG(0x0c)
+#define IXP23XX_TIMER2_RELOAD		IXP23XX_TIMER_REG(0x10)
+#define IXP23XX_TIMER_WDOG		IXP23XX_TIMER_REG(0x14)
+#define IXP23XX_TIMER_WDOG_EN		IXP23XX_TIMER_REG(0x18)
+#define IXP23XX_TIMER_WDOG_KEY		IXP23XX_TIMER_REG(0x1c)
+#define IXP23XX_TIMER_WDOG_KEY_MAGIC	0x482e
+#define IXP23XX_TIMER_STATUS		IXP23XX_TIMER_REG(0x20)
+#define IXP23XX_TIMER_SOFT_RESET	IXP23XX_TIMER_REG(0x24)
+#define IXP23XX_TIMER_SOFT_RESET_EN	IXP23XX_TIMER_REG(0x28)
+
+#define IXP23XX_TIMER_ENABLE		(1 << 0)
+#define IXP23XX_TIMER_ONE_SHOT		(1 << 1)
+/* Low order bits of reload value ignored */
+#define IXP23XX_TIMER_RELOAD_MASK	(0x3)
+#define IXP23XX_TIMER_DISABLED		(0x0)
+#define IXP23XX_TIMER1_INT_PEND		(1 << 0)
+#define IXP23XX_TIMER2_INT_PEND		(1 << 1)
+#define IXP23XX_TIMER_STATUS_TS_PEND	(1 << 2)
+#define IXP23XX_TIMER_STATUS_WDOG_PEND	(1 << 3)
+#define IXP23XX_TIMER_STATUS_WARM_RESET	(1 << 4)
+
+
+/****************************************************************************
+ * CAP CSRs.
+ ****************************************************************************/
+#define IXP23XX_GLOBAL_REG(x)		((volatile unsigned long *)(IXP23XX_CAP_CSR_VIRT + 0x4a00 + (x)))
+#define IXP23XX_PROD_IDG		IXP23XX_GLOBAL_REG(0x00)
+#define IXP23XX_MISC_CONTROL		IXP23XX_GLOBAL_REG(0x04)
+#define IXP23XX_MSF_CLK_CNTRL		IXP23XX_GLOBAL_REG(0x08)
+#define IXP23XX_RESET0			IXP23XX_GLOBAL_REG(0x0c)
+#define IXP23XX_RESET1			IXP23XX_GLOBAL_REG(0x10)
+#define IXP23XX_STRAP_OPTIONS		IXP23XX_GLOBAL_REG(0x18)
+
+#define IXP23XX_ENABLE_WATCHDOG		(1 << 24)
+#define IXP23XX_SHPC_INIT_COMP		(1 << 21)
+#define IXP23XX_RST_ALL			(1 << 16)
+#define IXP23XX_RESET_PCI		(1 << 2)
+#define IXP23XX_PCI_UNIT_RESET		(1 << 1)
+#define IXP23XX_XSCALE_RESET		(1 << 0)
+
+
+/****************************************************************************
+ * PCI CSRs.
+ ****************************************************************************/
+#define IXP23XX_PCI_CREG(x)		((volatile unsigned long *)(IXP23XX_PCI_CREG_VIRT + (x)))
+#define IXP23XX_PCI_CMDSTAT		IXP23XX_PCI_CREG(0x04)
+#define IXP23XX_PCI_SRAM_BAR		IXP23XX_PCI_CREG(0x14)
+#define IXP23XX_PCI_SDRAM_BAR		IXP23XX_PCI_CREG(0x18)
+
+
+#define IXP23XX_PCI_CSR(x)		((volatile unsigned long *)(IXP23XX_PCI_CREG_VIRT + 0x01000000 + (x)))
+#define IXP23XX_PCI_OUT_INT_STATUS	IXP23XX_PCI_CSR(0x0030)
+#define IXP23XX_PCI_OUT_INT_MASK	IXP23XX_PCI_CSR(0x0034)
+#define IXP23XX_PCI_SRAM_BASE_ADDR_MASK IXP23XX_PCI_CSR(0x00fc)
+#define IXP23XX_PCI_DRAM_BASE_ADDR_MASK IXP23XX_PCI_CSR(0x0100)
+#define IXP23XX_PCI_CONTROL		IXP23XX_PCI_CSR(0x013c)
+#define IXP23XX_PCI_ADDR_EXT		IXP23XX_PCI_CSR(0x0140)
+#define IXP23XX_PCI_ME_PUSH_STATUS	IXP23XX_PCI_CSR(0x0148)
+#define IXP23XX_PCI_ME_PUSH_EN		IXP23XX_PCI_CSR(0x014c)
+#define IXP23XX_PCI_ERR_STATUS		IXP23XX_PCI_CSR(0x0150)
+#define IXP23XX_PCI_ERROR_STATUS	IXP23XX_PCI_CSR(0x0150)
+#define IXP23XX_PCI_ERR_ENABLE		IXP23XX_PCI_CSR(0x0154)
+#define IXP23XX_PCI_XSCALE_INT_STATUS	IXP23XX_PCI_CSR(0x0158)
+#define IXP23XX_PCI_XSCALE_INT_ENABLE	IXP23XX_PCI_CSR(0x015c)
+#define IXP23XX_PCI_CPP_ADDR_BITS	IXP23XX_PCI_CSR(0x0160)
+
+
+#ifndef __ASSEMBLY__
+/*
+ * Is system memory on the XSI or CPP bus?
+ */
+static inline unsigned ixp23xx_cpp_boot(void)
+{
+	return (*IXP23XX_EXP_CFG0 & IXP23XX_EXP_CFG0_XSI_NOT_PRES);
+}
+#endif
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/memory.h b/include/asm-arm/arch-ixp23xx/memory.h
new file mode 100644
index 0000000..bebcf0a
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/memory.h
@@ -0,0 +1,46 @@
+/*
+ * include/asm-arm/arch-ixp23xx/memory.h
+ *
+ * Copyright (c) 2003-2004 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_ARCH_MEMORY_H
+#define __ASM_ARCH_MEMORY_H
+
+#include <asm/hardware.h>
+
+/*
+ * Physical DRAM offset.
+ */
+#define PHYS_OFFSET		(0x00000000)
+
+
+/*
+ * Virtual view <-> DMA view memory address translations
+ * virt_to_bus: Used to translate the virtual address to an
+ *		address suitable to be passed to set_dma_addr
+ * bus_to_virt: Used to convert an address for DMA operations
+ *		to an address that the kernel can use.
+ */
+#ifndef __ASSEMBLY__
+
+#define __virt_to_bus(v)						\
+	({ unsigned int ret;						\
+	ret = ((__virt_to_phys(v) - 0x00000000) +			\
+	 (*((volatile int *)IXP23XX_PCI_SDRAM_BAR) & 0xfffffff0)); 	\
+	ret; })
+
+#define __bus_to_virt(b)						\
+	({ unsigned int data;						\
+	data = *((volatile int *)IXP23XX_PCI_SDRAM_BAR);		\
+	 __phys_to_virt((((b - (data & 0xfffffff0)) + 0x00000000))); })
+
+#endif
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/platform.h b/include/asm-arm/arch-ixp23xx/platform.h
new file mode 100644
index 0000000..f85b468
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/platform.h
@@ -0,0 +1,31 @@
+/*
+ * include/asm-arm/arch-ixp23xx/platform.h
+ *
+ * Various bits of code used by platform-level code.
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright 2005 (c) MontaVista Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASSEMBLY__
+
+struct pci_sys_data;
+
+void ixp23xx_map_io(void);
+void ixp23xx_init_irq(void);
+void ixp23xx_sys_init(void);
+int ixp23xx_pci_setup(int, struct pci_sys_data *);
+void ixp23xx_pci_preinit(void);
+struct pci_bus *ixp23xx_pci_scan_bus(int, struct pci_sys_data*);
+
+extern struct sys_timer ixp23xx_timer;
+
+#define IXP23XX_UART_XTAL		14745600
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/system.h b/include/asm-arm/arch-ixp23xx/system.h
new file mode 100644
index 0000000..925e6b0
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/system.h
@@ -0,0 +1,33 @@
+/*
+ * include/asm-arm/arch-ixp23xx/system.h
+ *
+ * Copyright (C) 2003 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+
+static inline void arch_idle(void)
+{
+#if 0
+	if (!hlt_counter)
+		cpu_do_idle();
+#endif
+}
+
+static inline void arch_reset(char mode)
+{
+	/* First try machine specific support */
+	if (machine_is_ixdp2351()) {
+		*IXDP2351_CPLD_RESET1_REG = IXDP2351_CPLD_RESET1_MAGIC;
+		(void) *IXDP2351_CPLD_RESET1_REG;
+		*IXDP2351_CPLD_RESET1_REG = IXDP2351_CPLD_RESET1_ENABLE;
+	}
+
+	/* Use on-chip reset capability */
+	*IXP23XX_RESET0 |= IXP23XX_RST_ALL;
+}
diff --git a/include/asm-arm/arch-ixp23xx/time.h b/include/asm-arm/arch-ixp23xx/time.h
new file mode 100644
index 0000000..f6828fd
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/time.h
@@ -0,0 +1,3 @@
+/*
+ * include/asm-arm/arch-ixp23xx/time.h
+ */
diff --git a/include/asm-arm/arch-ixp23xx/timex.h b/include/asm-arm/arch-ixp23xx/timex.h
new file mode 100644
index 0000000..516f72f
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/timex.h
@@ -0,0 +1,7 @@
+/*
+ * include/asm-arm/arch-ixp23xx/timex.h
+ *
+ * XScale architecture timex specifications
+ */
+
+#define CLOCK_TICK_RATE 75000000
diff --git a/include/asm-arm/arch-ixp23xx/uncompress.h b/include/asm-arm/arch-ixp23xx/uncompress.h
new file mode 100644
index 0000000..62623fa
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/uncompress.h
@@ -0,0 +1,45 @@
+/*
+ * include/asm-arm/arch-ixp23xx/uncompress.h
+ *
+ * Copyright (C) 2002-2004 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_UNCOMPRESS_H
+#define __ASM_ARCH_UNCOMPRESS_H
+
+#include <asm/hardware.h>
+#include <linux/serial_reg.h>
+
+#define UART_BASE	((volatile u32 *)IXP23XX_UART1_PHYS)
+
+static __inline__ void putc(char c)
+{
+	int j;
+
+	for (j = 0; j < 0x1000; j++) {
+		if (UART_BASE[UART_LSR] & UART_LSR_THRE)
+			break;
+	}
+
+	UART_BASE[UART_TX] = c;
+}
+
+static void putstr(const char *s)
+{
+	while (*s) {
+		putc(*s);
+		if (*s == '\n')
+			putc('\r');
+		s++;
+	}
+}
+
+#define arch_decomp_setup()
+#define arch_decomp_wdog()
+
+
+#endif
diff --git a/include/asm-arm/arch-ixp23xx/vmalloc.h b/include/asm-arm/arch-ixp23xx/vmalloc.h
new file mode 100644
index 0000000..9f25666
--- /dev/null
+++ b/include/asm-arm/arch-ixp23xx/vmalloc.h
@@ -0,0 +1,10 @@
+/*
+ * include/asm-arm/arch-ixp23xx/vmalloc.h
+ *
+ * Copyright (c) 2005 MontaVista Software, Inc.
+ *
+ * NPU mappings end at 0xf0000000 and we allocate 64MB for board
+ * specific static I/O.
+ */
+
+#define VMALLOC_END	(0xec000000)
-- 
cgit v0.10.2