From 2c906b317b2d9c7e32b0d513e102bd68a2c49112 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Wed, 22 Mar 2006 09:54:10 +0000 Subject: [PATCH] cpufreq_conservative: aligning of codebase with ondemand Since the conservative govenor was released its codebase has drifted from the the direction and updates that have been applied to the ondemand govornor. This patch addresses the lack of updates in that period and brings conservative back up to date. The resulting diff file between cpufreq_ondemand.c and cpufreq_conservative.c is now much smaller and shows more clearly the differences between the two. Another reason to do this is ages ago, knowingly, I did a piss poor attempt at making conservative less responsive by knocking up DEF_SAMPLING_RATE_LATENCY_MULTIPLIER by two orders of magnitude. I did fix this ages ago but in my dis-organisation I must have toasted the diff and left it the way it was. About two weeks ago a user contacted me saying he was having problems with the conservative governor with his AMD Athlon XP-M 2800+ as /sys/devices/system/cpu/cpu0/cpufreq/conservative showed sampling_rate_min 9950000 sampling_rate_max 1360065408 Nine seconds to decide about changing the frequency....not too responsive :) Signed-off-by: Alexander Clouter Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index ac38766..adecd31 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -35,12 +35,7 @@ */ #define DEF_FREQUENCY_UP_THRESHOLD (80) -#define MIN_FREQUENCY_UP_THRESHOLD (0) -#define MAX_FREQUENCY_UP_THRESHOLD (100) - #define DEF_FREQUENCY_DOWN_THRESHOLD (20) -#define MIN_FREQUENCY_DOWN_THRESHOLD (0) -#define MAX_FREQUENCY_DOWN_THRESHOLD (100) /* * The polling frequency of this governor depends on the capability of @@ -53,10 +48,14 @@ * All times here are in uS. */ static unsigned int def_sampling_rate; -#define MIN_SAMPLING_RATE (def_sampling_rate / 2) +#define MIN_SAMPLING_RATE_RATIO (2) +/* for correct statistics, we need at least 10 ticks between each measure */ +#define MIN_STAT_SAMPLING_RATE (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) +#define MIN_SAMPLING_RATE (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) #define MAX_SAMPLING_RATE (500 * def_sampling_rate) -#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (100000) -#define DEF_SAMPLING_DOWN_FACTOR (5) +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) #define TRANSITION_LATENCY_LIMIT (10 * 1000) static void do_dbs_timer(void *data); @@ -136,7 +135,7 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, unsigned int input; int ret; ret = sscanf (buf, "%u", &input); - if (ret != 1 ) + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; mutex_lock(&dbs_mutex); @@ -173,8 +172,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, ret = sscanf (buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || - input < MIN_FREQUENCY_UP_THRESHOLD || + if (ret != 1 || input > 100 || input < 0 || input <= dbs_tuners_ins.down_threshold) { mutex_unlock(&dbs_mutex); return -EINVAL; @@ -194,8 +192,7 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused, ret = sscanf (buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD || - input < MIN_FREQUENCY_DOWN_THRESHOLD || + if (ret != 1 || input > 100 || input < 0 || input >= dbs_tuners_ins.up_threshold) { mutex_unlock(&dbs_mutex); return -EINVAL; @@ -337,7 +334,6 @@ static void dbs_check_cpu(int cpu) */ /* Check for frequency increase */ - idle_ticks = UINT_MAX; for_each_cpu_mask(j, policy->cpus) { unsigned int tmp_idle_ticks, total_idle_ticks; @@ -357,7 +353,7 @@ static void dbs_check_cpu(int cpu) /* Scale idle ticks by 100 and compare with up and down ticks */ idle_ticks *= 100; up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * - usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + usecs_to_jiffies(dbs_tuners_ins.sampling_rate); if (idle_ticks < up_idle_ticks) { down_skip[cpu] = 0; @@ -398,6 +394,7 @@ static void dbs_check_cpu(int cpu) struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); + /* Check for frequency decrease */ total_idle_ticks = j_dbs_info->prev_cpu_idle_up; tmp_idle_ticks = total_idle_ticks - j_dbs_info->prev_cpu_idle_down; @@ -414,12 +411,14 @@ static void dbs_check_cpu(int cpu) freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * dbs_tuners_ins.sampling_down_factor; down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * - usecs_to_jiffies(freq_down_sampling_rate); + usecs_to_jiffies(freq_down_sampling_rate); if (idle_ticks > down_idle_ticks) { - /* if we are already at the lowest speed then break out early + /* + * if we are already at the lowest speed then break out early * or if we 'cannot' reduce the speed as the user might want - * freq_step to be zero */ + * freq_step to be zero + */ if (requested_freq[cpu] == policy->min || dbs_tuners_ins.freq_step == 0) return; @@ -434,9 +433,8 @@ static void dbs_check_cpu(int cpu) if (requested_freq[cpu] < policy->min) requested_freq[cpu] = policy->min; - __cpufreq_driver_target(policy, - requested_freq[cpu], - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, requested_freq[cpu], + CPUFREQ_RELATION_H); return; } } @@ -507,13 +505,16 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (dbs_enable == 1) { unsigned int latency; /* policy latency is in nS. Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; - latency = policy->cpuinfo.transition_latency; - if (latency < 1000) - latency = 1000; - - def_sampling_rate = (latency / 1000) * + def_sampling_rate = latency * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; + + if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) + def_sampling_rate = MIN_STAT_SAMPLING_RATE; + dbs_tuners_ins.sampling_rate = def_sampling_rate; dbs_tuners_ins.ignore_nice = 0; dbs_tuners_ins.freq_step = 5; -- cgit v0.10.2 From e8a02572252f9115c2b8296c40fd8b985f06f872 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Wed, 22 Mar 2006 09:56:23 +0000 Subject: [PATCH] cpufreq_conservative: alter default responsiveness The sensible approach to making conservative less responsive than ondemand :) As mentioned in patch [1/4]. We do not want conservative to shoot through all the frequencies, its point (by default) is to slowly move through them. By default its ten times less responsive. Signed-off-by: Alexander Clouter Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index adecd31..3ca3cf0 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -509,7 +509,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; - def_sampling_rate = latency * + def_sampling_rate = 10 * latency * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) -- cgit v0.10.2 From 08a28e2e98aa821cf6f15f8a267beb2f33377bb9 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Wed, 22 Mar 2006 09:59:16 +0000 Subject: [PATCH] cpufreq_conservative: make for_each_cpu() safe All these changes should make cpufreq_conservative safe in regards to the x86 for_each_cpu cpumask.h changes and whatnot. Whilst making it safe a number of pointless for loops related to the cpu mask's were removed. I was never comfortable with all those for loops, especially as the iteration is over the same data again and again for each CPU you had in a single poll, an O(n^2) outcome to frequency scaling. The approach I use is to assume by default no CPU's exist and it sets the requested_freq to zero as a kind of flag, the reasoning is in the source ;) If the CPU is queried and requested_freq is zero then it initialises the variable to current_freq and then continues as if nothing happened which should be the same net effect as before? Signed-off-by: Alexander Clouter Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 3ca3cf0..7498f25 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -294,31 +294,40 @@ static struct attribute_group dbs_attr_group = { static void dbs_check_cpu(int cpu) { unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; + unsigned int tmp_idle_ticks, total_idle_ticks; unsigned int freq_step; unsigned int freq_down_sampling_rate; - static int down_skip[NR_CPUS]; - static int requested_freq[NR_CPUS]; - static unsigned short init_flag = 0; - struct cpu_dbs_info_s *this_dbs_info; - struct cpu_dbs_info_s *dbs_info; - + static unsigned short down_skip[NR_CPUS]; + static unsigned int requested_freq[NR_CPUS]; + static unsigned int init_flag = NR_CPUS; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); struct cpufreq_policy *policy; - unsigned int j; - this_dbs_info = &per_cpu(cpu_dbs_info, cpu); if (!this_dbs_info->enable) return; - policy = this_dbs_info->cur_policy; - - if ( init_flag == 0 ) { - for_each_online_cpu(j) { - dbs_info = &per_cpu(cpu_dbs_info, j); - requested_freq[j] = dbs_info->cur_policy->cur; + if ( init_flag != 0 ) { + for_each_cpu(init_flag) { + down_skip[init_flag] = 0; + /* I doubt a CPU exists with a freq of 0hz :) */ + requested_freq[init_flag] = 0; } - init_flag = 1; + init_flag = 0; } + /* + * If its a freshly initialised cpu we setup requested_freq. This + * check could be avoided if we did not care about a first time + * stunted increase in CPU speed when there is a load. I feel we + * should be initialising this to something. The removal of a CPU + * is not a problem, after a short time the CPU should settle down + * to a 'natural' frequency. + */ + if (requested_freq[cpu] == 0) + requested_freq[cpu] = this_dbs_info->cur_policy->cur; + + policy = this_dbs_info->cur_policy; + /* * The default safe range is 20% to 80% * Every sampling_rate, we check @@ -335,20 +344,15 @@ static void dbs_check_cpu(int cpu) /* Check for frequency increase */ idle_ticks = UINT_MAX; - for_each_cpu_mask(j, policy->cpus) { - unsigned int tmp_idle_ticks, total_idle_ticks; - struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - /* Check for frequency increase */ - total_idle_ticks = get_cpu_idle_time(j); - tmp_idle_ticks = total_idle_ticks - - j_dbs_info->prev_cpu_idle_up; - j_dbs_info->prev_cpu_idle_up = total_idle_ticks; - - if (tmp_idle_ticks < idle_ticks) - idle_ticks = tmp_idle_ticks; - } + /* Check for frequency increase */ + total_idle_ticks = get_cpu_idle_time(cpu); + tmp_idle_ticks = total_idle_ticks - + this_dbs_info->prev_cpu_idle_up; + this_dbs_info->prev_cpu_idle_up = total_idle_ticks; + + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; /* Scale idle ticks by 100 and compare with up and down ticks */ idle_ticks *= 100; @@ -357,13 +361,9 @@ static void dbs_check_cpu(int cpu) if (idle_ticks < up_idle_ticks) { down_skip[cpu] = 0; - for_each_cpu_mask(j, policy->cpus) { - struct cpu_dbs_info_s *j_dbs_info; + this_dbs_info->prev_cpu_idle_down = + this_dbs_info->prev_cpu_idle_up; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - j_dbs_info->prev_cpu_idle_down = - j_dbs_info->prev_cpu_idle_up; - } /* if we are already at full speed then break out early */ if (requested_freq[cpu] == policy->max) return; @@ -388,21 +388,14 @@ static void dbs_check_cpu(int cpu) if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor) return; - idle_ticks = UINT_MAX; - for_each_cpu_mask(j, policy->cpus) { - unsigned int tmp_idle_ticks, total_idle_ticks; - struct cpu_dbs_info_s *j_dbs_info; + /* Check for frequency decrease */ + total_idle_ticks = this_dbs_info->prev_cpu_idle_up; + tmp_idle_ticks = total_idle_ticks - + this_dbs_info->prev_cpu_idle_down; + this_dbs_info->prev_cpu_idle_down = total_idle_ticks; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - /* Check for frequency decrease */ - total_idle_ticks = j_dbs_info->prev_cpu_idle_up; - tmp_idle_ticks = total_idle_ticks - - j_dbs_info->prev_cpu_idle_down; - j_dbs_info->prev_cpu_idle_down = total_idle_ticks; - - if (tmp_idle_ticks < idle_ticks) - idle_ticks = tmp_idle_ticks; - } + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; /* Scale idle ticks by 100 and compare with up and down ticks */ idle_ticks *= 100; @@ -491,7 +484,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; } -- cgit v0.10.2 From a159b82770ab84e1b5e0306fa65e158188492b16 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Wed, 22 Mar 2006 10:00:18 +0000 Subject: [PATCH] cpufreq_conservative: alternative initialise approach Venki, author of cpufreq_ondemand, came up with a neater way to remove the initialiser code from the main loop of my code and out to the point when the governor is actually initialised. Not only does it look but it also feels cleaner, plus its simpler to understand. It also saves a bunch of pointless conditional statements in the main loop. Signed-off-by: Alexander Clouter Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7498f25..a152d2c 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -65,6 +65,8 @@ struct cpu_dbs_info_s { unsigned int prev_cpu_idle_up; unsigned int prev_cpu_idle_down; unsigned int enable; + unsigned int down_skip; + unsigned int requested_freq; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -297,35 +299,12 @@ static void dbs_check_cpu(int cpu) unsigned int tmp_idle_ticks, total_idle_ticks; unsigned int freq_step; unsigned int freq_down_sampling_rate; - static unsigned short down_skip[NR_CPUS]; - static unsigned int requested_freq[NR_CPUS]; - static unsigned int init_flag = NR_CPUS; struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); struct cpufreq_policy *policy; if (!this_dbs_info->enable) return; - if ( init_flag != 0 ) { - for_each_cpu(init_flag) { - down_skip[init_flag] = 0; - /* I doubt a CPU exists with a freq of 0hz :) */ - requested_freq[init_flag] = 0; - } - init_flag = 0; - } - - /* - * If its a freshly initialised cpu we setup requested_freq. This - * check could be avoided if we did not care about a first time - * stunted increase in CPU speed when there is a load. I feel we - * should be initialising this to something. The removal of a CPU - * is not a problem, after a short time the CPU should settle down - * to a 'natural' frequency. - */ - if (requested_freq[cpu] == 0) - requested_freq[cpu] = this_dbs_info->cur_policy->cur; - policy = this_dbs_info->cur_policy; /* @@ -360,12 +339,12 @@ static void dbs_check_cpu(int cpu) usecs_to_jiffies(dbs_tuners_ins.sampling_rate); if (idle_ticks < up_idle_ticks) { - down_skip[cpu] = 0; + this_dbs_info->down_skip = 0; this_dbs_info->prev_cpu_idle_down = this_dbs_info->prev_cpu_idle_up; /* if we are already at full speed then break out early */ - if (requested_freq[cpu] == policy->max) + if (this_dbs_info->requested_freq == policy->max) return; freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; @@ -374,18 +353,18 @@ static void dbs_check_cpu(int cpu) if (unlikely(freq_step == 0)) freq_step = 5; - requested_freq[cpu] += freq_step; - if (requested_freq[cpu] > policy->max) - requested_freq[cpu] = policy->max; + this_dbs_info->requested_freq += freq_step; + if (this_dbs_info->requested_freq > policy->max) + this_dbs_info->requested_freq = policy->max; - __cpufreq_driver_target(policy, requested_freq[cpu], + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, CPUFREQ_RELATION_H); return; } /* Check for frequency decrease */ - down_skip[cpu]++; - if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor) + this_dbs_info->down_skip++; + if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor) return; /* Check for frequency decrease */ @@ -399,7 +378,7 @@ static void dbs_check_cpu(int cpu) /* Scale idle ticks by 100 and compare with up and down ticks */ idle_ticks *= 100; - down_skip[cpu] = 0; + this_dbs_info->down_skip = 0; freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * dbs_tuners_ins.sampling_down_factor; @@ -412,7 +391,7 @@ static void dbs_check_cpu(int cpu) * or if we 'cannot' reduce the speed as the user might want * freq_step to be zero */ - if (requested_freq[cpu] == policy->min + if (this_dbs_info->requested_freq == policy->min || dbs_tuners_ins.freq_step == 0) return; @@ -422,11 +401,11 @@ static void dbs_check_cpu(int cpu) if (unlikely(freq_step == 0)) freq_step = 5; - requested_freq[cpu] -= freq_step; - if (requested_freq[cpu] < policy->min) - requested_freq[cpu] = policy->min; + this_dbs_info->requested_freq -= freq_step; + if (this_dbs_info->requested_freq < policy->min) + this_dbs_info->requested_freq = policy->min; - __cpufreq_driver_target(policy, requested_freq[cpu], + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, CPUFREQ_RELATION_H); return; } @@ -489,6 +468,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, = j_dbs_info->prev_cpu_idle_up; } this_dbs_info->enable = 1; + this_dbs_info->down_skip = 0; + this_dbs_info->requested_freq = policy->cur; sysfs_create_group(&policy->kobj, &dbs_attr_group); dbs_enable++; /* -- cgit v0.10.2 From ff8c288d7d1a368b663058cdee1ea0adcdef2fa2 Mon Sep 17 00:00:00 2001 From: Eric Piel Date: Fri, 10 Mar 2006 11:34:16 +0200 Subject: [PATCH] cpufreq_ondemand: Warn if it cannot run due to too long transition latency Display a warning if the ondemand governor can not be selected due to a transition latency of the cpufreq driver which is too long. Signed-off-by: Eric Piel Acked-by: Venkatesh Pallipadi Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 69aa1db..6430489 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -395,8 +395,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return -EINVAL; if (policy->cpuinfo.transition_latency > - (TRANSITION_LATENCY_LIMIT * 1000)) + (TRANSITION_LATENCY_LIMIT * 1000)) { + printk(KERN_WARNING "ondemand governor failed to load " + "due to too long transition latency\n"); return -EINVAL; + } if (this_dbs_info->enable) /* Already enabled */ break; -- cgit v0.10.2 From 9cbad61b41f0b6f0a4c600fe96d8292ffd592b50 Mon Sep 17 00:00:00 2001 From: Eric Piel Date: Fri, 10 Mar 2006 11:35:27 +0200 Subject: [PATCH] cpufreq_ondemand: keep ignore_nice_load value when it is reselected Keep the value of ignore_nice_load of the ondemand governor even after the governor has been deselected and selected back. This is the behavior of the other exported values of the ondemand governor and it's much more user-friendly. Signed-off-by: Eric Piel Acked-by: Venkatesh Pallipadi Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6430489..cd846f57 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -84,6 +84,7 @@ struct dbs_tuners { static struct dbs_tuners dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .ignore_nice = 0, }; static inline unsigned int get_cpu_idle_time(unsigned int cpu) @@ -434,8 +435,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, def_sampling_rate = MIN_STAT_SAMPLING_RATE; dbs_tuners_ins.sampling_rate = def_sampling_rate; - dbs_tuners_ins.ignore_nice = 0; - dbs_timer_init(); } -- cgit v0.10.2 From 7c9d8c0e84d395a01289ebd1597758939a875a86 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 26 Mar 2006 11:11:03 +0200 Subject: [PATCH] cpufreq_ondemand: add range check Assert that cpufreq_target is, at least, called with the minimum frequency allowed by this policy, not something lower. It triggered problems on ARM. Signed-off-by: Dominik Brodowski diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index cd846f57..956d121 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -351,6 +351,9 @@ static void dbs_check_cpu(int cpu) freq_next = (freq_next * policy->cur) / (dbs_tuners_ins.up_threshold - 10); + if (freq_next < policy->min) + freq_next = policy->min; + if (freq_next <= ((policy->cur * 95) / 100)) __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } -- cgit v0.10.2 From 8faaea3faa5ed0b2a15afb7b4e57ce0cd8dbe4ef Mon Sep 17 00:00:00 2001 From: Freddy Spierenburg Date: Sun, 26 Mar 2006 21:22:51 +0100 Subject: [SERIAL] Small time UART configuration fix for AU1100 processor The AU1100 processor does not have an internal UART2. Only UART0, UART1 and UART3 exist. This patch removes the non existing UART2 and replaces it with a descriptive comment. Signed-off-by: Freddy Spierenburg Signed-off-by: Russell King diff --git a/drivers/serial/8250_au1x00.c b/drivers/serial/8250_au1x00.c index 8d8d7a7..3d1bfd0 100644 --- a/drivers/serial/8250_au1x00.c +++ b/drivers/serial/8250_au1x00.c @@ -51,7 +51,7 @@ static struct plat_serial8250_port au1x00_data[] = { #elif defined(CONFIG_SOC_AU1100) PORT(UART0_ADDR, AU1100_UART0_INT), PORT(UART1_ADDR, AU1100_UART1_INT), - PORT(UART2_ADDR, AU1100_UART2_INT), + /* The internal UART2 does not exist on the AU1100 processor. */ PORT(UART3_ADDR, AU1100_UART3_INT), #elif defined(CONFIG_SOC_AU1550) PORT(UART0_ADDR, AU1550_UART0_INT), -- cgit v0.10.2 From 335bd9dff31d042b773591933d3ee5bd62d5ea27 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 26 Mar 2006 21:25:57 +0100 Subject: [SERIAL] Remove obsoleted au1x00_uart driver As announced in feature-removal-schedule.txt. Signed-off-by: Ralf Baechle Signed-off-by: Russell King diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c index 1080558..307e98c 100644 --- a/arch/mips/au1000/common/setup.c +++ b/arch/mips/au1000/common/setup.c @@ -94,7 +94,7 @@ void __init plat_setup(void) argptr = prom_getcmdline(); -#if defined(CONFIG_SERIAL_AU1X00_CONSOLE) || defined(CONFIG_SERIAL_8250_CONSOLE) +#ifdef CONFIG_SERIAL_8250_CONSOLE if ((argptr = strstr(argptr, "console=")) == NULL) { argptr = prom_getcmdline(); strcat(argptr, " console=ttyS0,115200"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index ceb3697..fe0d8b8 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -620,22 +620,6 @@ config SERIAL_SH_SCI_CONSOLE depends on SERIAL_SH_SCI=y select SERIAL_CORE_CONSOLE -config SERIAL_AU1X00 - bool "Enable Au1x00 UART Support" - depends on MIPS && SOC_AU1X00 - select SERIAL_CORE - help - If you have an Alchemy AU1X00 processor (MIPS based) and you want - to use serial ports, say Y. Otherwise, say N. - -config SERIAL_AU1X00_CONSOLE - bool "Enable Au1x00 serial console" - depends on SERIAL_AU1X00 - select SERIAL_CORE_CONSOLE - help - If you have an Alchemy AU1X00 processor (MIPS based) and you want - to use a console on a serial port, say Y. Otherwise, say N. - config SERIAL_CORE tristate diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index a3a4323..d2b4c21 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o obj-$(CONFIG_V850E_UART) += v850e_uart.o obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o -obj-$(CONFIG_SERIAL_AU1X00) += au1x00_uart.o obj-$(CONFIG_SERIAL_DZ) += dz.o obj-$(CONFIG_SERIAL_SH_SCI) += sh-sci.o obj-$(CONFIG_SERIAL_SGI_L1_CONSOLE) += sn_console.o diff --git a/drivers/serial/au1x00_uart.c b/drivers/serial/au1x00_uart.c deleted file mode 100644 index 948880a..0000000 --- a/drivers/serial/au1x00_uart.c +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Driver for 8250/16550-type serial ports - * - * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. - * - * Copyright (C) 2001 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * A note about mapbase / membase - * - * mapbase is the physical address of the IO port. Currently, we don't - * support this very well, and it may well be dropped from this driver - * in future. As such, mapbase should be NULL. - * - * membase is an 'ioremapped' cookie. This is compatible with the old - * serial.c driver, and is currently the preferred form. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#if defined(CONFIG_SERIAL_AU1X00_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - -#include -#include "8250.h" - -/* - * Debugging. - */ -#if 0 -#define DEBUG_AUTOCONF(fmt...) printk(fmt) -#else -#define DEBUG_AUTOCONF(fmt...) do { } while (0) -#endif - -#if 0 -#define DEBUG_INTR(fmt...) printk(fmt) -#else -#define DEBUG_INTR(fmt...) do { } while (0) -#endif - -#define PASS_LIMIT 256 - -/* - * We default to IRQ0 for the "no irq" hack. Some - * machine types want others as well - they're free - * to redefine this in their header file. - */ -#define is_real_interrupt(irq) ((irq) != 0) - -static struct old_serial_port old_serial_port[] = { - { .baud_base = 0, - .iomem_base = (u8 *)UART0_ADDR, - .irq = AU1000_UART0_INT, - .flags = STD_COM_FLAGS, - .iomem_reg_shift = 2, - }, { - .baud_base = 0, - .iomem_base = (u8 *)UART1_ADDR, - .irq = AU1000_UART1_INT, - .flags = STD_COM_FLAGS, - .iomem_reg_shift = 2 - }, { - .baud_base = 0, - .iomem_base = (u8 *)UART2_ADDR, - .irq = AU1000_UART2_INT, - .flags = STD_COM_FLAGS, - .iomem_reg_shift = 2 - }, { - .baud_base = 0, - .iomem_base = (u8 *)UART3_ADDR, - .irq = AU1000_UART3_INT, - .flags = STD_COM_FLAGS, - .iomem_reg_shift = 2 - } -}; - -#define UART_NR ARRAY_SIZE(old_serial_port) - -struct uart_8250_port { - struct uart_port port; - struct timer_list timer; /* "no irq" timer */ - struct list_head list; /* ports on this IRQ */ - unsigned short rev; - unsigned char acr; - unsigned char ier; - unsigned char lcr; - unsigned char mcr_mask; /* mask of user bits */ - unsigned char mcr_force; /* mask of forced bits */ - unsigned char lsr_break_flag; - - /* - * We provide a per-port pm hook. - */ - void (*pm)(struct uart_port *port, - unsigned int state, unsigned int old); -}; - -struct irq_info { - spinlock_t lock; - struct list_head *head; -}; - -static struct irq_info irq_lists[NR_IRQS]; - -/* - * Here we define the default xmit fifo size used for each type of UART. - */ -static const struct serial_uart_config uart_config[PORT_MAX_8250+1] = { - { "unknown", 1, 0 }, - { "8250", 1, 0 }, - { "16450", 1, 0 }, - { "16550", 1, 0 }, - /* PORT_16550A */ - { "AU1X00_UART",16, UART_CLEAR_FIFO | UART_USE_FIFO }, -}; - -static unsigned int serial_in(struct uart_8250_port *up, int offset) -{ - return au_readl((unsigned long)up->port.membase + offset); -} - -static void serial_out(struct uart_8250_port *up, int offset, int value) -{ - au_writel(value, (unsigned long)up->port.membase + offset); -} - -#define serial_inp(up, offset) serial_in(up, offset) -#define serial_outp(up, offset, value) serial_out(up, offset, value) - -/* - * This routine is called by rs_init() to initialize a specific serial - * port. It determines what type of UART chip this serial port is - * using: 8250, 16450, 16550, 16550A. The important question is - * whether or not this UART is a 16550A or not, since this will - * determine whether or not we can use its FIFO features or not. - */ -static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) -{ - unsigned char save_lcr, save_mcr; - unsigned long flags; - - if (!up->port.iobase && !up->port.mapbase && !up->port.membase) - return; - - DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04x, 0x%08lx): ", - up->port.line, up->port.iobase, up->port.membase); - - /* - * We really do need global IRQs disabled here - we're going to - * be frobbing the chips IRQ enable register to see if it exists. - */ - spin_lock_irqsave(&up->port.lock, flags); -// save_flags(flags); cli(); - - save_mcr = serial_in(up, UART_MCR); - save_lcr = serial_in(up, UART_LCR); - - up->port.type = PORT_16550A; - serial_outp(up, UART_LCR, save_lcr); - - up->port.fifosize = uart_config[up->port.type].dfl_xmit_fifo_size; - - if (up->port.type == PORT_UNKNOWN) - goto out; - - /* - * Reset the UART. - */ - serial_outp(up, UART_MCR, save_mcr); - serial_outp(up, UART_FCR, (UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT)); - serial_outp(up, UART_FCR, 0); - (void)serial_in(up, UART_RX); - serial_outp(up, UART_IER, 0); - - out: - spin_unlock_irqrestore(&up->port.lock, flags); -// restore_flags(flags); - DEBUG_AUTOCONF("type=%s\n", uart_config[up->port.type].name); -} - -static void serial8250_stop_tx(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - - if (up->ier & UART_IER_THRI) { - up->ier &= ~UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -static void serial8250_start_tx(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - - if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -static void serial8250_stop_rx(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - - up->ier &= ~UART_IER_RLSI; - up->port.read_status_mask &= ~UART_LSR_DR; - serial_out(up, UART_IER, up->ier); -} - -static void serial8250_enable_ms(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - - up->ier |= UART_IER_MSI; - serial_out(up, UART_IER, up->ier); -} - -static void -receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) -{ - struct tty_struct *tty = up->port.info->tty; - unsigned char ch, flag; - int max_count = 256; - - do { - ch = serial_inp(up, UART_RX); - flag = TTY_NORMAL; - up->port.icount.rx++; - - if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { - /* - * For statistics only - */ - if (*status & UART_LSR_BI) { - *status &= ~(UART_LSR_FE | UART_LSR_PE); - up->port.icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(&up->port)) - goto ignore_char; - } else if (*status & UART_LSR_PE) - up->port.icount.parity++; - else if (*status & UART_LSR_FE) - up->port.icount.frame++; - if (*status & UART_LSR_OE) - up->port.icount.overrun++; - - /* - * Mask off conditions which should be ingored. - */ - *status &= up->port.read_status_mask; - -#ifdef CONFIG_SERIAL_AU1X00_CONSOLE - if (up->port.line == up->port.cons->index) { - /* Recover the break flag from console xmit */ - *status |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif - if (*status & UART_LSR_BI) { - DEBUG_INTR("handling break...."); - flag = TTY_BREAK; - } else if (*status & UART_LSR_PE) - flag = TTY_PARITY; - else if (*status & UART_LSR_FE) - flag = TTY_FRAME; - } - if (uart_handle_sysrq_char(&up->port, ch, regs)) - goto ignore_char; - if ((*status & up->port.ignore_status_mask) == 0) - tty_insert_flip_char(tty, ch, flag); - if (*status & UART_LSR_OE) - /* - * Overrun is special, since it's reported - * immediately, and doesn't affect the current - * character. - */ - tty_insert_flip_char(tty, 0, TTY_OVERRUN); - } - ignore_char: - *status = serial_inp(up, UART_LSR); - } while ((*status & UART_LSR_DR) && (max_count-- > 0)); - spin_unlock(&up->port.lock); - tty_flip_buffer_push(tty); - spin_lock(&up->port.lock); -} - -static void transmit_chars(struct uart_8250_port *up) -{ - struct circ_buf *xmit = &up->port.info->xmit; - int count; - - if (up->port.x_char) { - serial_outp(up, UART_TX, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial8250_stop_tx(&up->port); - return; - } - - count = up->port.fifosize; - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - up->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - DEBUG_INTR("THRE..."); - - if (uart_circ_empty(xmit)) - serial8250_stop_tx(&up->port); -} - -static void check_modem_status(struct uart_8250_port *up) -{ - int status; - - status = serial_in(up, UART_MSR); - - if ((status & UART_MSR_ANY_DELTA) == 0) - return; - - if (status & UART_MSR_TERI) - up->port.icount.rng++; - if (status & UART_MSR_DDSR) - up->port.icount.dsr++; - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(&up->port, status & UART_MSR_DCD); - if (status & UART_MSR_DCTS) - uart_handle_cts_change(&up->port, status & UART_MSR_CTS); - - wake_up_interruptible(&up->port.info->delta_msr_wait); -} - -/* - * This handles the interrupt from one port. - */ -static inline void -serial8250_handle_port(struct uart_8250_port *up, struct pt_regs *regs) -{ - unsigned int status = serial_inp(up, UART_LSR); - - DEBUG_INTR("status = %x...", status); - - if (status & UART_LSR_DR) - receive_chars(up, &status, regs); - check_modem_status(up); - if (status & UART_LSR_THRE) - transmit_chars(up); -} - -/* - * This is the serial driver's interrupt routine. - * - * Arjan thinks the old way was overly complex, so it got simplified. - * Alan disagrees, saying that need the complexity to handle the weird - * nature of ISA shared interrupts. (This is a special exception.) - * - * In order to handle ISA shared interrupts properly, we need to check - * that all ports have been serviced, and therefore the ISA interrupt - * line has been de-asserted. - * - * This means we need to loop through all ports. checking that they - * don't have an interrupt pending. - */ -static irqreturn_t serial8250_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct irq_info *i = dev_id; - struct list_head *l, *end = NULL; - int pass_counter = 0; - - DEBUG_INTR("serial8250_interrupt(%d)...", irq); - - spin_lock(&i->lock); - - l = i->head; - do { - struct uart_8250_port *up; - unsigned int iir; - - up = list_entry(l, struct uart_8250_port, list); - - iir = serial_in(up, UART_IIR); - if (!(iir & UART_IIR_NO_INT)) { - spin_lock(&up->port.lock); - serial8250_handle_port(up, regs); - spin_unlock(&up->port.lock); - - end = NULL; - } else if (end == NULL) - end = l; - - l = l->next; - - if (l == i->head && pass_counter++ > PASS_LIMIT) { - /* If we hit this, we're dead. */ - printk(KERN_ERR "serial8250: too much work for " - "irq%d\n", irq); - break; - } - } while (l != end); - - spin_unlock(&i->lock); - - DEBUG_INTR("end.\n"); - /* FIXME! Was it really ours? */ - return IRQ_HANDLED; -} - -/* - * To support ISA shared interrupts, we need to have one interrupt - * handler that ensures that the IRQ line has been deasserted - * before returning. Failing to do this will result in the IRQ - * line being stuck active, and, since ISA irqs are edge triggered, - * no more IRQs will be seen. - */ -static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up) -{ - spin_lock_irq(&i->lock); - - if (!list_empty(i->head)) { - if (i->head == &up->list) - i->head = i->head->next; - list_del(&up->list); - } else { - BUG_ON(i->head != &up->list); - i->head = NULL; - } - - spin_unlock_irq(&i->lock); -} - -static int serial_link_irq_chain(struct uart_8250_port *up) -{ - struct irq_info *i = irq_lists + up->port.irq; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? SA_SHIRQ : 0; - - spin_lock_irq(&i->lock); - - if (i->head) { - list_add(&up->list, i->head); - spin_unlock_irq(&i->lock); - - ret = 0; - } else { - INIT_LIST_HEAD(&up->list); - i->head = &up->list; - spin_unlock_irq(&i->lock); - - ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, "serial", i); - if (ret < 0) - serial_do_unlink(i, up); - } - - return ret; -} - -static void serial_unlink_irq_chain(struct uart_8250_port *up) -{ - struct irq_info *i = irq_lists + up->port.irq; - - BUG_ON(i->head == NULL); - - if (list_empty(i->head)) - free_irq(up->port.irq, i); - - serial_do_unlink(i, up); -} - -/* - * This function is used to handle ports that do not have an - * interrupt. This doesn't work very well for 16450's, but gives - * barely passable results for a 16550A. (Although at the expense - * of much CPU overhead). - */ -static void serial8250_timeout(unsigned long data) -{ - struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int timeout; - unsigned int iir; - - iir = serial_in(up, UART_IIR); - if (!(iir & UART_IIR_NO_INT)) { - spin_lock(&up->port.lock); - serial8250_handle_port(up, NULL); - spin_unlock(&up->port.lock); - } - - timeout = up->port.timeout; - timeout = timeout > 6 ? (timeout / 2 - 2) : 1; - mod_timer(&up->timer, jiffies + timeout); -} - -static unsigned int serial8250_tx_empty(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0; - spin_unlock_irqrestore(&up->port.lock, flags); - - return ret; -} - -static unsigned int serial8250_get_mctrl(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned char status; - unsigned int ret; - - status = serial_in(up, UART_MSR); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr = (mcr & up->mcr_mask) | up->mcr_force; - - serial_out(up, UART_MCR, mcr); -} - -static void serial8250_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_out(up, UART_LCR, up->lcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -static int serial8250_startup(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - int retval; - - /* - * Clear the FIFO buffers and disable them. - * (they will be reeanbled in set_termios()) - */ - if (uart_config[up->port.type].flags & UART_CLEAR_FIFO) { - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_outp(up, UART_FCR, 0); - } - - /* - * Clear the interrupt registers. - */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); - - /* - * At this point, there's no way the LSR could still be 0xff; - * if it is, then bail out, because there's likely no UART - * here. - */ - if (!(up->port.flags & UPF_BUGGY_UART) && - (serial_inp(up, UART_LSR) == 0xff)) { - printk("ttyS%d: LSR safety check engaged!\n", up->port.line); - return -ENODEV; - } - - retval = serial_link_irq_chain(up); - if (retval) - return retval; - - /* - * Now, initialize the UART - */ - serial_outp(up, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&up->port.lock, flags); - if (up->port.flags & UPF_FOURPORT) { - if (!is_real_interrupt(up->port.irq)) - up->port.mctrl |= TIOCM_OUT1; - } else - /* - * Most PC uarts need OUT2 raised to enable interrupts. - */ - if (is_real_interrupt(up->port.irq)) - up->port.mctrl |= TIOCM_OUT2; - - serial8250_set_mctrl(&up->port, up->port.mctrl); - spin_unlock_irqrestore(&up->port.lock, flags); - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. - */ - up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_outp(up, UART_IER, up->ier); - - if (up->port.flags & UPF_FOURPORT) { - unsigned int icp; - /* - * Enable interrupts on the AST Fourport board - */ - icp = (up->port.iobase & 0xfe0) | 0x01f; - outb_p(0x80, icp); - (void) inb_p(icp); - } - - /* - * And clear the interrupt registers again for luck. - */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); - - return 0; -} - -static void serial8250_shutdown(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - - /* - * Disable interrupts from this port - */ - up->ier = 0; - serial_outp(up, UART_IER, 0); - - spin_lock_irqsave(&up->port.lock, flags); - if (up->port.flags & UPF_FOURPORT) { - /* reset interrupts on the AST Fourport board */ - inb((up->port.iobase & 0xfe0) | 0x1f); - up->port.mctrl |= TIOCM_OUT1; - } else - up->port.mctrl &= ~TIOCM_OUT2; - - serial8250_set_mctrl(&up->port, up->port.mctrl); - spin_unlock_irqrestore(&up->port.lock, flags); - - /* - * Disable break condition and FIFOs - */ - serial_out(up, UART_LCR, serial_inp(up, UART_LCR) & ~UART_LCR_SBC); - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT); - serial_outp(up, UART_FCR, 0); - - /* - * Read data port to reset things, and then unlink from - * the IRQ chain. - */ - (void) serial_in(up, UART_RX); - - if (!is_real_interrupt(up->port.irq)) - del_timer_sync(&up->timer); - else - serial_unlink_irq_chain(up); -} - -static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) -{ - unsigned int quot; - - /* - * Handle magic divisors for baud rates above baud_base on - * SMSC SuperIO chips. - */ - if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/4)) - quot = 0x8001; - else if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/8)) - quot = 0x8002; - else - quot = uart_get_divisor(port, baud); - - return quot; -} - -static void -serial8250_set_termios(struct uart_port *port, struct termios *termios, - struct termios *old) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned char cval, fcr = 0; - unsigned long flags; - unsigned int baud, quot; - - switch (termios->c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - if (termios->c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (termios->c_cflag & PARENB) - cval |= UART_LCR_PARITY; - if (!(termios->c_cflag & PARODD)) - cval |= UART_LCR_EPAR; -#ifdef CMSPAR - if (termios->c_cflag & CMSPAR) - cval |= UART_LCR_SPAR; -#endif - - /* - * Ask the core to calculate the divisor for us. - */ - baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); - quot = serial8250_get_divisor(port, baud); - quot = 0x35; /* FIXME */ - - /* - * Work around a bug in the Oxford Semiconductor 952 rev B - * chip which causes it to seriously miscalculate baud rates - * when DLL is 0. - */ - if ((quot & 0xff) == 0 && up->port.type == PORT_16C950 && - up->rev == 0x5201) - quot ++; - - if (uart_config[up->port.type].flags & UART_USE_FIFO) { - if (baud < 2400) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIGGER_1; - else - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIGGER_8; - } - - /* - * Ok, we're now changing the port state. Do it with - * interrupts disabled. - */ - spin_lock_irqsave(&up->port.lock, flags); - - /* - * Update the per-port timeout. - */ - uart_update_timeout(port, termios->c_cflag, baud); - - up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; - if (termios->c_iflag & INPCK) - up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) - up->port.read_status_mask |= UART_LSR_BI; - - /* - * Characteres to ignore - */ - up->port.ignore_status_mask = 0; - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - if (termios->c_iflag & IGNBRK) { - up->port.ignore_status_mask |= UART_LSR_BI; - /* - * If we're ignoring parity and break indicators, - * ignore overruns too (for real raw support). - */ - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_OE; - } - - /* - * ignore all characters if CREAD is not set - */ - if ((termios->c_cflag & CREAD) == 0) - up->port.ignore_status_mask |= UART_LSR_DR; - - /* - * CTS flow control flag and modem status interrupts - */ - up->ier &= ~UART_IER_MSI; - if (UART_ENABLE_MS(&up->port, termios->c_cflag)) - up->ier |= UART_IER_MSI; - - serial_out(up, UART_IER, up->ier); - serial_outp(up, 0x28, quot & 0xffff); - up->lcr = cval; /* Save LCR */ - if (up->port.type != PORT_16750) { - if (fcr & UART_FCR_ENABLE_FIFO) { - /* emulated UARTs (Lucent Venus 167x) need two steps */ - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - } - serial_outp(up, UART_FCR, fcr); /* set fcr */ - } - spin_unlock_irqrestore(&up->port.lock, flags); -} - -static void -serial8250_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - if (state) { - /* sleep */ - if (up->pm) - up->pm(port, state, oldstate); - } else { - /* wake */ - if (up->pm) - up->pm(port, state, oldstate); - } -} - -/* - * Resource handling. This is complicated by the fact that resources - * depend on the port type. Maybe we should be claiming the standard - * 8250 ports, and then trying to get other resources as necessary? - */ -static int -serial8250_request_std_resource(struct uart_8250_port *up, struct resource **res) -{ - unsigned int size = 8 << up->port.regshift; - int ret = 0; - - switch (up->port.iotype) { - case UPIO_MEM: - if (up->port.mapbase) { - *res = request_mem_region(up->port.mapbase, size, "serial"); - if (!*res) - ret = -EBUSY; - } - break; - - case UPIO_HUB6: - case UPIO_PORT: - *res = request_region(up->port.iobase, size, "serial"); - if (!*res) - ret = -EBUSY; - break; - } - return ret; -} - - -static void serial8250_release_port(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long start, offset = 0, size = 0; - - size <<= up->port.regshift; - - switch (up->port.iotype) { - case UPIO_MEM: - if (up->port.mapbase) { - /* - * Unmap the area. - */ - iounmap(up->port.membase); - up->port.membase = NULL; - - start = up->port.mapbase; - - if (size) - release_mem_region(start + offset, size); - release_mem_region(start, 8 << up->port.regshift); - } - break; - - case UPIO_HUB6: - case UPIO_PORT: - start = up->port.iobase; - - if (size) - release_region(start + offset, size); - release_region(start + offset, 8 << up->port.regshift); - break; - - default: - break; - } -} - -static int serial8250_request_port(struct uart_port *port) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - struct resource *res = NULL, *res_rsa = NULL; - int ret = 0; - - ret = serial8250_request_std_resource(up, &res); - - /* - * If we have a mapbase, then request that as well. - */ - if (ret == 0 && up->port.flags & UPF_IOREMAP) { - int size = res->end - res->start + 1; - - up->port.membase = ioremap(up->port.mapbase, size); - if (!up->port.membase) - ret = -ENOMEM; - } - - if (ret < 0) { - if (res_rsa) - release_resource(res_rsa); - if (res) - release_resource(res); - } - return ret; -} - -static void serial8250_config_port(struct uart_port *port, int flags) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - struct resource *res_std = NULL, *res_rsa = NULL; - int probeflags = PROBE_ANY; - - probeflags &= ~PROBE_RSA; - - if (flags & UART_CONFIG_TYPE) - autoconfig(up, probeflags); - - /* - * If the port wasn't an RSA port, release the resource. - */ - if (up->port.type != PORT_RSA && res_rsa) - release_resource(res_rsa); - - if (up->port.type == PORT_UNKNOWN && res_std) - release_resource(res_std); -} - -static int -serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - if (ser->irq >= NR_IRQS || ser->irq < 0 || - ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || - ser->type > PORT_MAX_8250 || ser->type == PORT_CIRRUS || - ser->type == PORT_STARTECH) - return -EINVAL; - return 0; -} - -static const char * -serial8250_type(struct uart_port *port) -{ - int type = port->type; - - if (type >= ARRAY_SIZE(uart_config)) - type = 0; - return uart_config[type].name; -} - -static struct uart_ops serial8250_pops = { - .tx_empty = serial8250_tx_empty, - .set_mctrl = serial8250_set_mctrl, - .get_mctrl = serial8250_get_mctrl, - .stop_tx = serial8250_stop_tx, - .start_tx = serial8250_start_tx, - .stop_rx = serial8250_stop_rx, - .enable_ms = serial8250_enable_ms, - .break_ctl = serial8250_break_ctl, - .startup = serial8250_startup, - .shutdown = serial8250_shutdown, - .set_termios = serial8250_set_termios, - .pm = serial8250_pm, - .type = serial8250_type, - .release_port = serial8250_release_port, - .request_port = serial8250_request_port, - .config_port = serial8250_config_port, - .verify_port = serial8250_verify_port, -}; - -static struct uart_8250_port serial8250_ports[UART_NR]; - -static void __init serial8250_isa_init_ports(void) -{ - struct uart_8250_port *up; - static int first = 1; - int i; - - if (!first) - return; - first = 0; - - for (i = 0, up = serial8250_ports; i < ARRAY_SIZE(old_serial_port); - i++, up++) { - up->port.iobase = old_serial_port[i].port; - up->port.irq = old_serial_port[i].irq; - up->port.uartclk = get_au1x00_uart_baud_base(); - up->port.flags = old_serial_port[i].flags; - up->port.hub6 = old_serial_port[i].hub6; - up->port.membase = old_serial_port[i].iomem_base; - up->port.iotype = old_serial_port[i].io_type; - up->port.regshift = old_serial_port[i].iomem_reg_shift; - up->port.ops = &serial8250_pops; - } -} - -static void __init serial8250_register_ports(struct uart_driver *drv) -{ - int i; - - serial8250_isa_init_ports(); - - for (i = 0; i < UART_NR; i++) { - struct uart_8250_port *up = &serial8250_ports[i]; - - up->port.line = i; - up->port.ops = &serial8250_pops; - init_timer(&up->timer); - up->timer.function = serial8250_timeout; - - /* - * ALPHA_KLUDGE_MCR needs to be killed. - */ - up->mcr_mask = ~ALPHA_KLUDGE_MCR; - up->mcr_force = ALPHA_KLUDGE_MCR; - - uart_add_one_port(drv, &up->port); - } -} - -#ifdef CONFIG_SERIAL_AU1X00_CONSOLE - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -/* - * Wait for transmitter & holding register to empty - */ -static inline void wait_for_xmitr(struct uart_8250_port *up) -{ - unsigned int status, tmout = 10000; - - /* Wait up to 10ms for the character(s) to be sent. */ - do { - status = serial_in(up, UART_LSR); - - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; - - if (--tmout == 0) - break; - udelay(1); - } while ((status & BOTH_EMPTY) != BOTH_EMPTY); - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (--tmout && - ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0)) - udelay(1); - } -} - -static void au1x00_console_putchar(struct uart_port *port, int ch) -{ - struct uart_8250_port *up = (struct uart_8250_port *)port; - - wait_for_xmitr(up); - serial_out(up, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. - */ -static void -serial8250_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_8250_port *up = &serial8250_ports[co->index]; - unsigned int ier; - - /* - * First save the UER then disable the interrupts - */ - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - - uart_console_write(&up->port, s, count, au1x00_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up); - serial_out(up, UART_IER, ier); -} - -static int __init serial8250_console_setup(struct console *co, char *options) -{ - struct uart_port *port; - int baud = 9600; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - /* - * Check whether an invalid uart number has been specified, and - * if so, search for the first available port that does have - * console support. - */ - if (co->index >= UART_NR) - co->index = 0; - port = &serial8250_ports[co->index].port; - - /* - * Temporary fix. - */ - spin_lock_init(&port->lock); - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(port, co, baud, parity, bits, flow); -} - -extern struct uart_driver serial8250_reg; -static struct console serial8250_console = { - .name = "ttyS", - .write = serial8250_console_write, - .device = uart_console_device, - .setup = serial8250_console_setup, - .flags = CON_PRINTBUFFER, - .index = -1, - .data = &serial8250_reg, -}; - -static int __init serial8250_console_init(void) -{ - serial8250_isa_init_ports(); - register_console(&serial8250_console); - return 0; -} -console_initcall(serial8250_console_init); - -#define SERIAL8250_CONSOLE &serial8250_console -#else -#define SERIAL8250_CONSOLE NULL -#endif - -static struct uart_driver serial8250_reg = { - .owner = THIS_MODULE, - .driver_name = "serial", - .devfs_name = "tts/", - .dev_name = "ttyS", - .major = TTY_MAJOR, - .minor = 64, - .nr = UART_NR, - .cons = SERIAL8250_CONSOLE, -}; - -int __init early_serial_setup(struct uart_port *port) -{ - serial8250_isa_init_ports(); - serial8250_ports[port->line].port = *port; - serial8250_ports[port->line].port.ops = &serial8250_pops; - return 0; -} - -/** - * serial8250_suspend_port - suspend one serial port - * @line: serial line number - * @level: the level of port suspension, as per uart_suspend_port - * - * Suspend one serial port. - */ -void serial8250_suspend_port(int line) -{ - uart_suspend_port(&serial8250_reg, &serial8250_ports[line].port); -} - -/** - * serial8250_resume_port - resume one serial port - * @line: serial line number - * @level: the level of port resumption, as per uart_resume_port - * - * Resume one serial port. - */ -void serial8250_resume_port(int line) -{ - uart_resume_port(&serial8250_reg, &serial8250_ports[line].port); -} - -static int __init serial8250_init(void) -{ - int ret, i; - - printk(KERN_INFO "Serial: Au1x00 driver\n"); - - for (i = 0; i < NR_IRQS; i++) - spin_lock_init(&irq_lists[i].lock); - - ret = uart_register_driver(&serial8250_reg); - if (ret >= 0) - serial8250_register_ports(&serial8250_reg); - - return ret; -} - -static void __exit serial8250_exit(void) -{ - int i; - - for (i = 0; i < UART_NR; i++) - uart_remove_one_port(&serial8250_reg, &serial8250_ports[i].port); - - uart_unregister_driver(&serial8250_reg); -} - -module_init(serial8250_init); -module_exit(serial8250_exit); - -EXPORT_SYMBOL(serial8250_suspend_port); -EXPORT_SYMBOL(serial8250_resume_port); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Au1x00 serial driver\n"); diff --git a/include/asm-mips/serial.h b/include/asm-mips/serial.h index e796d75..7b23664 100644 --- a/include/asm-mips/serial.h +++ b/include/asm-mips/serial.h @@ -103,88 +103,6 @@ #define IVR_SERIAL_PORT_DEFNS #endif -#ifdef CONFIG_SERIAL_AU1X00 -#include -#ifdef CONFIG_SOC_AU1000 -#define AU1000_SERIAL_PORT_DEFNS \ - { .baud_base = 0, .port = UART0_ADDR, \ - .iomem_base = (unsigned char *)UART0_ADDR, \ - .irq = AU1000_UART0_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART1_ADDR, \ - .iomem_base = (unsigned char *)UART1_ADDR, \ - .irq = AU1000_UART1_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART2_ADDR, \ - .iomem_base = (unsigned char *)UART2_ADDR, \ - .irq = AU1000_UART2_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART3_ADDR, \ - .iomem_base = (unsigned char *)UART3_ADDR, \ - .irq = AU1000_UART3_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, -#endif - -#ifdef CONFIG_SOC_AU1500 -#define AU1000_SERIAL_PORT_DEFNS \ - { .baud_base = 0, .port = UART0_ADDR, \ - .iomem_base = (unsigned char *)UART0_ADDR, \ - .irq = AU1500_UART0_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART3_ADDR, \ - .iomem_base = (unsigned char *)UART3_ADDR, \ - .irq = AU1500_UART3_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, -#endif - -#ifdef CONFIG_SOC_AU1100 -#define AU1000_SERIAL_PORT_DEFNS \ - { .baud_base = 0, .port = UART0_ADDR, \ - .iomem_base = (unsigned char *)UART0_ADDR, \ - .irq = AU1100_UART0_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART1_ADDR, \ - .iomem_base = (unsigned char *)UART1_ADDR, \ - .irq = AU1100_UART1_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART3_ADDR, \ - .iomem_base = (unsigned char *)UART3_ADDR, \ - .irq = AU1100_UART3_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, -#endif - -#ifdef CONFIG_SOC_AU1550 -#define AU1000_SERIAL_PORT_DEFNS \ - { .baud_base = 0, .port = UART0_ADDR, \ - .iomem_base = (unsigned char *)UART0_ADDR, \ - .irq = AU1550_UART0_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART1_ADDR, \ - .iomem_base = (unsigned char *)UART1_ADDR, \ - .irq = AU1550_UART1_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART3_ADDR, \ - .iomem_base = (unsigned char *)UART3_ADDR, \ - .irq = AU1550_UART3_INT, .flags = STD_COM_FLAGS,\ - .iomem_reg_shift = 2 }, -#endif - -#ifdef CONFIG_SOC_AU1200 -#define AU1000_SERIAL_PORT_DEFNS \ - { .baud_base = 0, .port = UART0_ADDR, \ - .iomem_base = (unsigned char *)UART0_ADDR, \ - .irq = AU1200_UART0_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, \ - { .baud_base = 0, .port = UART1_ADDR, \ - .iomem_base = (unsigned char *)UART1_ADDR, \ - .irq = AU1200_UART1_INT, .flags = STD_COM_FLAGS, \ - .iomem_reg_shift = 2 }, -#endif - -#else -#define AU1000_SERIAL_PORT_DEFNS -#endif - #ifdef CONFIG_HAVE_STD_PC_SERIAL_PORT #define STD_SERIAL_PORT_DEFNS \ /* UART CLK PORT IRQ FLAGS */ \ @@ -331,7 +249,6 @@ MOMENCO_OCELOT_G_SERIAL_PORT_DEFNS \ MOMENCO_OCELOT_C_SERIAL_PORT_DEFNS \ MOMENCO_OCELOT_SERIAL_PORT_DEFNS \ - MOMENCO_OCELOT_3_SERIAL_PORT_DEFNS \ - AU1000_SERIAL_PORT_DEFNS + MOMENCO_OCELOT_3_SERIAL_PORT_DEFNS #endif /* _ASM_SERIAL_H */ -- cgit v0.10.2 From c9b4470e2d9d611181fe488fdf463948d8b24901 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sun, 26 Mar 2006 21:40:27 +0100 Subject: [ARM] 3414/1: ep93xx: reset ethernet controller before uncompressing Patch from Lennert Buytenhek The Redboot version that cirrus supplies for the cirrus ep93xx doesn't turn off DMA from the ethernet MAC before jumping to linux, which means that we might end up with bits of RX status and packet data scribbled over the uncompressed kernel image. Work around this by resetting the ethernet MAC before we uncompress. We don't usually work around bootloader bugs, but considering that the large majority of ep93xx boards out there have this problem, I figured this it was justified in this case. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King diff --git a/include/asm-arm/arch-ep93xx/uncompress.h b/include/asm-arm/arch-ep93xx/uncompress.h index 4410d21..2171082 100644 --- a/include/asm-arm/arch-ep93xx/uncompress.h +++ b/include/asm-arm/arch-ep93xx/uncompress.h @@ -16,11 +16,21 @@ static unsigned char __raw_readb(unsigned int ptr) return *((volatile unsigned char *)ptr); } +static unsigned int __raw_readl(unsigned int ptr) +{ + return *((volatile unsigned int *)ptr); +} + static void __raw_writeb(unsigned char value, unsigned int ptr) { *((volatile unsigned char *)ptr) = value; } +static void __raw_writel(unsigned int value, unsigned int ptr) +{ + *((volatile unsigned int *)ptr) = value; +} + #define PHYS_UART1_DATA 0x808c0000 #define PHYS_UART1_FLAG 0x808c0018 @@ -49,5 +59,33 @@ static void putstr(const char *s) } } -#define arch_decomp_setup() + +/* + * Some bootloaders don't turn off DMA from the ethernet MAC before + * jumping to linux, which means that we might end up with bits of RX + * status and packet data scribbled over the uncompressed kernel image. + * Work around this by resetting the ethernet MAC before we uncompress. + */ +#define PHYS_ETH_SELF_CTL 0x80010020 +#define ETH_SELF_CTL_RESET 0x00000001 + +static void ethernet_reset(void) +{ + unsigned int v; + + /* Reset the ethernet MAC. */ + v = __raw_readl(PHYS_ETH_SELF_CTL); + __raw_writel(v | ETH_SELF_CTL_RESET, PHYS_ETH_SELF_CTL); + + /* Wait for reset to finish. */ + while (__raw_readl(PHYS_ETH_SELF_CTL) & ETH_SELF_CTL_RESET) + ; +} + + +static void arch_decomp_setup(void) +{ + ethernet_reset(); +} + #define arch_decomp_wdog() -- cgit v0.10.2 From bd8f103efef14980decf5d54a2de85835e9a6d9a Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Sun, 26 Mar 2006 21:40:27 +0100 Subject: [ARM] 3415/1: Akita: Add missing EXPORT_SYMBOL Patch from Richard Purdie Add an EXPORT_SYMBOL for the Akita IO Expander Device. Signed-off-by: Richard Purdie Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 30ec317..0dbb079 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -467,6 +467,8 @@ struct platform_device akitaioexp_device = { .id = -1, }; +EXPORT_SYMBOL_GPL(akitaioexp_device); + static void __init akita_init(void) { spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode; -- cgit v0.10.2 From fbb18a277a6f192404aa20ece49529acb1e1e76d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Mar 2006 23:13:39 +0100 Subject: [SERIAL] amba-pl010: allow platforms to specify modem control method The amba-pl010 hardware does not provide RTS and DTR control lines; it is expected that these will be implemented using GPIO. Allow platforms to supply a function to implement manipulation of modem control lines. Signed-off-by: Russell King diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 20071a2..576a5e9 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include #include @@ -28,6 +30,8 @@ #include "common.h" +static struct amba_pl010_data integrator_uart_data; + static struct amba_device rtc_device = { .dev = { .bus_id = "mb:15", @@ -44,6 +48,7 @@ static struct amba_device rtc_device = { static struct amba_device uart0_device = { .dev = { .bus_id = "mb:16", + .platform_data = &integrator_uart_data, }, .res = { .start = INTEGRATOR_UART0_BASE, @@ -57,6 +62,7 @@ static struct amba_device uart0_device = { static struct amba_device uart1_device = { .dev = { .bus_id = "mb:17", + .platform_data = &integrator_uart_data, }, .res = { .start = INTEGRATOR_UART1_BASE, @@ -115,6 +121,46 @@ static int __init integrator_init(void) arch_initcall(integrator_init); +/* + * On the Integrator platform, the port RTS and DTR are provided by + * bits in the following SC_CTRLS register bits: + * RTS DTR + * UART0 7 6 + * UART1 5 4 + */ +#define SC_CTRLC (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLC_OFFSET) +#define SC_CTRLS (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLS_OFFSET) + +static void integrator_uart_set_mctrl(struct amba_device *dev, void __iomem *base, unsigned int mctrl) +{ + unsigned int ctrls = 0, ctrlc = 0, rts_mask, dtr_mask; + + if (dev == &uart0_device) { + rts_mask = 1 << 4; + dtr_mask = 1 << 5; + } else { + rts_mask = 1 << 6; + dtr_mask = 1 << 7; + } + + if (mctrl & TIOCM_RTS) + ctrlc |= rts_mask; + else + ctrls |= rts_mask; + + if (mctrl & TIOCM_DTR) + ctrlc |= dtr_mask; + else + ctrls |= dtr_mask; + + __raw_writel(ctrls, SC_CTRLS); + __raw_writel(ctrlc, SC_CTRLC); +} + +static struct amba_pl010_data integrator_uart_data = { + .set_mctrl = integrator_uart_set_mctrl, +}; + #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET static DEFINE_SPINLOCK(cm_lock); diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index 127d6cd..1631414 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -51,8 +51,6 @@ #include #include -#include -#include #define UART_NR 2 @@ -65,26 +63,16 @@ #define UART_RX_DATA(s) (((s) & UART01x_FR_RXFE) == 0) #define UART_TX_READY(s) (((s) & UART01x_FR_TXFF) == 0) -#define UART_DUMMY_RSR_RX /*256*/0 +#define UART_DUMMY_RSR_RX 256 #define UART_PORT_SIZE 64 /* - * On the Integrator platform, the port RTS and DTR are provided by - * bits in the following SC_CTRLS register bits: - * RTS DTR - * UART0 7 6 - * UART1 5 4 - */ -#define SC_CTRLC (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLC_OFFSET) -#define SC_CTRLS (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLS_OFFSET) - -/* * We wrap our port structure around the generic uart_port. */ struct uart_amba_port { struct uart_port port; - unsigned int dtr_mask; - unsigned int rts_mask; + struct amba_device *dev; + struct amba_pl010_data *data; unsigned int old_status; }; @@ -300,20 +288,9 @@ static unsigned int pl010_get_mctrl(struct uart_port *port) static void pl010_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_amba_port *uap = (struct uart_amba_port *)port; - unsigned int ctrls = 0, ctrlc = 0; - - if (mctrl & TIOCM_RTS) - ctrlc |= uap->rts_mask; - else - ctrls |= uap->rts_mask; - if (mctrl & TIOCM_DTR) - ctrlc |= uap->dtr_mask; - else - ctrls |= uap->dtr_mask; - - __raw_writel(ctrls, SC_CTRLS); - __raw_writel(ctrlc, SC_CTRLC); + if (uap->data) + uap->data->set_mctrl(uap->dev, uap->port.membase, mctrl); } static void pl010_break_ctl(struct uart_port *port, int break_state) @@ -539,38 +516,7 @@ static struct uart_ops amba_pl010_pops = { .verify_port = pl010_verify_port, }; -static struct uart_amba_port amba_ports[UART_NR] = { - { - .port = { - .membase = (void *)IO_ADDRESS(INTEGRATOR_UART0_BASE), - .mapbase = INTEGRATOR_UART0_BASE, - .iotype = UPIO_MEM, - .irq = IRQ_UARTINT0, - .uartclk = 14745600, - .fifosize = 16, - .ops = &amba_pl010_pops, - .flags = UPF_BOOT_AUTOCONF, - .line = 0, - }, - .dtr_mask = 1 << 5, - .rts_mask = 1 << 4, - }, - { - .port = { - .membase = (void *)IO_ADDRESS(INTEGRATOR_UART1_BASE), - .mapbase = INTEGRATOR_UART1_BASE, - .iotype = UPIO_MEM, - .irq = IRQ_UARTINT1, - .uartclk = 14745600, - .fifosize = 16, - .ops = &amba_pl010_pops, - .flags = UPF_BOOT_AUTOCONF, - .line = 1, - }, - .dtr_mask = 1 << 7, - .rts_mask = 1 << 6, - } -}; +static struct uart_amba_port *amba_ports[UART_NR]; #ifdef CONFIG_SERIAL_AMBA_PL010_CONSOLE @@ -588,7 +534,7 @@ static void pl010_console_putchar(struct uart_port *port, int ch) static void pl010_console_write(struct console *co, const char *s, unsigned int count) { - struct uart_port *port = &amba_ports[co->index].port; + struct uart_port *port = &amba_ports[co->index]->port; unsigned int status, old_cr; /* @@ -651,7 +597,7 @@ static int __init pl010_console_setup(struct console *co, char *options) */ if (co->index >= UART_NR) co->index = 0; - port = &amba_ports[co->index].port; + port = &amba_ports[co->index]->port; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); @@ -672,24 +618,6 @@ static struct console amba_console = { .data = &amba_reg, }; -static int __init amba_console_init(void) -{ - /* - * All port initializations are done statically - */ - register_console(&amba_console); - return 0; -} -console_initcall(amba_console_init); - -static int __init amba_late_console_init(void) -{ - if (!(amba_console.flags & CON_ENABLED)) - register_console(&amba_console); - return 0; -} -late_initcall(amba_late_console_init); - #define AMBA_CONSOLE &amba_console #else #define AMBA_CONSOLE NULL @@ -707,30 +635,76 @@ static struct uart_driver amba_reg = { static int pl010_probe(struct amba_device *dev, void *id) { - int i; + struct uart_amba_port *port; + void __iomem *base; + int i, ret; - for (i = 0; i < UART_NR; i++) { - if (amba_ports[i].port.mapbase != dev->res.start) - continue; + for (i = 0; i < ARRAY_SIZE(amba_ports); i++) + if (amba_ports[i] == NULL) + break; - amba_ports[i].port.dev = &dev->dev; - uart_add_one_port(&amba_reg, &amba_ports[i].port); - amba_set_drvdata(dev, &amba_ports[i]); - break; + if (i == ARRAY_SIZE(amba_ports)) { + ret = -EBUSY; + goto out; } - return 0; + port = kzalloc(sizeof(struct uart_amba_port), GFP_KERNEL); + if (!port) { + ret = -ENOMEM; + goto out; + } + + base = ioremap(dev->res.start, PAGE_SIZE); + if (!base) { + ret = -ENOMEM; + goto free; + } + + port->port.dev = &dev->dev; + port->port.mapbase = dev->res.start; + port->port.membase = base; + port->port.iotype = UPIO_MEM; + port->port.irq = dev->irq[0]; + port->port.uartclk = 14745600; + port->port.fifosize = 16; + port->port.ops = &amba_pl010_pops; + port->port.flags = UPF_BOOT_AUTOCONF; + port->port.line = i; + port->dev = dev; + port->data = dev->dev.platform_data; + + amba_ports[i] = port; + + amba_set_drvdata(dev, port); + ret = uart_add_one_port(&amba_reg, &port->port); + if (ret) { + amba_set_drvdata(dev, NULL); + amba_ports[i] = NULL; + iounmap(base); + free: + kfree(port); + } + + out: + return ret; } static int pl010_remove(struct amba_device *dev) { - struct uart_amba_port *uap = amba_get_drvdata(dev); - - if (uap) - uart_remove_one_port(&amba_reg, &uap->port); + struct uart_amba_port *port = amba_get_drvdata(dev); + int i; amba_set_drvdata(dev, NULL); + uart_remove_one_port(&amba_reg, &port->port); + + for (i = 0; i < ARRAY_SIZE(amba_ports); i++) + if (amba_ports[i] == port) + amba_ports[i] = NULL; + + iounmap(port->port.membase); + kfree(port); + return 0; } diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index dc726ff..48ee32a 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -158,4 +158,10 @@ #define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) +#ifndef __ASSEMBLY__ +struct amba_pl010_data { + void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); +}; +#endif + #endif -- cgit v0.10.2 From aee85fe8e8143d3f54d9e6d3c6cdd40ead563267 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sun, 26 Mar 2006 23:16:39 +0100 Subject: [SERIAL] Provide Cirrus EP93xx AMBA PL010 serial support. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 865427b..2d892e4 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -30,7 +30,9 @@ #include #include #include +#include #include +#include #include #include @@ -360,6 +362,68 @@ void __init ep93xx_init_irq(void) /************************************************************************* * EP93xx peripheral handling *************************************************************************/ +#define EP93XX_UART_MCR_OFFSET (0x0100) + +static void ep93xx_uart_set_mctrl(struct amba_device *dev, + void __iomem *base, unsigned int mctrl) +{ + unsigned int mcr; + + mcr = 0; + if (!(mctrl & TIOCM_RTS)) + mcr |= 2; + if (!(mctrl & TIOCM_DTR)) + mcr |= 1; + + __raw_writel(mcr, base + EP93XX_UART_MCR_OFFSET); +} + +static struct amba_pl010_data ep93xx_uart_data = { + .set_mctrl = ep93xx_uart_set_mctrl, +}; + +static struct amba_device uart1_device = { + .dev = { + .bus_id = "apb:uart1", + .platform_data = &ep93xx_uart_data, + }, + .res = { + .start = EP93XX_UART1_PHYS_BASE, + .end = EP93XX_UART1_PHYS_BASE + 0x0fff, + .flags = IORESOURCE_MEM, + }, + .irq = { IRQ_EP93XX_UART1, NO_IRQ }, + .periphid = 0x00041010, +}; + +static struct amba_device uart2_device = { + .dev = { + .bus_id = "apb:uart2", + .platform_data = &ep93xx_uart_data, + }, + .res = { + .start = EP93XX_UART2_PHYS_BASE, + .end = EP93XX_UART2_PHYS_BASE + 0x0fff, + .flags = IORESOURCE_MEM, + }, + .irq = { IRQ_EP93XX_UART2, NO_IRQ }, + .periphid = 0x00041010, +}; + +static struct amba_device uart3_device = { + .dev = { + .bus_id = "apb:uart3", + .platform_data = &ep93xx_uart_data, + }, + .res = { + .start = EP93XX_UART3_PHYS_BASE, + .end = EP93XX_UART3_PHYS_BASE + 0x0fff, + .flags = IORESOURCE_MEM, + }, + .irq = { IRQ_EP93XX_UART3, NO_IRQ }, + .periphid = 0x00041010, +}; + void __init ep93xx_init_devices(void) { unsigned int v; @@ -371,4 +435,8 @@ void __init ep93xx_init_devices(void) v &= ~EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE; __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); __raw_writel(v, EP93XX_SYSCON_DEVICE_CONFIG); + + amba_device_register(&uart1_device, &iomem_resource); + amba_device_register(&uart2_device, &iomem_resource); + amba_device_register(&uart3_device, &iomem_resource); } -- cgit v0.10.2 From 6abaaaae6d5ed52422c8caf65f3cdbb95579bb58 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 Mar 2006 17:37:54 -0800 Subject: [IPSEC]: Fix tunnel error handling in ipcomp6 The error handling in ipcomp6_tunnel_create is broken in two ways: 1) If we fail to allocate an SPI (this should never happen in practice since there are plenty of 32-bit SPI values for us to use), we will still go ahead and create the SA. 2) When xfrm_init_state fails, we first of all may trigger the BUG_TRAP in __xfrm_state_destroy because we didn't set the state to DEAD. More importantly we end up returning the freed state as if we succeeded! This patch fixes them both. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 028b636..d4cfec3 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -228,6 +228,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr); + if (!t->id.spi) + goto error; + memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; @@ -243,7 +246,9 @@ out: return t; error: + t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); + t = NULL; goto out; } -- cgit v0.10.2 From 3eb4801d7bde42b82f05137392a1ee0ece090bad Mon Sep 17 00:00:00 2001 From: Norbert Kiesel Date: Sun, 26 Mar 2006 17:39:55 -0800 Subject: [NET]: drop duplicate assignment in request_sock Just noticed that request_sock.[ch] contain a useless assignment of rskq_accept_head to itself. I assume this is a typo and the 2nd one was supposed to be _tail. However, setting _tail to NULL is not needed, so the patch below just drops the 2nd assignment. Signed-off-By: Norbert Kiesel Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 11641c9..c5d7f92 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -145,7 +145,7 @@ static inline struct request_sock * { struct request_sock *req = queue->rskq_accept_head; - queue->rskq_accept_head = queue->rskq_accept_head = NULL; + queue->rskq_accept_head = NULL; return req; } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 98f0fc9..1e44eda 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -51,7 +51,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); - queue->rskq_accept_head = queue->rskq_accept_head = NULL; + queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); -- cgit v0.10.2 From 28832e83379afd0b0e83b78ac317290c79ebd496 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 8 Mar 2006 11:19:51 +0100 Subject: [PATCH] update max_sectors documentation The max_sectors has been split into max_hw_sectors and max_sectors for some time. A patch to have blk_queue_max_sectors enforce this was sent by me and it broke IDE. This patch updates the documentation. Signed-off-by: Jens Axboe diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 8e63831..f989a9e 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -132,8 +132,18 @@ Some new queue property settings: limit. No highmem default. blk_queue_max_sectors(q, max_sectors) - Maximum size request you can handle in units of 512 byte - sectors. 255 default. + Sets two variables that limit the size of the request. + + - The request queue's max_sectors, which is a soft size in + in units of 512 byte sectors, and could be dynamically varied + by the core kernel. + + - The request queue's max_hw_sectors, which is a hard limit + and reflects the maximum size request a driver can handle + in units of 512 byte sectors. + + The default for both max_sectors and max_hw_sectors is + 255. The upper limit of max_sectors is 1024. blk_queue_max_phys_segments(q, max_segments) Maximum physical segments you can handle in a request. 128 -- cgit v0.10.2 From 06ff37ffb4ba8bcbda0e9d19c712c954ef7b8a0a Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Wed, 8 Mar 2006 11:21:52 +0100 Subject: [PATCH] kzalloc() conversion in drivers/block this patch converts drivers/block to kzalloc usage. Compile tested with allyesconfig. Signed-off-by: Eric Sesterhenn Signed-off-by: Jens Axboe diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 9bdea2a..49c7cd5 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -311,11 +311,10 @@ static boolean DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) CommandsRemaining = CommandAllocationGroupSize; CommandGroupByteCount = CommandsRemaining * CommandAllocationLength; - AllocationPointer = kmalloc(CommandGroupByteCount, GFP_ATOMIC); + AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC); if (AllocationPointer == NULL) return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); - memset(AllocationPointer, 0, CommandGroupByteCount); } Command = (DAC960_Command_T *) AllocationPointer; AllocationPointer += CommandAllocationLength; @@ -2709,14 +2708,12 @@ DAC960_DetectController(struct pci_dev *PCI_Device, void __iomem *BaseAddress; int i; - Controller = (DAC960_Controller_T *) - kmalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC); + Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC); if (Controller == NULL) { DAC960_Error("Unable to allocate Controller structure for " "Controller at\n", NULL); return NULL; } - memset(Controller, 0, sizeof(DAC960_Controller_T)); Controller->ControllerNumber = DAC960_ControllerCount; DAC960_Controllers[DAC960_ControllerCount++] = Controller; Controller->Bus = PCI_Device->bus->number; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 71ec9e6..168da0c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -996,13 +996,11 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, status = -EINVAL; goto cleanup1; } - buff = (unsigned char **) kmalloc(MAXSGENTRIES * - sizeof(char *), GFP_KERNEL); + buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL); if (!buff) { status = -ENOMEM; goto cleanup1; } - memset(buff, 0, MAXSGENTRIES); buff_size = (int *) kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL); if (!buff_size) { @@ -2940,13 +2938,12 @@ static void cciss_getgeometry(int cntl_num) int block_size; int total_size; - ld_buff = kmalloc(sizeof(ReportLunData_struct), GFP_KERNEL); + ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL); if (ld_buff == NULL) { printk(KERN_ERR "cciss: out of memory\n"); return; } - memset(ld_buff, 0, sizeof(ReportLunData_struct)); size_buff = kmalloc(sizeof( ReadCapdata_struct), GFP_KERNEL); if (size_buff == NULL) { @@ -3060,10 +3057,9 @@ static int alloc_cciss_hba(void) for(i=0; i< MAX_CTLR; i++) { if (!hba[i]) { ctlr_info_t *p; - p = kmalloc(sizeof(ctlr_info_t), GFP_KERNEL); + p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL); if (!p) goto Enomem; - memset(p, 0, sizeof(ctlr_info_t)); for (n = 0; n < NWD; n++) p->gendisk[n] = disk[n]; hba[i] = p; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 0e66e90..597c007 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -1027,12 +1027,11 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) int i; c = (ctlr_info_t *) hba[cntl_num]; - ld_buff = kmalloc(reportlunsize, GFP_KERNEL); + ld_buff = kzalloc(reportlunsize, GFP_KERNEL); if (ld_buff == NULL) { printk(KERN_ERR "cciss: out of memory\n"); return; } - memset(ld_buff, 0, reportlunsize); inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); if (inq_buff == NULL) { printk(KERN_ERR "cciss: out of memory\n"); diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c index 08d858a..41a237c 100644 --- a/drivers/block/paride/bpck6.c +++ b/drivers/block/paride/bpck6.c @@ -224,10 +224,9 @@ static void bpck6_log_adapter( PIA *pi, char * scratch, int verbose ) static int bpck6_init_proto(PIA *pi) { - Interface *p = kmalloc(sizeof(Interface), GFP_KERNEL); + Interface *p = kzalloc(sizeof(Interface), GFP_KERNEL); if (p) { - memset(p, 0, sizeof(Interface)); pi->private = (unsigned long)p; return 0; } -- cgit v0.10.2 From f68110fc28859f5d7231d5c4fb6dbe68b1394c9b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Mar 2006 13:31:44 +0100 Subject: [BLOCK] ll_rw_blk: kmalloc -> kzalloc conversion Signed-off-by: Jens Axboe diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 062067f..b836b43 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -906,17 +906,15 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) __FUNCTION__, depth); } - tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); + tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); if (!tag_index) goto fail; nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; - tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); + tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); if (!tag_map) goto fail; - memset(tag_index, 0, depth * sizeof(struct request *)); - memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); tags->real_max_depth = depth; tags->max_depth = depth; tags->tag_index = tag_index; -- cgit v0.10.2 From b8fca1c7682105c843319728d8e37b42b19092bb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Mar 2006 15:24:37 +0100 Subject: [PATCH] ide-cd: quiet down GPCMD_READ_CDVD_CAPACITY failure Some drives like to throw a: ATAPI device hdc: Error: Not ready -- (Sense key=0x02) Incompatible medium installed -- (asc=0x30, ascq=0x00) The failed "Read Cd/Dvd Capacity" packet command was: "25 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " warning on incompatible media, so quiet down this error. Signed-off-by: Jens Axboe diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index c7671e1..b4a41d6 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2143,6 +2143,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, req.cmd[0] = GPCMD_READ_CDVD_CAPACITY; req.data = (char *)&capbuf; req.data_len = sizeof(capbuf); + req.flags |= REQ_QUIET; stat = cdrom_queue_packet_command(drive, &req); if (stat == 0) { -- cgit v0.10.2 From 4c5d0bbde9669cfb7f7fd4670dc9a117aea90384 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Mar 2006 08:08:01 +0100 Subject: [PATCH] blk_execute_rq_nowait-speedup Both elv_add_request() and generic_unplug_device() grab the queue lock and disable interrupts, do that locally and use the __ variants. Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index b836b43..7fc903b 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2477,10 +2477,12 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, rq->rq_disk = bd_disk; rq->flags |= REQ_NOMERGE; rq->end_io = done; - elv_add_request(q, rq, where, 1); - generic_unplug_device(q); + WARN_ON(irqs_disabled()); + spin_lock_irq(q->queue_lock); + __elv_add_request(q, rq, where, 1); + __generic_unplug_device(q); + spin_unlock_irq(q->queue_lock); } - EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); /** -- cgit v0.10.2 From 837c7878771c15ed8d85ecf814ece7fcb4551b46 Mon Sep 17 00:00:00 2001 From: Ben Woodard Date: Wed, 22 Mar 2006 08:09:31 +0100 Subject: [BLOCK] increase size of disk stat counters The kernel's representation of the disk statistics uses the type unsigned which is 32b on both 32b and 64b platforms. Unfortunately, most system tools that work with these numbers that are exported in /proc/diskstats including iostat read these numbers into unsigned longs. This works fine on 32b platforms and when the number of IO transactions are small on 64b platforms. However, when the numbers wrap on 64b platforms & you read the numbers into unsigned longs, and compare the numbers to previous readings, then you get an unsigned representation of a negative number. This looks like a very large 64b number & gives you bizarre readouts in iostat: ilc4: Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util ilc4: sda 5.50 0.00 143.96 0.00 307496983987862656.00 0.00 153748491993931328.00 0.00 2136028725038430.00 7.94 55.12 5.59 80.42 Though fixing iostat in user space is possible, and a quick survey indicates that several other similar tools also use unsigned longs when processing /proc/diskstats. Therefore, it seems like a better approach would be to extend the length of the disk_stats structure on 64b architectures to 64b. The following patch does that. It should not affect the operation on 32b platforms. Signed-off-by: Ben Woodard Cc: Rick Lindsley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe diff --git a/block/genhd.c b/block/genhd.c index 64510fd..db4c60c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -454,8 +454,8 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page) disk_round_stats(disk); preempt_enable(); return sprintf(page, - "%8u %8u %8llu %8u " - "%8u %8u %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " "%8u %8u %8u" "\n", disk_stat_read(disk, ios[READ]), @@ -649,7 +649,7 @@ static int diskstats_show(struct seq_file *s, void *v) preempt_disable(); disk_round_stats(gp); preempt_enable(); - seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n", + seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", gp->major, n + gp->first_minor, disk_name(gp, n, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), (unsigned long long)disk_stat_read(gp, sectors[0]), diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd647fd..179fea53 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -89,12 +89,12 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 struct disk_stats { - unsigned sectors[2]; /* READs and WRITEs */ - unsigned ios[2]; - unsigned merges[2]; - unsigned ticks[2]; - unsigned io_ticks; - unsigned time_in_queue; + unsigned long sectors[2]; /* READs and WRITEs */ + unsigned long ios[2]; + unsigned long merges[2]; + unsigned long ticks[2]; + unsigned long io_ticks; + unsigned long time_in_queue; }; struct gendisk { -- cgit v0.10.2 From 89a7689e5c039090d99cbdc3625252c3dee50f7f Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Fri, 24 Mar 2006 10:00:57 +0100 Subject: [PATCH] unused label in drivers/block/cciss. this patch removes a warning about an unused label, by moving the label into the ifdef. Signed-off-by: Eric Sesterhenn Signed-off-by: Jens Axboe diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 168da0c..1b0fd31 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2727,9 +2727,9 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c, struct pci_dev *pdev, return; } } +default_int_mode: #endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ -default_int_mode: c->intr[SIMPLE_MODE_INT] = pdev->irq; return; } -- cgit v0.10.2 From 09540e691d7f57684b296b60241b2ff357fae3ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Mar 2006 14:32:09 +0200 Subject: [PATCH] Fix blktrace compile with sysfs not defined debugfs depends on sysfs, so make blktrace kconfig option depend on that. Reported by Adrian Bunk. Signed-off-by: Jens Axboe diff --git a/block/Kconfig b/block/Kconfig index 43ca070..5536839 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -13,6 +13,7 @@ config LBD config BLK_DEV_IO_TRACE bool "Support for tracing block io actions" + depends on SYSFS select RELAY select DEBUG_FS help -- cgit v0.10.2 From ae36b883d29e53b6083ed3d1d44f254cee7507d3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Mar 2006 14:38:54 +0200 Subject: [PATCH] Don't make debugfs depend on DEBUG_KERNEL We use it generally now, at least blktrace isn't a specific debug kernel feature. Signed-off-by: Jens Axboe diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7e70ab1..6e8a60f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -172,7 +172,7 @@ config DEBUG_IOREMAP config DEBUG_FS bool "Debug Filesystem" - depends on DEBUG_KERNEL && SYSFS + depends on SYSFS help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and -- cgit v0.10.2 From 7bebd83dbf096d0bf4b4bfbaf5d8844a05f5eafc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Mar 2006 01:07:55 -0800 Subject: [SPARC64]: Fix off-by-1 error in TSB grow check. The worst part about this bug is what it would cause a hugepage TSB to be allocated for every address space since "0 >= 0". Signed-off-by: David S. Miller diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index d21ff32..0db2f7d 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -413,12 +413,12 @@ good_area: #ifdef CONFIG_HUGETLB_PAGE mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); #endif - if (unlikely(mm_rss >= + if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); #ifdef CONFIG_HUGETLB_PAGE mm_rss = mm->context.huge_pte_count; - if (unlikely(mm_rss >= + if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_HUGE, mm_rss); #endif -- cgit v0.10.2 From c8c4b939e8dff82ee2bc22ed506ae29ecffca4c2 Mon Sep 17 00:00:00 2001 From: Erik Mouw Date: Mon, 27 Mar 2006 15:32:30 +0100 Subject: [ARM] 3416/1: Update LART site URL Patch from Erik Mouw The LART website moved to http://www.lartmaker.nl/. This patch updates the URL in ARM specific files. Signed-off-by: Erik Mouw Acked-by: Jan-Derk Bakker Signed-off-by: Russell King diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet index cbbe558..78bc1c1 100644 --- a/Documentation/arm/SA1100/Assabet +++ b/Documentation/arm/SA1100/Assabet @@ -26,7 +26,7 @@ Installing a bootloader A couple of bootloaders able to boot Linux on Assabet are available: -BLOB (http://www.lart.tudelft.nl/lartware/blob/) +BLOB (http://www.lartmaker.nl/lartware/blob/) BLOB is a bootloader used within the LART project. Some contributed patches were merged into BLOB to add support for Assabet. diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART index 2f73f51..6d412b6 100644 --- a/Documentation/arm/SA1100/LART +++ b/Documentation/arm/SA1100/LART @@ -11,4 +11,4 @@ is under development, with plenty of others in different stages of planning. The hardware designs for this board have been released under an open license; -see the LART page at http://www.lart.tudelft.nl/ for more information. +see the LART page at http://www.lartmaker.nl/ for more information. diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 6923316..cd67ab1 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -111,7 +111,7 @@ config SA1100_LART bool "LART" help Say Y here if you are using the Linux Advanced Radio Terminal - (also known as the LART). See for + (also known as the LART). See for information on the LART. config SA1100_PLEB diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index 6435b2e..d68630b 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -11,7 +11,7 @@ * linux-2.4.5-rmk1 * * This software has been developed while working on the LART - * computing board (http://www.lart.tudelft.nl/), which is + * computing board (http://www.lartmaker.nl/), which is * sponsored by the Mobile Multi-media Communications * (http://www.mmc.tudelft.nl/) and Ubiquitous Communications * (http://www.ubicom.tudelft.nl/) projects. -- cgit v0.10.2 From 4682adcfb06448827fbdfd8b6c636796de569b7d Mon Sep 17 00:00:00 2001 From: "Hyok S. Choi" Date: Mon, 27 Mar 2006 15:46:06 +0100 Subject: [ARM] nommu: trivial patch for arch/arm/lib/Makefile ifeq ($CONFIG_PREEMPT,y) -> ifeq ($(CONFIG_PREEMPT),y) Signed-off-by: Hyok S. Choi Signed-off-by: Russell King diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 391f3ab..7b726b6 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -18,7 +18,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ # the code in uaccess.S is not preemption safe and # probably faster on ARMv3 only -ifeq ($CONFIG_PREEMPT,y) +ifeq ($(CONFIG_PREEMPT),y) lib-y += copy_from_user.o copy_to_user.o else ifneq ($(CONFIG_CPU_32v3),y) -- cgit v0.10.2 From 3747b36eeab93d8969e86987bbc1d44971229b26 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Mar 2006 16:59:07 +0100 Subject: [ARM] proc-v6: mark page table walks outer-cacheable, shared. Enable NX. Mark page table walks with outer-cacheable attribute, and enable no-execute in page tables. Signed-off-by: Russell King diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 9a7e7c0..ee6f152 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -21,6 +21,14 @@ #define D_CACHE_LINE_SIZE 32 +#define TTB_C (1 << 0) +#define TTB_S (1 << 1) +#define TTB_IMP (1 << 2) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_WBWA (1 << 3) +#define TTB_RGN_WT (2 << 3) +#define TTB_RGN_WB (3 << 3) + .macro cpsie, flags .ifc \flags, f .long 0xf1080040 @@ -115,7 +123,7 @@ ENTRY(cpu_v6_switch_mm) mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id #ifdef CONFIG_SMP - orr r0, r0, #2 @ set shared pgtable + orr r0, r0, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable #endif mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer @@ -161,8 +169,8 @@ ENTRY(cpu_v6_set_pte) tst r1, #L_PTE_YOUNG biceq r2, r2, #PTE_EXT_APX | PTE_EXT_AP_MASK -@ tst r1, #L_PTE_EXEC -@ orreq r2, r2, #PTE_EXT_XN + tst r1, #L_PTE_EXEC + orreq r2, r2, #PTE_EXT_XN tst r1, #L_PTE_PRESENT moveq r2, #0 @@ -221,7 +229,7 @@ __v6_setup: mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r0, c2, c0, 2 @ TTB control register #ifdef CONFIG_SMP - orr r4, r4, #2 @ set shared pgtable + orr r4, r4, #TTB_RGN_WBWA|TTB_S @ mark PTWs shared, outer cacheable #endif mcr p15, 0, r4, c2, c0, 1 @ load TTB1 #ifdef CONFIG_VFP -- cgit v0.10.2 From dbffa471611d3fc4b401ebabf7bb63ac0e0272b1 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Mon, 27 Mar 2006 01:14:26 -0800 Subject: [PATCH] PM-Timer: don't use workaround if chipset is not buggy Current timer_pm.c reads I/O port triple times, in order to avoid the bug of chipset. But I/O port is slow. 2.6.16 (pmtmr) Simple gettimeofday: 3.6532 microseconds 2.6.16+patch (pmtmr) Simple gettimeofday: 1.4582 microseconds [if chip is buggy, probably it will be 7us or more in 4.2% of probability.] This patch adds blacklist of buggy chip, and if chip is not buggy, this uses fast normal version instead of slow workaround version. If chip is buggy, warnings "pmtmr is slow". But sounds like there is gray zone. I found the PIIX4 errata, but I couldn't find the ICH4 errata. But some motherboard seems to have problem. So, if we found a ICH4, generate warnings, and use a workaround version. If user's ICH4 is good, the user can specify the "pmtmr_good" boot parameter to use fast version. Acked-by: John Stultz Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 264edaa..144e94a 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ +static int pmtmr_need_workaround __read_mostly = 1; + /*helper function to safely read acpi pm timesource*/ static inline u32 read_pmtmr(void) { - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; + if (pmtmr_need_workaround) { + u32 v1, v2, v3; + + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; + } + + return inl(pmtmr_ioport) & ACPI_PM_MASK; } @@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = { .opts = &timer_pmtmr, }; +#ifdef CONFIG_PCI +/* + * PIIX4 Errata: + * + * The power management timer may return improper results when read. + * Although the timer value settles properly after incrementing, + * while incrementing there is a 3 ns window every 69.8 ns where the + * timer value is indeterminate (a 4.2% chance that the data will be + * incorrect when read). As a result, the ACPI free running count up + * timer specification is violated due to erroneous reads. + */ +static int __init pmtmr_bug_check(void) +{ + static struct pci_device_id gray_list[] __initdata = { + /* these chipsets may have bug. */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_0) }, + { }, + }; + struct pci_dev *dev; + int pmtmr_has_bug = 0; + u8 rev; + + if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) + return 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if (dev) { + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + /* the bug has been fixed in PIIX4M */ + if (rev < 3) { + printk(KERN_WARNING "* Found PM-Timer Bug on this " + "chipset. Due to workarounds for a bug,\n" + "* this time source is slow. Consider trying " + "other time sources (clock=)\n"); + pmtmr_has_bug = 1; + } + pci_dev_put(dev); + } + + if (pci_dev_present(gray_list)) { + printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" + " to workarounds for a bug,\n" + "* this time source is slow. If you are sure your timer" + " does not have\n" + "* this bug, please use \"pmtmr_good\" to disable the " + "workaround\n"); + pmtmr_has_bug = 1; + } + + if (!pmtmr_has_bug) + pmtmr_need_workaround = 0; + + return 0; +} +device_initcall(pmtmr_bug_check); +#endif + +static int __init pmtr_good_setup(char *__str) +{ + pmtmr_need_workaround = 0; + return 1; +} +__setup("pmtmr_good", pmtr_good_setup); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); -- cgit v0.10.2 From bb83da053319e81906473bec08e8f677d6ac615f Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:27 -0800 Subject: [PATCH] uml: fix build warnings in __get_user Fix a gcc warning about losing qualifiers to the first argument of copy_from_user. The typeof change for correctness, and fixes a lot of the warnings, but there are some cases where x has some extra qualifiers, like volatile, which copy_from_user can't know about. For these, the void * cast seems to be necessary. Also cleaned up some of the whitespace and got rid of the emacs comment at the bottom. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h index 2ee028b..4e460d6 100644 --- a/include/asm-um/uaccess.h +++ b/include/asm-um/uaccess.h @@ -41,16 +41,16 @@ #define __get_user(x, ptr) \ ({ \ - const __typeof__(ptr) __private_ptr = ptr; \ - __typeof__(*(__private_ptr)) __private_val; \ - int __private_ret = -EFAULT; \ - (x) = (__typeof__(*(__private_ptr)))0; \ - if (__copy_from_user(&__private_val, (__private_ptr), \ - sizeof(*(__private_ptr))) == 0) {\ - (x) = (__typeof__(*(__private_ptr))) __private_val; \ - __private_ret = 0; \ - } \ - __private_ret; \ + const __typeof__(ptr) __private_ptr = ptr; \ + __typeof__(x) __private_val; \ + int __private_ret = -EFAULT; \ + (x) = (__typeof__(*(__private_ptr)))0; \ + if (__copy_from_user((void *) &__private_val, (__private_ptr), \ + sizeof(*(__private_ptr))) == 0) { \ + (x) = (__typeof__(*(__private_ptr))) __private_val; \ + __private_ret = 0; \ + } \ + __private_ret; \ }) #define get_user(x, ptr) \ @@ -89,14 +89,3 @@ struct exception_table_entry }; #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v0.10.2 From c90e12b86595dbd9996336ac9d762487c638d934 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:29 -0800 Subject: [PATCH] uml: fix declaration of exit() This fixes a conflict between a header and what gcc "knows" the declaration' to be. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/kern.h b/arch/um/include/kern.h index 7d223be..4ce3fc6 100644 --- a/arch/um/include/kern.h +++ b/arch/um/include/kern.h @@ -29,7 +29,7 @@ extern int getuid(void); extern int getgid(void); extern int pause(void); extern int write(int, const void *, int); -extern int exit(int); +extern void exit(int); extern int close(int); extern int read(unsigned int, char *, int); extern int pipe(int *); -- cgit v0.10.2 From d9f8b62a6b033fe7226d7c2b2efdd164ca1aa07c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:29 -0800 Subject: [PATCH] uml: fix some printf formats Some printf formats are incorrect for large memory sizes. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 80c9c18..0c5b9dc 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -421,7 +421,7 @@ int linux_main(int argc, char **argv) #ifndef CONFIG_HIGHMEM highmem = 0; printf("CONFIG_HIGHMEM not enabled - physical memory shrunk " - "to %lu bytes\n", physmem_size); + "to %Lu bytes\n", physmem_size); #endif } @@ -433,8 +433,8 @@ int linux_main(int argc, char **argv) setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); if(init_maps(physmem_size, iomem_size, highmem)){ - printf("Failed to allocate mem_map for %lu bytes of physical " - "memory and %lu bytes of highmem\n", physmem_size, + printf("Failed to allocate mem_map for %Lu bytes of physical " + "memory and %Lu bytes of highmem\n", physmem_size, highmem); exit(1); } -- cgit v0.10.2 From 63ae2a94d98dd9d94163918539ec80df33f44a69 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:30 -0800 Subject: [PATCH] uml: move libc-dependent irq code to os-Linux The serial UML OS-abstraction layer patch (um/kernel dir). This moves all systemcalls from irq_user.c file under os-Linux dir Signed-off-by: Gennady Sharapov Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h index b61deb8..69a93c8 100644 --- a/arch/um/include/irq_user.h +++ b/arch/um/include/irq_user.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -6,6 +6,17 @@ #ifndef __IRQ_USER_H__ #define __IRQ_USER_H__ +struct irq_fd { + struct irq_fd *next; + void *id; + int fd; + int type; + int irq; + int pid; + int events; + int current_events; +}; + enum { IRQ_READ, IRQ_WRITE }; extern void sigio_handler(int sig, union uml_pt_regs *regs); @@ -16,8 +27,6 @@ extern void reactivate_fd(int fd, int irqnum); extern void deactivate_fd(int fd, int irqnum); extern int deactivate_all_fds(void); extern void forward_interrupts(int pid); -extern void init_irq_signals(int on_sigstack); -extern void forward_ipi(int fd, int pid); extern int activate_ipi(int fd, int pid); extern unsigned long irq_lock(void); extern void irq_unlock(unsigned long flags); diff --git a/arch/um/include/misc_constants.h b/arch/um/include/misc_constants.h new file mode 100644 index 0000000..989bc08 --- /dev/null +++ b/arch/um/include/misc_constants.h @@ -0,0 +1,6 @@ +#ifndef __MISC_CONSTANT_H_ +#define __MISC_CONSTANT_H_ + +#include + +#endif diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 2a1c64d..efae3e6 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -12,6 +12,7 @@ #include "sysdep/ptrace.h" #include "kern_util.h" #include "skas/mm_id.h" +#include "irq_user.h" #define OS_TYPE_FILE 1 #define OS_TYPE_DIR 2 @@ -198,6 +199,8 @@ extern void os_flush_stdout(void); /* tt.c * for tt mode only (will be deleted in future...) */ +extern void forward_ipi(int fd, int pid); +extern void kill_child_dead(int pid); extern void stop(void); extern int wait_for_stop(int pid, int sig, int cont_type, void *relay); extern int protect_memory(unsigned long addr, unsigned long len, @@ -294,4 +297,17 @@ extern void initial_thread_cb_skas(void (*proc)(void *), extern void halt_skas(void); extern void reboot_skas(void); +/* irq.c */ +extern int os_waiting_for_events(struct irq_fd *active_fds); +extern int os_isatty(int fd); +extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); +extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); +extern void os_free_irq_later(struct irq_fd *active_fds, + int irq, void *dev_id); +extern int os_get_pollfd(int i); +extern void os_set_pollfd(int i, int fd); +extern void os_set_ioignore(void); +extern void init_irq_signals(int on_sigstack); + #endif diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c index 0e32f5f..483e79c 100644 --- a/arch/um/kernel/irq_user.c +++ b/arch/um/kernel/irq_user.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -18,53 +18,25 @@ #include "sigio.h" #include "irq_user.h" #include "os.h" +#include "misc_constants.h" -struct irq_fd { - struct irq_fd *next; - void *id; - int fd; - int type; - int irq; - int pid; - int events; - int current_events; -}; - -static struct irq_fd *active_fds = NULL; +struct irq_fd *active_fds = NULL; static struct irq_fd **last_irq_ptr = &active_fds; -static struct pollfd *pollfds = NULL; -static int pollfds_num = 0; -static int pollfds_size = 0; - -extern int io_count, intr_count; - extern void free_irqs(void); void sigio_handler(int sig, union uml_pt_regs *regs) { struct irq_fd *irq_fd; - int i, n; + int n; if(smp_sigio_handler()) return; while(1){ - n = poll(pollfds, pollfds_num, 0); - if(n < 0){ - if(errno == EINTR) continue; - printk("sigio_handler : poll returned %d, " - "errno = %d\n", n, errno); - break; - } - if(n == 0) break; - - irq_fd = active_fds; - for(i = 0; i < pollfds_num; i++){ - if(pollfds[i].revents != 0){ - irq_fd->current_events = pollfds[i].revents; - pollfds[i].fd = -1; - } - irq_fd = irq_fd->next; - } + n = os_waiting_for_events(active_fds); + if (n <= 0) { + if(n == -EINTR) continue; + else break; + } for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ if(irq_fd->current_events != 0){ @@ -77,14 +49,9 @@ void sigio_handler(int sig, union uml_pt_regs *regs) free_irqs(); } -int activate_ipi(int fd, int pid) -{ - return(os_set_fd_async(fd, pid)); -} - static void maybe_sigio_broken(int fd, int type) { - if(isatty(fd)){ + if(os_isatty(fd)){ if((type == IRQ_WRITE) && !pty_output_sigio){ write_sigio_workaround(); add_sigio_fd(fd, 0); @@ -96,12 +63,13 @@ static void maybe_sigio_broken(int fd, int type) } } + int activate_fd(int irq, int fd, int type, void *dev_id) { struct pollfd *tmp_pfd; struct irq_fd *new_fd, *irq_fd; unsigned long flags; - int pid, events, err, n, size; + int pid, events, err, n; pid = os_getpid(); err = os_set_fd_async(fd, pid); @@ -113,8 +81,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id) if(new_fd == NULL) goto out; - if(type == IRQ_READ) events = POLLIN | POLLPRI; - else events = POLLOUT; + if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI; + else events = UM_POLLOUT; *new_fd = ((struct irq_fd) { .next = NULL, .id = dev_id, .fd = fd, @@ -125,12 +93,12 @@ int activate_fd(int irq, int fd, int type, void *dev_id) .current_events = 0 } ); /* Critical section - locked by a spinlock because this stuff can - * be changed from interrupt handlers. The stuff above is done + * be changed from interrupt handlers. The stuff above is done * outside the lock because it allocates memory. */ /* Actually, it only looks like it can be called from interrupt - * context. The culprit is reactivate_fd, which calls + * context. The culprit is reactivate_fd, which calls * maybe_sigio_broken, which calls write_sigio_workaround, * which calls activate_fd. However, write_sigio_workaround should * only be called once, at boot time. That would make it clear that @@ -147,40 +115,42 @@ int activate_fd(int irq, int fd, int type, void *dev_id) } } - n = pollfds_num; - if(n == pollfds_size){ - while(1){ - /* Here we have to drop the lock in order to call - * kmalloc, which might sleep. If something else - * came in and changed the pollfds array, we free - * the buffer and try again. - */ - irq_unlock(flags); - size = (pollfds_num + 1) * sizeof(pollfds[0]); - tmp_pfd = um_kmalloc(size); - flags = irq_lock(); - if(tmp_pfd == NULL) - goto out_unlock; - if(n == pollfds_size) - break; + /*-------------*/ + if(type == IRQ_WRITE) + fd = -1; + + tmp_pfd = NULL; + n = 0; + + while(1){ + n = os_create_pollfd(fd, events, tmp_pfd, n); + if (n == 0) + break; + + /* n > 0 + * It means we couldn't put new pollfd to current pollfds + * and tmp_fds is NULL or too small for new pollfds array. + * Needed size is equal to n as minimum. + * + * Here we have to drop the lock in order to call + * kmalloc, which might sleep. + * If something else came in and changed the pollfds array + * so we will not be able to put new pollfd struct to pollfds + * then we free the buffer tmp_fds and try again. + */ + irq_unlock(flags); + if (tmp_pfd != NULL) { kfree(tmp_pfd); + tmp_pfd = NULL; } - if(pollfds != NULL){ - memcpy(tmp_pfd, pollfds, - sizeof(pollfds[0]) * pollfds_size); - kfree(pollfds); - } - pollfds = tmp_pfd; - pollfds_size++; - } - if(type == IRQ_WRITE) - fd = -1; + tmp_pfd = um_kmalloc(n); + if (tmp_pfd == NULL) + goto out_kfree; - pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); - pollfds_num++; + flags = irq_lock(); + } + /*-------------*/ *last_irq_ptr = new_fd; last_irq_ptr = &new_fd->next; @@ -196,6 +166,7 @@ int activate_fd(int irq, int fd, int type, void *dev_id) out_unlock: irq_unlock(flags); + out_kfree: kfree(new_fd); out: return(err); @@ -203,43 +174,10 @@ int activate_fd(int irq, int fd, int type, void *dev_id) static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) { - struct irq_fd **prev; unsigned long flags; - int i = 0; flags = irq_lock(); - prev = &active_fds; - while(*prev != NULL){ - if((*test)(*prev, arg)){ - struct irq_fd *old_fd = *prev; - if((pollfds[i].fd != -1) && - (pollfds[i].fd != (*prev)->fd)){ - printk("free_irq_by_cb - mismatch between " - "active_fds and pollfds, fd %d vs %d\n", - (*prev)->fd, pollfds[i].fd); - goto out; - } - - pollfds_num--; - - /* This moves the *whole* array after pollfds[i] (though - * it doesn't spot as such)! */ - - memmove(&pollfds[i], &pollfds[i + 1], - (pollfds_num - i) * sizeof(pollfds[0])); - - if(last_irq_ptr == &old_fd->next) - last_irq_ptr = prev; - *prev = (*prev)->next; - if(old_fd->type == IRQ_WRITE) - ignore_sigio_fd(old_fd->fd); - kfree(old_fd); - continue; - } - prev = &(*prev)->next; - i++; - } - out: + os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); irq_unlock(flags); } @@ -277,6 +215,7 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) { struct irq_fd *irq; int i = 0; + int fdi; for(irq=active_fds; irq != NULL; irq = irq->next){ if((irq->fd == fd) && (irq->irq == irqnum)) break; @@ -286,10 +225,11 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) printk("find_irq_by_fd doesn't have descriptor %d\n", fd); goto out; } - if((pollfds[i].fd != -1) && (pollfds[i].fd != fd)){ + fdi = os_get_pollfd(i); + if((fdi != -1) && (fdi != fd)){ printk("find_irq_by_fd - mismatch between active_fds and " - "pollfds, fd %d vs %d, need %d\n", irq->fd, - pollfds[i].fd, fd); + "pollfds, fd %d vs %d, need %d\n", irq->fd, + fdi, fd); irq = NULL; goto out; } @@ -310,9 +250,7 @@ void reactivate_fd(int fd, int irqnum) irq_unlock(flags); return; } - - pollfds[i].fd = irq->fd; - + os_set_pollfd(i, irq->fd); irq_unlock(flags); /* This calls activate_fd, so it has to be outside the critical @@ -331,7 +269,7 @@ void deactivate_fd(int fd, int irqnum) irq = find_irq_by_fd(fd, irqnum, &i); if(irq == NULL) goto out; - pollfds[i].fd = -1; + os_set_pollfd(i, -1); out: irq_unlock(flags); } @@ -347,21 +285,11 @@ int deactivate_all_fds(void) return(err); } /* If there is a signal already queued, after unblocking ignore it */ - set_handler(SIGIO, SIG_IGN, 0, -1); + os_set_ioignore(); return(0); } -void forward_ipi(int fd, int pid) -{ - int err; - - err = os_set_owner(fd, pid); - if(err < 0) - printk("forward_ipi: set_owner failed, fd = %d, me = %d, " - "target = %d, err = %d\n", fd, os_getpid(), pid, -err); -} - void forward_interrupts(int pid) { struct irq_fd *irq; @@ -384,22 +312,6 @@ void forward_interrupts(int pid) irq_unlock(flags); } -void init_irq_signals(int on_sigstack) -{ - __sighandler_t h; - int flags; - - flags = on_sigstack ? SA_ONSTACK : 0; - if(timer_irq_inited) h = (__sighandler_t) alarm_handler; - else h = boot_timer_handler; - - set_handler(SIGVTALRM, h, flags | SA_RESTART, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1); - set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - signal(SIGWINCH, SIG_IGN); -} - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 72113b0..c8d8d0a 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -77,9 +77,9 @@ static int idle_proc(void *cpup) if(err < 0) panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); - activate_ipi(cpu_data[cpu].ipi_pipe[0], + os_set_fd_async(cpu_data[cpu].ipi_pipe[0], current->thread.mode.tt.extern_pid); - + wmb(); if (cpu_test_and_set(cpu, cpu_callin_map)) { printk("huh, CPU#%d already present??\n", cpu); @@ -106,7 +106,7 @@ static struct task_struct *idle_thread(int cpu) panic("copy_process failed in idle_thread, error = %ld", PTR_ERR(new_task)); - cpu_tasks[cpu] = ((struct cpu_task) + cpu_tasks[cpu] = ((struct cpu_task) { .pid = new_task->thread.mode.tt.extern_pid, .task = new_task } ); idle_threads[cpu] = new_task; @@ -134,12 +134,12 @@ void smp_prepare_cpus(unsigned int maxcpus) if(err < 0) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - activate_ipi(cpu_data[me].ipi_pipe[0], + os_set_fd_async(cpu_data[me].ipi_pipe[0], current->thread.mode.tt.extern_pid); for(cpu = 1; cpu < ncpus; cpu++){ printk("Booting processor %d...\n", cpu); - + idle = idle_thread(cpu); init_idle(idle, cpu); @@ -223,7 +223,7 @@ void smp_call_function_slave(int cpu) atomic_inc(&scf_finished); } -int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, +int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, int wait) { int cpus = num_online_cpus() - 1; diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 08a4e62..73d3d83 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,14 +3,14 @@ # Licensed under the GPL # -obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ +obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o signal.o \ start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \ util.o drivers/ sys-$(SUBARCH)/ obj-$(CONFIG_MODE_SKAS) += skas/ -USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ - start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o +USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \ + signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o elf_aux.o: $(ARCH_DIR)/kernel-offsets.h CFLAGS_elf_aux.o += -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c new file mode 100644 index 0000000..e599be4 --- /dev/null +++ b/arch/um/os-Linux/irq.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "process.h" +#include "sigio.h" +#include "irq_user.h" +#include "os.h" + +static struct pollfd *pollfds = NULL; +static int pollfds_num = 0; +static int pollfds_size = 0; + +int os_waiting_for_events(struct irq_fd *active_fds) +{ + struct irq_fd *irq_fd; + int i, n, err; + + n = poll(pollfds, pollfds_num, 0); + if(n < 0){ + err = -errno; + if(errno != EINTR) + printk("sigio_handler: os_waiting_for_events:" + " poll returned %d, errno = %d\n", n, errno); + return err; + } + + if(n == 0) + return 0; + + irq_fd = active_fds; + + for(i = 0; i < pollfds_num; i++){ + if(pollfds[i].revents != 0){ + irq_fd->current_events = pollfds[i].revents; + pollfds[i].fd = -1; + } + irq_fd = irq_fd->next; + } + return n; +} + +int os_isatty(int fd) +{ + return(isatty(fd)); +} + +int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) +{ + if (pollfds_num == pollfds_size) { + if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { + /* return min size needed for new pollfds area */ + return((pollfds_size + 1) * sizeof(pollfds[0])); + } + + if(pollfds != NULL){ + memcpy(tmp_pfd, pollfds, + sizeof(pollfds[0]) * pollfds_size); + /* remove old pollfds */ + kfree(pollfds); + } + pollfds = tmp_pfd; + pollfds_size++; + } else { + /* remove not used tmp_pfd */ + if (tmp_pfd != NULL) + kfree(tmp_pfd); + } + + pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, + .events = events, + .revents = 0 }); + pollfds_num++; + + return(0); +} + +void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) +{ + struct irq_fd **prev; + int i = 0; + + prev = &active_fds; + while(*prev != NULL){ + if((*test)(*prev, arg)){ + struct irq_fd *old_fd = *prev; + if((pollfds[i].fd != -1) && + (pollfds[i].fd != (*prev)->fd)){ + printk("os_free_irq_by_cb - mismatch between " + "active_fds and pollfds, fd %d vs %d\n", + (*prev)->fd, pollfds[i].fd); + goto out; + } + + pollfds_num--; + + /* This moves the *whole* array after pollfds[i] + * (though it doesn't spot as such)! + */ + + memmove(&pollfds[i], &pollfds[i + 1], + (pollfds_num - i) * sizeof(pollfds[0])); + if(*last_irq_ptr2 == &old_fd->next) + *last_irq_ptr2 = prev; + + *prev = (*prev)->next; + if(old_fd->type == IRQ_WRITE) + ignore_sigio_fd(old_fd->fd); + kfree(old_fd); + continue; + } + prev = &(*prev)->next; + i++; + } + out: + return; +} + + +int os_get_pollfd(int i) +{ + return(pollfds[i].fd); +} + +void os_set_pollfd(int i, int fd) +{ + pollfds[i].fd = fd; +} + +void os_set_ioignore(void) +{ + set_handler(SIGIO, SIG_IGN, 0, -1); +} + +void init_irq_signals(int on_sigstack) +{ + __sighandler_t h; + int flags; + + flags = on_sigstack ? SA_ONSTACK : 0; + if(timer_irq_inited) h = (__sighandler_t) alarm_handler; + else h = boot_timer_handler; + + set_handler(SIGVTALRM, h, flags | SA_RESTART, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1); + set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + signal(SIGWINCH, SIG_IGN); +} diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c index 919d19f..5461a06 100644 --- a/arch/um/os-Linux/tt.c +++ b/arch/um/os-Linux/tt.c @@ -110,6 +110,16 @@ int wait_for_stop(int pid, int sig, int cont_type, void *relay) } } +void forward_ipi(int fd, int pid) +{ + int err; + + err = os_set_owner(fd, pid); + if(err < 0) + printk("forward_ipi: set_owner failed, fd = %d, me = %d, " + "target = %d, err = %d\n", fd, os_getpid(), pid, -err); +} + /* *------------------------- * only for tt mode (will be deleted in future...) diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c index 26b6867..6f4ef2b 100644 --- a/arch/um/sys-i386/user-offsets.c +++ b/arch/um/sys-i386/user-offsets.c @@ -3,12 +3,13 @@ #include #include #include +#include #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) #define DEFINE_LONGS(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) + asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)); @@ -67,4 +68,9 @@ void foo(void) DEFINE(HOST_ES, ES); DEFINE(HOST_GS, GS); DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); + + /* XXX Duplicated between i386 and x86_64 */ + DEFINE(UM_POLLIN, POLLIN); + DEFINE(UM_POLLPRI, POLLPRI); + DEFINE(UM_POLLOUT, POLLOUT); } diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c index 7bd54a9..899cebb 100644 --- a/arch/um/sys-x86_64/user-offsets.c +++ b/arch/um/sys-x86_64/user-offsets.c @@ -1,6 +1,7 @@ #include #include #include +#include #define __FRAME_OFFSETS #include #include @@ -88,4 +89,9 @@ void foo(void) DEFINE_LONGS(HOST_IP, RIP); DEFINE_LONGS(HOST_SP, RSP); DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); + + /* XXX Duplicated between i386 and x86_64 */ + DEFINE(UM_POLLIN, POLLIN); + DEFINE(UM_POLLPRI, POLLPRI); + DEFINE(UM_POLLOUT, POLLOUT); } -- cgit v0.10.2 From 9b4f018d92dbeff9305b4cf4aa80874c3974cfcc Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:31 -0800 Subject: [PATCH] uml: merge irq_user.c and irq.c The serial UML OS-abstraction layer patch (um/kernel dir). This joins irq_user.c and irq.c files. Signed-off-by: Gennady Sharapov Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 693018b..8458c30 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := vmlinux.lds clean-files := obj-y = config.o exec_kern.o exitcode.o \ - init_task.o irq.o irq_user.o ksyms.o mem.o physmem.o \ + init_task.o irq.o ksyms.o mem.o physmem.o \ process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \ signal_kern.o smp.o syscall_kern.o sysrq.o \ time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index bbf94bf..c39ea3a 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -31,6 +31,8 @@ #include "irq_user.h" #include "irq_kern.h" #include "os.h" +#include "sigio.h" +#include "misc_constants.h" /* * Generic, controller-independent functions: @@ -77,6 +79,298 @@ skip: return 0; } +struct irq_fd *active_fds = NULL; +static struct irq_fd **last_irq_ptr = &active_fds; + +extern void free_irqs(void); + +void sigio_handler(int sig, union uml_pt_regs *regs) +{ + struct irq_fd *irq_fd; + int n; + + if(smp_sigio_handler()) return; + while(1){ + n = os_waiting_for_events(active_fds); + if (n <= 0) { + if(n == -EINTR) continue; + else break; + } + + for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ + if(irq_fd->current_events != 0){ + irq_fd->current_events = 0; + do_IRQ(irq_fd->irq, regs); + } + } + } + + free_irqs(); +} + +static void maybe_sigio_broken(int fd, int type) +{ + if(os_isatty(fd)){ + if((type == IRQ_WRITE) && !pty_output_sigio){ + write_sigio_workaround(); + add_sigio_fd(fd, 0); + } + else if((type == IRQ_READ) && !pty_close_sigio){ + write_sigio_workaround(); + add_sigio_fd(fd, 1); + } + } +} + + +int activate_fd(int irq, int fd, int type, void *dev_id) +{ + struct pollfd *tmp_pfd; + struct irq_fd *new_fd, *irq_fd; + unsigned long flags; + int pid, events, err, n; + + pid = os_getpid(); + err = os_set_fd_async(fd, pid); + if(err < 0) + goto out; + + new_fd = um_kmalloc(sizeof(*new_fd)); + err = -ENOMEM; + if(new_fd == NULL) + goto out; + + if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI; + else events = UM_POLLOUT; + *new_fd = ((struct irq_fd) { .next = NULL, + .id = dev_id, + .fd = fd, + .type = type, + .irq = irq, + .pid = pid, + .events = events, + .current_events = 0 } ); + + /* Critical section - locked by a spinlock because this stuff can + * be changed from interrupt handlers. The stuff above is done + * outside the lock because it allocates memory. + */ + + /* Actually, it only looks like it can be called from interrupt + * context. The culprit is reactivate_fd, which calls + * maybe_sigio_broken, which calls write_sigio_workaround, + * which calls activate_fd. However, write_sigio_workaround should + * only be called once, at boot time. That would make it clear that + * this is called only from process context, and can be locked with + * a semaphore. + */ + flags = irq_lock(); + for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ + if((irq_fd->fd == fd) && (irq_fd->type == type)){ + printk("Registering fd %d twice\n", fd); + printk("Irqs : %d, %d\n", irq_fd->irq, irq); + printk("Ids : 0x%p, 0x%p\n", irq_fd->id, dev_id); + goto out_unlock; + } + } + + /*-------------*/ + if(type == IRQ_WRITE) + fd = -1; + + tmp_pfd = NULL; + n = 0; + + while(1){ + n = os_create_pollfd(fd, events, tmp_pfd, n); + if (n == 0) + break; + + /* n > 0 + * It means we couldn't put new pollfd to current pollfds + * and tmp_fds is NULL or too small for new pollfds array. + * Needed size is equal to n as minimum. + * + * Here we have to drop the lock in order to call + * kmalloc, which might sleep. + * If something else came in and changed the pollfds array + * so we will not be able to put new pollfd struct to pollfds + * then we free the buffer tmp_fds and try again. + */ + irq_unlock(flags); + if (tmp_pfd != NULL) { + kfree(tmp_pfd); + tmp_pfd = NULL; + } + + tmp_pfd = um_kmalloc(n); + if (tmp_pfd == NULL) + goto out_kfree; + + flags = irq_lock(); + } + /*-------------*/ + + *last_irq_ptr = new_fd; + last_irq_ptr = &new_fd->next; + + irq_unlock(flags); + + /* This calls activate_fd, so it has to be outside the critical + * section. + */ + maybe_sigio_broken(fd, type); + + return(0); + + out_unlock: + irq_unlock(flags); + out_kfree: + kfree(new_fd); + out: + return(err); +} + +static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) +{ + unsigned long flags; + + flags = irq_lock(); + os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); + irq_unlock(flags); +} + +struct irq_and_dev { + int irq; + void *dev; +}; + +static int same_irq_and_dev(struct irq_fd *irq, void *d) +{ + struct irq_and_dev *data = d; + + return((irq->irq == data->irq) && (irq->id == data->dev)); +} + +void free_irq_by_irq_and_dev(unsigned int irq, void *dev) +{ + struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, + .dev = dev }); + + free_irq_by_cb(same_irq_and_dev, &data); +} + +static int same_fd(struct irq_fd *irq, void *fd) +{ + return(irq->fd == *((int *) fd)); +} + +void free_irq_by_fd(int fd) +{ + free_irq_by_cb(same_fd, &fd); +} + +static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) +{ + struct irq_fd *irq; + int i = 0; + int fdi; + + for(irq=active_fds; irq != NULL; irq = irq->next){ + if((irq->fd == fd) && (irq->irq == irqnum)) break; + i++; + } + if(irq == NULL){ + printk("find_irq_by_fd doesn't have descriptor %d\n", fd); + goto out; + } + fdi = os_get_pollfd(i); + if((fdi != -1) && (fdi != fd)){ + printk("find_irq_by_fd - mismatch between active_fds and " + "pollfds, fd %d vs %d, need %d\n", irq->fd, + fdi, fd); + irq = NULL; + goto out; + } + *index_out = i; + out: + return(irq); +} + +void reactivate_fd(int fd, int irqnum) +{ + struct irq_fd *irq; + unsigned long flags; + int i; + + flags = irq_lock(); + irq = find_irq_by_fd(fd, irqnum, &i); + if(irq == NULL){ + irq_unlock(flags); + return; + } + os_set_pollfd(i, irq->fd); + irq_unlock(flags); + + /* This calls activate_fd, so it has to be outside the critical + * section. + */ + maybe_sigio_broken(fd, irq->type); +} + +void deactivate_fd(int fd, int irqnum) +{ + struct irq_fd *irq; + unsigned long flags; + int i; + + flags = irq_lock(); + irq = find_irq_by_fd(fd, irqnum, &i); + if(irq == NULL) + goto out; + os_set_pollfd(i, -1); + out: + irq_unlock(flags); +} + +int deactivate_all_fds(void) +{ + struct irq_fd *irq; + int err; + + for(irq=active_fds;irq != NULL;irq = irq->next){ + err = os_clear_fd_async(irq->fd); + if(err) + return(err); + } + /* If there is a signal already queued, after unblocking ignore it */ + os_set_ioignore(); + + return(0); +} + +void forward_interrupts(int pid) +{ + struct irq_fd *irq; + unsigned long flags; + int err; + + flags = irq_lock(); + for(irq=active_fds;irq != NULL;irq = irq->next){ + err = os_set_owner(irq->fd, pid); + if(err < 0){ + /* XXX Just remove the irq rather than + * print out an infinite stream of these + */ + printk("Failed to forward %d to pid %d, err = %d\n", + irq->fd, pid, -err); + } + + irq->pid = pid; + } + irq_unlock(flags); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c deleted file mode 100644 index 483e79c..0000000 --- a/arch/um/kernel/irq_user.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "process.h" -#include "sigio.h" -#include "irq_user.h" -#include "os.h" -#include "misc_constants.h" - -struct irq_fd *active_fds = NULL; -static struct irq_fd **last_irq_ptr = &active_fds; - -extern void free_irqs(void); - -void sigio_handler(int sig, union uml_pt_regs *regs) -{ - struct irq_fd *irq_fd; - int n; - - if(smp_sigio_handler()) return; - while(1){ - n = os_waiting_for_events(active_fds); - if (n <= 0) { - if(n == -EINTR) continue; - else break; - } - - for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ - if(irq_fd->current_events != 0){ - irq_fd->current_events = 0; - do_IRQ(irq_fd->irq, regs); - } - } - } - - free_irqs(); -} - -static void maybe_sigio_broken(int fd, int type) -{ - if(os_isatty(fd)){ - if((type == IRQ_WRITE) && !pty_output_sigio){ - write_sigio_workaround(); - add_sigio_fd(fd, 0); - } - else if((type == IRQ_READ) && !pty_close_sigio){ - write_sigio_workaround(); - add_sigio_fd(fd, 1); - } - } -} - - -int activate_fd(int irq, int fd, int type, void *dev_id) -{ - struct pollfd *tmp_pfd; - struct irq_fd *new_fd, *irq_fd; - unsigned long flags; - int pid, events, err, n; - - pid = os_getpid(); - err = os_set_fd_async(fd, pid); - if(err < 0) - goto out; - - new_fd = um_kmalloc(sizeof(*new_fd)); - err = -ENOMEM; - if(new_fd == NULL) - goto out; - - if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI; - else events = UM_POLLOUT; - *new_fd = ((struct irq_fd) { .next = NULL, - .id = dev_id, - .fd = fd, - .type = type, - .irq = irq, - .pid = pid, - .events = events, - .current_events = 0 } ); - - /* Critical section - locked by a spinlock because this stuff can - * be changed from interrupt handlers. The stuff above is done - * outside the lock because it allocates memory. - */ - - /* Actually, it only looks like it can be called from interrupt - * context. The culprit is reactivate_fd, which calls - * maybe_sigio_broken, which calls write_sigio_workaround, - * which calls activate_fd. However, write_sigio_workaround should - * only be called once, at boot time. That would make it clear that - * this is called only from process context, and can be locked with - * a semaphore. - */ - flags = irq_lock(); - for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ - if((irq_fd->fd == fd) && (irq_fd->type == type)){ - printk("Registering fd %d twice\n", fd); - printk("Irqs : %d, %d\n", irq_fd->irq, irq); - printk("Ids : 0x%x, 0x%x\n", irq_fd->id, dev_id); - goto out_unlock; - } - } - - /*-------------*/ - if(type == IRQ_WRITE) - fd = -1; - - tmp_pfd = NULL; - n = 0; - - while(1){ - n = os_create_pollfd(fd, events, tmp_pfd, n); - if (n == 0) - break; - - /* n > 0 - * It means we couldn't put new pollfd to current pollfds - * and tmp_fds is NULL or too small for new pollfds array. - * Needed size is equal to n as minimum. - * - * Here we have to drop the lock in order to call - * kmalloc, which might sleep. - * If something else came in and changed the pollfds array - * so we will not be able to put new pollfd struct to pollfds - * then we free the buffer tmp_fds and try again. - */ - irq_unlock(flags); - if (tmp_pfd != NULL) { - kfree(tmp_pfd); - tmp_pfd = NULL; - } - - tmp_pfd = um_kmalloc(n); - if (tmp_pfd == NULL) - goto out_kfree; - - flags = irq_lock(); - } - /*-------------*/ - - *last_irq_ptr = new_fd; - last_irq_ptr = &new_fd->next; - - irq_unlock(flags); - - /* This calls activate_fd, so it has to be outside the critical - * section. - */ - maybe_sigio_broken(fd, type); - - return(0); - - out_unlock: - irq_unlock(flags); - out_kfree: - kfree(new_fd); - out: - return(err); -} - -static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) -{ - unsigned long flags; - - flags = irq_lock(); - os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); - irq_unlock(flags); -} - -struct irq_and_dev { - int irq; - void *dev; -}; - -static int same_irq_and_dev(struct irq_fd *irq, void *d) -{ - struct irq_and_dev *data = d; - - return((irq->irq == data->irq) && (irq->id == data->dev)); -} - -void free_irq_by_irq_and_dev(unsigned int irq, void *dev) -{ - struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, - .dev = dev }); - - free_irq_by_cb(same_irq_and_dev, &data); -} - -static int same_fd(struct irq_fd *irq, void *fd) -{ - return(irq->fd == *((int *) fd)); -} - -void free_irq_by_fd(int fd) -{ - free_irq_by_cb(same_fd, &fd); -} - -static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) -{ - struct irq_fd *irq; - int i = 0; - int fdi; - - for(irq=active_fds; irq != NULL; irq = irq->next){ - if((irq->fd == fd) && (irq->irq == irqnum)) break; - i++; - } - if(irq == NULL){ - printk("find_irq_by_fd doesn't have descriptor %d\n", fd); - goto out; - } - fdi = os_get_pollfd(i); - if((fdi != -1) && (fdi != fd)){ - printk("find_irq_by_fd - mismatch between active_fds and " - "pollfds, fd %d vs %d, need %d\n", irq->fd, - fdi, fd); - irq = NULL; - goto out; - } - *index_out = i; - out: - return(irq); -} - -void reactivate_fd(int fd, int irqnum) -{ - struct irq_fd *irq; - unsigned long flags; - int i; - - flags = irq_lock(); - irq = find_irq_by_fd(fd, irqnum, &i); - if(irq == NULL){ - irq_unlock(flags); - return; - } - os_set_pollfd(i, irq->fd); - irq_unlock(flags); - - /* This calls activate_fd, so it has to be outside the critical - * section. - */ - maybe_sigio_broken(fd, irq->type); -} - -void deactivate_fd(int fd, int irqnum) -{ - struct irq_fd *irq; - unsigned long flags; - int i; - - flags = irq_lock(); - irq = find_irq_by_fd(fd, irqnum, &i); - if(irq == NULL) - goto out; - os_set_pollfd(i, -1); - out: - irq_unlock(flags); -} - -int deactivate_all_fds(void) -{ - struct irq_fd *irq; - int err; - - for(irq=active_fds;irq != NULL;irq = irq->next){ - err = os_clear_fd_async(irq->fd); - if(err) - return(err); - } - /* If there is a signal already queued, after unblocking ignore it */ - os_set_ioignore(); - - return(0); -} - -void forward_interrupts(int pid) -{ - struct irq_fd *irq; - unsigned long flags; - int err; - - flags = irq_lock(); - for(irq=active_fds;irq != NULL;irq = irq->next){ - err = os_set_owner(irq->fd, pid); - if(err < 0){ - /* XXX Just remove the irq rather than - * print out an infinite stream of these - */ - printk("Failed to forward %d to pid %d, err = %d\n", - irq->fd, pid, -err); - } - - irq->pid = pid; - } - irq_unlock(flags); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v0.10.2 From 8e367065eea04a6fde5cb8f3854164af99c45693 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:32 -0800 Subject: [PATCH] uml: move SIGIO startup code to os-Linux/start_up.c The serial UML OS-abstraction layer patch (um/kernel dir). This moves all startup code from sigio_user.c file under os-Linux dir Signed-off-by: Gennady Sharapov Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/os.h b/arch/um/include/os.h index efae3e6..6d865d4 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -161,6 +161,7 @@ extern int os_lock_file(int fd, int excl); /* start_up.c */ extern void os_early_checks(void); extern int can_do_skas(void); +extern void os_check_bugs(void); /* Make sure they are clear when running in TT mode. Required by * SEGV_MAYBE_FIXABLE */ diff --git a/arch/um/include/sigio.h b/arch/um/include/sigio.h index 37d76e2..1432fcf 100644 --- a/arch/um/include/sigio.h +++ b/arch/um/include/sigio.h @@ -8,7 +8,6 @@ extern int write_sigio_irq(int fd); extern int register_sigio_fd(int fd); -extern int read_sigio_fd(int fd); extern int add_sigio_fd(int fd, int read); extern int ignore_sigio_fd(int fd); extern void sigio_lock(void); diff --git a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c index 2299884..1c1300f 100644 --- a/arch/um/kernel/sigio_kern.c +++ b/arch/um/kernel/sigio_kern.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -12,13 +12,16 @@ #include "sigio.h" #include "irq_user.h" #include "irq_kern.h" +#include "os.h" /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; static irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) { - read_sigio_fd(sigio_irq_fd); + char c; + + os_read_file(sigio_irq_fd, &c, sizeof(c)); reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); return(IRQ_HANDLED); } @@ -51,6 +54,9 @@ void sigio_unlock(void) spin_unlock(&sigio_spinlock); } +extern void sigio_cleanup(void); +__uml_exitcall(sigio_cleanup); + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c index f7b18e1..9bef941 100644 --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -20,127 +20,6 @@ #include "sigio.h" #include "os.h" -/* Changed during early boot */ -int pty_output_sigio = 0; -int pty_close_sigio = 0; - -/* Used as a flag during SIGIO testing early in boot */ -static volatile int got_sigio = 0; - -void __init handler(int sig) -{ - got_sigio = 1; -} - -struct openpty_arg { - int master; - int slave; - int err; -}; - -static void openpty_cb(void *arg) -{ - struct openpty_arg *info = arg; - - info->err = 0; - if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) - info->err = -errno; -} - -void __init check_one_sigio(void (*proc)(int, int)) -{ - struct sigaction old, new; - struct openpty_arg pty = { .master = -1, .slave = -1 }; - int master, slave, err; - - initial_thread_cb(openpty_cb, &pty); - if(pty.err){ - printk("openpty failed, errno = %d\n", -pty.err); - return; - } - - master = pty.master; - slave = pty.slave; - - if((master == -1) || (slave == -1)){ - printk("openpty failed to allocate a pty\n"); - return; - } - - /* Not now, but complain so we now where we failed. */ - err = raw(master); - if (err < 0) - panic("check_sigio : __raw failed, errno = %d\n", -err); - - err = os_sigio_async(master, slave); - if(err < 0) - panic("tty_fds : sigio_async failed, err = %d\n", -err); - - if(sigaction(SIGIO, NULL, &old) < 0) - panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); - new = old; - new.sa_handler = handler; - if(sigaction(SIGIO, &new, NULL) < 0) - panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); - - got_sigio = 0; - (*proc)(master, slave); - - os_close_file(master); - os_close_file(slave); - - if(sigaction(SIGIO, &old, NULL) < 0) - panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); -} - -static void tty_output(int master, int slave) -{ - int n; - char buf[512]; - - printk("Checking that host ptys support output SIGIO..."); - - memset(buf, 0, sizeof(buf)); - - while(os_write_file(master, buf, sizeof(buf)) > 0) ; - if(errno != EAGAIN) - panic("check_sigio : write failed, errno = %d\n", errno); - while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; - - if (got_sigio) { - printk("Yes\n"); - pty_output_sigio = 1; - } else if (n == -EAGAIN) { - printk("No, enabling workaround\n"); - } else { - panic("check_sigio : read failed, err = %d\n", n); - } -} - -static void tty_close(int master, int slave) -{ - printk("Checking that host ptys support SIGIO on close..."); - - os_close_file(slave); - if(got_sigio){ - printk("Yes\n"); - pty_close_sigio = 1; - } - else printk("No, enabling workaround\n"); -} - -void __init check_sigio(void) -{ - if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && - (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ - printk("No pseudo-terminals available - skipping pty SIGIO " - "check\n"); - return; - } - check_one_sigio(tty_output); - check_one_sigio(tty_close); -} - /* Protected by sigio_lock(), also used by sigio_cleanup, which is an * exitcall. */ @@ -267,10 +146,10 @@ static void update_thread(void) if(write_sigio_pid != -1) os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; - os_close_file(sigio_private[0]); - os_close_file(sigio_private[1]); - os_close_file(write_sigio_fds[0]); - os_close_file(write_sigio_fds[1]); + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); /* Critical section end */ set_signals(flags); } @@ -428,39 +307,18 @@ void write_sigio_workaround(void) out_free: kfree(p); out_close2: - os_close_file(l_sigio_private[0]); - os_close_file(l_sigio_private[1]); + close(l_sigio_private[0]); + close(l_sigio_private[1]); out_close1: - os_close_file(l_write_sigio_fds[0]); - os_close_file(l_write_sigio_fds[1]); + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); return; } -int read_sigio_fd(int fd) -{ - int n; - char c; - - n = os_read_file(fd, &c, sizeof(c)); - if(n != sizeof(c)){ - if(n < 0) { - printk("read_sigio_fd - read failed, err = %d\n", -n); - return(n); - } - else { - printk("read_sigio_fd - short read, bytes = %d\n", n); - return(-EIO); - } - } - return(n); -} - -static void sigio_cleanup(void) +void sigio_cleanup(void) { if (write_sigio_pid != -1) { os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; } } - -__uml_exitcall(sigio_cleanup); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0c5b9dc..bb1c872 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -487,8 +487,7 @@ void __init setup_arch(char **cmdline_p) void __init check_bugs(void) { arch_check_bugs(); - check_sigio(); - check_devanon(); + os_check_bugs(); } void apply_alternatives(struct alt_instr *start, struct alt_instr *end) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 829d6b0..3275313 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include #include #include #include @@ -539,3 +540,130 @@ int __init parse_iomem(char *str, int *add) return(1); } + +/* Changed during early boot */ +int pty_output_sigio = 0; +int pty_close_sigio = 0; + +/* Used as a flag during SIGIO testing early in boot */ +static volatile int got_sigio = 0; + +static void __init handler(int sig) +{ + got_sigio = 1; +} + +struct openpty_arg { + int master; + int slave; + int err; +}; + +static void openpty_cb(void *arg) +{ + struct openpty_arg *info = arg; + + info->err = 0; + if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) + info->err = -errno; +} + +static void __init check_one_sigio(void (*proc)(int, int)) +{ + struct sigaction old, new; + struct openpty_arg pty = { .master = -1, .slave = -1 }; + int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if(pty.err){ + printk("openpty failed, errno = %d\n", -pty.err); + return; + } + + master = pty.master; + slave = pty.slave; + + if((master == -1) || (slave == -1)){ + printk("openpty failed to allocate a pty\n"); + return; + } + + /* Not now, but complain so we now where we failed. */ + err = raw(master); + if (err < 0) + panic("check_sigio : __raw failed, errno = %d\n", -err); + + err = os_sigio_async(master, slave); + if(err < 0) + panic("tty_fds : sigio_async failed, err = %d\n", -err); + + if(sigaction(SIGIO, NULL, &old) < 0) + panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); + new = old; + new.sa_handler = handler; + if(sigaction(SIGIO, &new, NULL) < 0) + panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); + + got_sigio = 0; + (*proc)(master, slave); + + close(master); + close(slave); + + if(sigaction(SIGIO, &old, NULL) < 0) + panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); +} + +static void tty_output(int master, int slave) +{ + int n; + char buf[512]; + + printk("Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); + + while(os_write_file(master, buf, sizeof(buf)) > 0) ; + if(errno != EAGAIN) + panic("check_sigio : write failed, errno = %d\n", errno); + while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; + + if(got_sigio){ + printk("Yes\n"); + pty_output_sigio = 1; + } + else if(n == -EAGAIN) printk("No, enabling workaround\n"); + else panic("check_sigio : read failed, err = %d\n", n); +} + +static void tty_close(int master, int slave) +{ + printk("Checking that host ptys support SIGIO on close..."); + + close(slave); + if(got_sigio){ + printk("Yes\n"); + pty_close_sigio = 1; + } + else printk("No, enabling workaround\n"); +} + +void __init check_sigio(void) +{ + if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && + (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ + printk("No pseudo-terminals available - skipping pty SIGIO " + "check\n"); + return; + } + check_one_sigio(tty_output); + check_one_sigio(tty_close); +} + +void os_check_bugs(void) +{ + check_ptrace(); + check_sigio(); + check_devanon(); +} + -- cgit v0.10.2 From f206aabb035318ac4bafbf0b87798335de3634df Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:33 -0800 Subject: [PATCH] uml: move sigio_user.c to os-Linux/sigio.c The serial UML OS-abstraction layer patch (um/kernel dir). This moves sigio_user.c to os-Linux dir Signed-off-by: Gennady Sharapov Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 6d865d4..53dee8d 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -311,4 +311,9 @@ extern void os_set_pollfd(int i, int fd); extern void os_set_ioignore(void); extern void init_irq_signals(int on_sigstack); +/* sigio.c */ +extern void write_sigio_workaround(void); +extern int add_sigio_fd(int fd, int read); +extern int ignore_sigio_fd(int fd); + #endif diff --git a/arch/um/include/sigio.h b/arch/um/include/sigio.h index 1432fcf..fe99ea1 100644 --- a/arch/um/include/sigio.h +++ b/arch/um/include/sigio.h @@ -8,8 +8,6 @@ extern int write_sigio_irq(int fd); extern int register_sigio_fd(int fd); -extern int add_sigio_fd(int fd, int read); -extern int ignore_sigio_fd(int fd); extern void sigio_lock(void); extern void sigio_unlock(void); diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index a6f1f17..992a7e1 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -58,7 +58,6 @@ extern int attach(int pid); extern void kill_child_dead(int pid); extern int cont(int pid); extern void check_sigio(void); -extern void write_sigio_workaround(void); extern void arch_check_bugs(void); extern int cpu_feature(char *what, char *buf, int len); extern int arch_handle_signal(int sig, union uml_pt_regs *regs); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 8458c30..ac5afaa 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -8,7 +8,7 @@ clean-files := obj-y = config.o exec_kern.o exitcode.o \ init_task.o irq.o ksyms.o mem.o physmem.o \ - process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \ + process_kern.o ptrace.o reboot.o resource.o sigio_kern.o \ signal_kern.o smp.o syscall_kern.o sysrq.o \ time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c deleted file mode 100644 index 9bef941..0000000 --- a/arch/um/kernel/sigio_user.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "init.h" -#include "user.h" -#include "kern_util.h" -#include "user_util.h" -#include "sigio.h" -#include "os.h" - -/* Protected by sigio_lock(), also used by sigio_cleanup, which is an - * exitcall. - */ -static int write_sigio_pid = -1; - -/* These arrays are initialized before the sigio thread is started, and - * the descriptors closed after it is killed. So, it can't see them change. - * On the UML side, they are changed under the sigio_lock. - */ -static int write_sigio_fds[2] = { -1, -1 }; -static int sigio_private[2] = { -1, -1 }; - -struct pollfds { - struct pollfd *poll; - int size; - int used; -}; - -/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread - * synchronizes with it. - */ -struct pollfds current_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; - -struct pollfds next_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; - -static int write_sigio_thread(void *unused) -{ - struct pollfds *fds, tmp; - struct pollfd *p; - int i, n, respond_fd; - char c; - - signal(SIGWINCH, SIG_IGN); - fds = ¤t_poll; - while(1){ - n = poll(fds->poll, fds->used, -1); - if(n < 0){ - if(errno == EINTR) continue; - printk("write_sigio_thread : poll returned %d, " - "errno = %d\n", n, errno); - } - for(i = 0; i < fds->used; i++){ - p = &fds->poll[i]; - if(p->revents == 0) continue; - if(p->fd == sigio_private[1]){ - n = os_read_file(sigio_private[1], &c, sizeof(c)); - if(n != sizeof(c)) - printk("write_sigio_thread : " - "read failed, err = %d\n", -n); - tmp = current_poll; - current_poll = next_poll; - next_poll = tmp; - respond_fd = sigio_private[1]; - } - else { - respond_fd = write_sigio_fds[1]; - fds->used--; - memmove(&fds->poll[i], &fds->poll[i + 1], - (fds->used - i) * sizeof(*fds->poll)); - } - - n = os_write_file(respond_fd, &c, sizeof(c)); - if(n != sizeof(c)) - printk("write_sigio_thread : write failed, " - "err = %d\n", -n); - } - } - - return 0; -} - -static int need_poll(int n) -{ - if(n <= next_poll.size){ - next_poll.used = n; - return(0); - } - kfree(next_poll.poll); - next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); - if(next_poll.poll == NULL){ - printk("need_poll : failed to allocate new pollfds\n"); - next_poll.size = 0; - next_poll.used = 0; - return(-1); - } - next_poll.size = n; - next_poll.used = n; - return(0); -} - -/* Must be called with sigio_lock held, because it's needed by the marked - * critical section. */ -static void update_thread(void) -{ - unsigned long flags; - int n; - char c; - - flags = set_signals(0); - n = os_write_file(sigio_private[0], &c, sizeof(c)); - if(n != sizeof(c)){ - printk("update_thread : write failed, err = %d\n", -n); - goto fail; - } - - n = os_read_file(sigio_private[0], &c, sizeof(c)); - if(n != sizeof(c)){ - printk("update_thread : read failed, err = %d\n", -n); - goto fail; - } - - set_signals(flags); - return; - fail: - /* Critical section start */ - if(write_sigio_pid != -1) - os_kill_process(write_sigio_pid, 1); - write_sigio_pid = -1; - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); - /* Critical section end */ - set_signals(flags); -} - -int add_sigio_fd(int fd, int read) -{ - int err = 0, i, n, events; - - sigio_lock(); - for(i = 0; i < current_poll.used; i++){ - if(current_poll.poll[i].fd == fd) - goto out; - } - - n = current_poll.used + 1; - err = need_poll(n); - if(err) - goto out; - - for(i = 0; i < current_poll.used; i++) - next_poll.poll[i] = current_poll.poll[i]; - - if(read) events = POLLIN; - else events = POLLOUT; - - next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); - update_thread(); - out: - sigio_unlock(); - return(err); -} - -int ignore_sigio_fd(int fd) -{ - struct pollfd *p; - int err = 0, i, n = 0; - - sigio_lock(); - for(i = 0; i < current_poll.used; i++){ - if(current_poll.poll[i].fd == fd) break; - } - if(i == current_poll.used) - goto out; - - err = need_poll(current_poll.used - 1); - if(err) - goto out; - - for(i = 0; i < current_poll.used; i++){ - p = ¤t_poll.poll[i]; - if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; - } - if(n == i){ - printk("ignore_sigio_fd : fd %d not found\n", fd); - err = -1; - goto out; - } - - update_thread(); - out: - sigio_unlock(); - return(err); -} - -static struct pollfd* setup_initial_poll(int fd) -{ - struct pollfd *p; - - p = um_kmalloc(sizeof(struct pollfd)); - if (p == NULL) { - printk("setup_initial_poll : failed to allocate poll\n"); - return NULL; - } - *p = ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); - return p; -} - -void write_sigio_workaround(void) -{ - unsigned long stack; - struct pollfd *p; - int err; - int l_write_sigio_fds[2]; - int l_sigio_private[2]; - int l_write_sigio_pid; - - /* We call this *tons* of times - and most ones we must just fail. */ - sigio_lock(); - l_write_sigio_pid = write_sigio_pid; - sigio_unlock(); - - if (l_write_sigio_pid != -1) - return; - - err = os_pipe(l_write_sigio_fds, 1, 1); - if(err < 0){ - printk("write_sigio_workaround - os_pipe 1 failed, " - "err = %d\n", -err); - return; - } - err = os_pipe(l_sigio_private, 1, 1); - if(err < 0){ - printk("write_sigio_workaround - os_pipe 1 failed, " - "err = %d\n", -err); - goto out_close1; - } - - p = setup_initial_poll(l_sigio_private[1]); - if(!p) - goto out_close2; - - sigio_lock(); - - /* Did we race? Don't try to optimize this, please, it's not so likely - * to happen, and no more than once at the boot. */ - if(write_sigio_pid != -1) - goto out_unlock; - - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, - CLONE_FILES | CLONE_VM, &stack, 0); - - if (write_sigio_pid < 0) - goto out_clear; - - if (write_sigio_irq(l_write_sigio_fds[0])) - goto out_kill; - - /* Success, finally. */ - memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); - memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - - sigio_unlock(); - return; - - out_kill: - l_write_sigio_pid = write_sigio_pid; - write_sigio_pid = -1; - sigio_unlock(); - /* Going to call waitpid, avoid holding the lock. */ - os_kill_process(l_write_sigio_pid, 1); - goto out_free; - - out_clear: - write_sigio_pid = -1; - out_unlock: - sigio_unlock(); - out_free: - kfree(p); - out_close2: - close(l_sigio_private[0]); - close(l_sigio_private[1]); - out_close1: - close(l_write_sigio_fds[0]); - close(l_write_sigio_fds[1]); - return; -} - -void sigio_cleanup(void) -{ - if (write_sigio_pid != -1) { - os_kill_process(write_sigio_pid, 1); - write_sigio_pid = -1; - } -} diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 73d3d83..c3d56c2 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,14 +3,15 @@ # Licensed under the GPL # -obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o signal.o \ - start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \ - util.o drivers/ sys-$(SUBARCH)/ +obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o sigio.o \ + signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \ + user_syms.o util.o drivers/ sys-$(SUBARCH)/ obj-$(CONFIG_MODE_SKAS) += skas/ USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \ - signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o util.o + sigio.o signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \ + util.o elf_aux.o: $(ARCH_DIR)/kernel-offsets.h CFLAGS_elf_aux.o += -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c new file mode 100644 index 0000000..7604c40 --- /dev/null +++ b/arch/um/os-Linux/sigio.c @@ -0,0 +1,324 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "init.h" +#include "user.h" +#include "kern_util.h" +#include "user_util.h" +#include "sigio.h" +#include "os.h" + +/* Protected by sigio_lock(), also used by sigio_cleanup, which is an + * exitcall. + */ +static int write_sigio_pid = -1; + +/* These arrays are initialized before the sigio thread is started, and + * the descriptors closed after it is killed. So, it can't see them change. + * On the UML side, they are changed under the sigio_lock. + */ +static int write_sigio_fds[2] = { -1, -1 }; +static int sigio_private[2] = { -1, -1 }; + +struct pollfds { + struct pollfd *poll; + int size; + int used; +}; + +/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread + * synchronizes with it. + */ +struct pollfds current_poll = { + .poll = NULL, + .size = 0, + .used = 0 +}; + +struct pollfds next_poll = { + .poll = NULL, + .size = 0, + .used = 0 +}; + +static int write_sigio_thread(void *unused) +{ + struct pollfds *fds, tmp; + struct pollfd *p; + int i, n, respond_fd; + char c; + + signal(SIGWINCH, SIG_IGN); + fds = ¤t_poll; + while(1){ + n = poll(fds->poll, fds->used, -1); + if(n < 0){ + if(errno == EINTR) continue; + printk("write_sigio_thread : poll returned %d, " + "errno = %d\n", n, errno); + } + for(i = 0; i < fds->used; i++){ + p = &fds->poll[i]; + if(p->revents == 0) continue; + if(p->fd == sigio_private[1]){ + n = os_read_file(sigio_private[1], &c, sizeof(c)); + if(n != sizeof(c)) + printk("write_sigio_thread : " + "read failed, err = %d\n", -n); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; + respond_fd = sigio_private[1]; + } + else { + respond_fd = write_sigio_fds[1]; + fds->used--; + memmove(&fds->poll[i], &fds->poll[i + 1], + (fds->used - i) * sizeof(*fds->poll)); + } + + n = os_write_file(respond_fd, &c, sizeof(c)); + if(n != sizeof(c)) + printk("write_sigio_thread : write failed, " + "err = %d\n", -n); + } + } + + return 0; +} + +static int need_poll(int n) +{ + if(n <= next_poll.size){ + next_poll.used = n; + return(0); + } + kfree(next_poll.poll); + next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); + if(next_poll.poll == NULL){ + printk("need_poll : failed to allocate new pollfds\n"); + next_poll.size = 0; + next_poll.used = 0; + return(-1); + } + next_poll.size = n; + next_poll.used = n; + return(0); +} + +/* Must be called with sigio_lock held, because it's needed by the marked + * critical section. */ +static void update_thread(void) +{ + unsigned long flags; + int n; + char c; + + flags = set_signals(0); + n = os_write_file(sigio_private[0], &c, sizeof(c)); + if(n != sizeof(c)){ + printk("update_thread : write failed, err = %d\n", -n); + goto fail; + } + + n = os_read_file(sigio_private[0], &c, sizeof(c)); + if(n != sizeof(c)){ + printk("update_thread : read failed, err = %d\n", -n); + goto fail; + } + + set_signals(flags); + return; + fail: + /* Critical section start */ + if(write_sigio_pid != -1) + os_kill_process(write_sigio_pid, 1); + write_sigio_pid = -1; + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); + /* Critical section end */ + set_signals(flags); +} + +int add_sigio_fd(int fd, int read) +{ + int err = 0, i, n, events; + + sigio_lock(); + for(i = 0; i < current_poll.used; i++){ + if(current_poll.poll[i].fd == fd) + goto out; + } + + n = current_poll.used + 1; + err = need_poll(n); + if(err) + goto out; + + for(i = 0; i < current_poll.used; i++) + next_poll.poll[i] = current_poll.poll[i]; + + if(read) events = POLLIN; + else events = POLLOUT; + + next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, + .events = events, + .revents = 0 }); + update_thread(); + out: + sigio_unlock(); + return(err); +} + +int ignore_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n = 0; + + sigio_lock(); + for(i = 0; i < current_poll.used; i++){ + if(current_poll.poll[i].fd == fd) break; + } + if(i == current_poll.used) + goto out; + + err = need_poll(current_poll.used - 1); + if(err) + goto out; + + for(i = 0; i < current_poll.used; i++){ + p = ¤t_poll.poll[i]; + if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; + } + if(n == i){ + printk("ignore_sigio_fd : fd %d not found\n", fd); + err = -1; + goto out; + } + + update_thread(); + out: + sigio_unlock(); + return(err); +} + +static struct pollfd *setup_initial_poll(int fd) +{ + struct pollfd *p; + + p = um_kmalloc(sizeof(struct pollfd)); + if (p == NULL) { + printk("setup_initial_poll : failed to allocate poll\n"); + return NULL; + } + *p = ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); + return p; +} + +void write_sigio_workaround(void) +{ + unsigned long stack; + struct pollfd *p; + int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + + /* We call this *tons* of times - and most ones we must just fail. */ + sigio_lock(); + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); + if(err < 0){ + printk("write_sigio_workaround - os_pipe 1 failed, " + "err = %d\n", -err); + return; + } + err = os_pipe(l_sigio_private, 1, 1); + if(err < 0){ + printk("write_sigio_workaround - os_pipe 2 failed, " + "err = %d\n", -err); + goto out_close1; + } + + p = setup_initial_poll(l_sigio_private[1]); + if(!p) + goto out_close2; + + sigio_lock(); + + /* Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. */ + if(write_sigio_pid != -1) + goto out_unlock; + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, &stack, 0); + + if (write_sigio_pid < 0) + goto out_clear; + + if (write_sigio_irq(l_write_sigio_fds[0])) + goto out_kill; + + /* Success, finally. */ + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + + sigio_unlock(); + return; + + out_kill: + l_write_sigio_pid = write_sigio_pid; + write_sigio_pid = -1; + sigio_unlock(); + /* Going to call waitpid, avoid holding the lock. */ + os_kill_process(l_write_sigio_pid, 1); + goto out_free; + + out_clear: + write_sigio_pid = -1; + out_unlock: + sigio_unlock(); + out_free: + kfree(p); + out_close2: + close(l_sigio_private[0]); + close(l_sigio_private[1]); + out_close1: + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); + return; +} + +void sigio_cleanup(void) +{ + if(write_sigio_pid != -1){ + os_kill_process(write_sigio_pid, 1); + write_sigio_pid = -1; + } +} -- cgit v0.10.2 From 81efcd3300754462537ffac60336158b2f773b4e Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 27 Mar 2006 01:14:34 -0800 Subject: [PATCH] uml: more carefully test whether we are in a system call For security reasons, UML in is_syscall() needs to have access to code in vsyscall-page. The current implementation grants this access by explicitly allowing access to vsyscall in access_ok_skas(). With this change, copy_from_user() may be used to read the code. Ptrace access to vsyscall-page for debugging already was implemented in get_user_pages() by mainline. In i386, copy_from_user can't access vsyscall-page, but returns EFAULT. To make UML behave as i386 does, I changed is_syscall to use access_process_vm(current) to read the code from vsyscall-page. This doesn't hurt security, but simplifies the code and prepares implementation of stub-vmas. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index e839ce6..8032a10 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -6,6 +6,7 @@ #include #include #include "linux/sched.h" +#include "linux/mm.h" #include "asm/elf.h" #include "asm/ptrace.h" #include "asm/uaccess.h" @@ -26,9 +27,17 @@ int is_syscall(unsigned long addr) n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); if(n){ - printk("is_syscall : failed to read instruction from 0x%lx\n", - addr); - return(0); + /* access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if(n != sizeof(instr)) { + printk("is_syscall : failed to read instruction from " + "0x%lx\n", addr); + return(1); + } } /* int 0x80 or sysenter */ return((instr == 0x80cd) || (instr == 0x340f)); diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index 74eee5c..147bbf0 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -136,9 +137,28 @@ void arch_switch(void) */ } +/* XXX Mostly copied from sys-i386 */ int is_syscall(unsigned long addr) { - panic("is_syscall"); + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if(n){ + /* access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if(n != sizeof(instr)) { + printk("is_syscall : failed to read instruction from " + "0x%lx\n", addr); + return(1); + } + } + /* sysenter */ + return(instr == 0x050f); } int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu ) -- cgit v0.10.2 From c554f899b6747c9d8035bf91864d89b337ceb411 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:36 -0800 Subject: [PATCH] uml: move tty logging to os-Linux The serial UML OS-abstraction layer patch (um/kernel dir). This moves all systemcalls from tty_log.c file under os-Linux dir Signed-off-by: Gennady Sharapov Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index ac5afaa..fe08971 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -15,15 +15,12 @@ obj-y = config.o exec_kern.o exitcode.o \ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o -obj-$(CONFIG_TTY_LOG) += tty_log.o obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ -user-objs-$(CONFIG_TTY_LOG) += tty_log.o - -USER_OBJS := $(user-objs-y) config.o tty_log.o +USER_OBJS := config.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c index c264e1c..1ca8431 100644 --- a/arch/um/kernel/exec_kern.c +++ b/arch/um/kernel/exec_kern.c @@ -30,8 +30,6 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); } -extern void log_exec(char **argv, void *tty); - static long execve1(char *file, char __user * __user *argv, char __user *__user *env) { diff --git a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c deleted file mode 100644 index 9ada656..0000000 --- a/arch/um/kernel/tty_log.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and - * geoffrey hing - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include "init.h" -#include "user.h" -#include "kern_util.h" -#include "os.h" - -#define TTY_LOG_DIR "./" - -/* Set early in boot and then unchanged */ -static char *tty_log_dir = TTY_LOG_DIR; -static int tty_log_fd = -1; - -#define TTY_LOG_OPEN 1 -#define TTY_LOG_CLOSE 2 -#define TTY_LOG_WRITE 3 -#define TTY_LOG_EXEC 4 - -#define TTY_READ 1 -#define TTY_WRITE 2 - -struct tty_log_buf { - int what; - unsigned long tty; - int len; - int direction; - unsigned long sec; - unsigned long usec; -}; - -int open_tty_log(void *tty, void *current_tty) -{ - struct timeval tv; - struct tty_log_buf data; - char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; - int fd; - - gettimeofday(&tv, NULL); - if(tty_log_fd != -1){ - data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, - .tty = (unsigned long) tty, - .len = sizeof(current_tty), - .direction = 0, - .sec = tv.tv_sec, - .usec = tv.tv_usec } ); - os_write_file(tty_log_fd, &data, sizeof(data)); - os_write_file(tty_log_fd, ¤t_tty, data.len); - return(tty_log_fd); - } - - sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, - (unsigned int) tv.tv_usec); - - fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))), - 0644); - if(fd < 0){ - printk("open_tty_log : couldn't open '%s', errno = %d\n", - buf, -fd); - } - return(fd); -} - -void close_tty_log(int fd, void *tty) -{ - struct tty_log_buf data; - struct timeval tv; - - if(tty_log_fd != -1){ - gettimeofday(&tv, NULL); - data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, - .tty = (unsigned long) tty, - .len = 0, - .direction = 0, - .sec = tv.tv_sec, - .usec = tv.tv_usec } ); - os_write_file(tty_log_fd, &data, sizeof(data)); - return; - } - os_close_file(fd); -} - -static int log_chunk(int fd, const char *buf, int len) -{ - int total = 0, try, missed, n; - char chunk[64]; - - while(len > 0){ - try = (len > sizeof(chunk)) ? sizeof(chunk) : len; - missed = copy_from_user_proc(chunk, (char *) buf, try); - try -= missed; - n = os_write_file(fd, chunk, try); - if(n != try) { - if(n < 0) - return(n); - return(-EIO); - } - if(missed != 0) - return(-EFAULT); - - len -= try; - total += try; - buf += try; - } - - return(total); -} - -int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) -{ - struct timeval tv; - struct tty_log_buf data; - int direction; - - if(fd == tty_log_fd){ - gettimeofday(&tv, NULL); - direction = is_read ? TTY_READ : TTY_WRITE; - data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, - .tty = (unsigned long) tty, - .len = len, - .direction = direction, - .sec = tv.tv_sec, - .usec = tv.tv_usec } ); - os_write_file(tty_log_fd, &data, sizeof(data)); - } - - return(log_chunk(fd, buf, len)); -} - -void log_exec(char **argv, void *tty) -{ - struct timeval tv; - struct tty_log_buf data; - char **ptr,*arg; - int len; - - if(tty_log_fd == -1) return; - - gettimeofday(&tv, NULL); - - len = 0; - for(ptr = argv; ; ptr++){ - if(copy_from_user_proc(&arg, ptr, sizeof(arg))) - return; - if(arg == NULL) break; - len += strlen_user_proc(arg); - } - - data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, - .tty = (unsigned long) tty, - .len = len, - .direction = 0, - .sec = tv.tv_sec, - .usec = tv.tv_usec } ); - os_write_file(tty_log_fd, &data, sizeof(data)); - - for(ptr = argv; ; ptr++){ - if(copy_from_user_proc(&arg, ptr, sizeof(arg))) - return; - if(arg == NULL) break; - log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); - } -} - -extern void register_tty_logger(int (*opener)(void *, void *), - int (*writer)(int, const char *, int, - void *, int), - void (*closer)(int, void *)); - -static int register_logger(void) -{ - register_tty_logger(open_tty_log, write_tty_log, close_tty_log); - return(0); -} - -__uml_initcall(register_logger); - -static int __init set_tty_log_dir(char *name, int *add) -{ - tty_log_dir = name; - return 0; -} - -__uml_setup("tty_log_dir=", set_tty_log_dir, -"tty_log_dir=\n" -" This is used to specify the directory where the logs of all pty\n" -" data from this UML machine will be written.\n\n" -); - -static int __init set_tty_log_fd(char *name, int *add) -{ - char *end; - - tty_log_fd = strtoul(name, &end, 0); - if((*end != '\0') || (end == name)){ - printf("set_tty_log_fd - strtoul failed on '%s'\n", name); - tty_log_fd = -1; - } - - *add = 0; - return 0; -} - -__uml_setup("tty_log_fd=", set_tty_log_fd, -"tty_log_fd=\n" -" This is used to specify a preconfigured file descriptor to which all\n" -" tty data will be written. Preconfigure the descriptor with something\n" -" like '10>tty_log tty_log_fd=10'.\n\n" -); - - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index c3d56c2..1659386 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -8,10 +8,12 @@ obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o sigio.o \ user_syms.o util.o drivers/ sys-$(SUBARCH)/ obj-$(CONFIG_MODE_SKAS) += skas/ +obj-$(CONFIG_TTY_LOG) += tty_log.o +user-objs-$(CONFIG_TTY_LOG) += tty_log.o -USER_OBJS := aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o \ - sigio.o signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \ - util.o +USER_OBJS := $(user-objs-y) aio.o elf_aux.o file.o helper.o irq.o main.o mem.o \ + process.o sigio.o signal.o start_up.o time.o trap.o tt.o tty.o \ + uaccess.o umid.o util.o elf_aux.o: $(ARCH_DIR)/kernel-offsets.h CFLAGS_elf_aux.o += -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/tty_log.c b/arch/um/os-Linux/tty_log.c new file mode 100644 index 0000000..c6ba56c --- /dev/null +++ b/arch/um/os-Linux/tty_log.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and + * geoffrey hing + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include "init.h" +#include "user.h" +#include "kern_util.h" +#include "os.h" + +#define TTY_LOG_DIR "./" + +/* Set early in boot and then unchanged */ +static char *tty_log_dir = TTY_LOG_DIR; +static int tty_log_fd = -1; + +#define TTY_LOG_OPEN 1 +#define TTY_LOG_CLOSE 2 +#define TTY_LOG_WRITE 3 +#define TTY_LOG_EXEC 4 + +#define TTY_READ 1 +#define TTY_WRITE 2 + +struct tty_log_buf { + int what; + unsigned long tty; + int len; + int direction; + unsigned long sec; + unsigned long usec; +}; + +int open_tty_log(void *tty, void *current_tty) +{ + struct timeval tv; + struct tty_log_buf data; + char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; + int fd; + + gettimeofday(&tv, NULL); + if(tty_log_fd != -1){ + data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, + .tty = (unsigned long) tty, + .len = sizeof(current_tty), + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + os_write_file(tty_log_fd, ¤t_tty, data.len); + return(tty_log_fd); + } + + sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, + (unsigned int) tv.tv_usec); + + fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))), + 0644); + if(fd < 0){ + printk("open_tty_log : couldn't open '%s', errno = %d\n", + buf, -fd); + } + return(fd); +} + +void close_tty_log(int fd, void *tty) +{ + struct tty_log_buf data; + struct timeval tv; + + if(tty_log_fd != -1){ + gettimeofday(&tv, NULL); + data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, + .tty = (unsigned long) tty, + .len = 0, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + return; + } + os_close_file(fd); +} + +static int log_chunk(int fd, const char *buf, int len) +{ + int total = 0, try, missed, n; + char chunk[64]; + + while(len > 0){ + try = (len > sizeof(chunk)) ? sizeof(chunk) : len; + missed = copy_from_user_proc(chunk, (char *) buf, try); + try -= missed; + n = os_write_file(fd, chunk, try); + if(n != try) { + if(n < 0) + return(n); + return(-EIO); + } + if(missed != 0) + return(-EFAULT); + + len -= try; + total += try; + buf += try; + } + + return(total); +} + +int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) +{ + struct timeval tv; + struct tty_log_buf data; + int direction; + + if(fd == tty_log_fd){ + gettimeofday(&tv, NULL); + direction = is_read ? TTY_READ : TTY_WRITE; + data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, + .tty = (unsigned long) tty, + .len = len, + .direction = direction, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + } + + return(log_chunk(fd, buf, len)); +} + +void log_exec(char **argv, void *tty) +{ + struct timeval tv; + struct tty_log_buf data; + char **ptr,*arg; + int len; + + if(tty_log_fd == -1) return; + + gettimeofday(&tv, NULL); + + len = 0; + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + len += strlen_user_proc(arg); + } + + data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, + .tty = (unsigned long) tty, + .len = len, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); + } +} + +extern void register_tty_logger(int (*opener)(void *, void *), + int (*writer)(int, const char *, int, + void *, int), + void (*closer)(int, void *)); + +static int register_logger(void) +{ + register_tty_logger(open_tty_log, write_tty_log, close_tty_log); + return(0); +} + +__uml_initcall(register_logger); + +static int __init set_tty_log_dir(char *name, int *add) +{ + tty_log_dir = name; + return 0; +} + +__uml_setup("tty_log_dir=", set_tty_log_dir, +"tty_log_dir=\n" +" This is used to specify the directory where the logs of all pty\n" +" data from this UML machine will be written.\n\n" +); + +static int __init set_tty_log_fd(char *name, int *add) +{ + char *end; + + tty_log_fd = strtoul(name, &end, 0); + if((*end != '\0') || (end == name)){ + printf("set_tty_log_fd - strtoul failed on '%s'\n", name); + tty_log_fd = -1; + } + + *add = 0; + return 0; +} + +__uml_setup("tty_log_fd=", set_tty_log_fd, +"tty_log_fd=\n" +" This is used to specify a preconfigured file descriptor to which all\n" +" tty data will be written. Preconfigure the descriptor with something\n" +" like '10>tty_log tty_log_fd=10'.\n\n" +); -- cgit v0.10.2 From cf9165a50aa21027d2f0eefc90476ec59ae6d2b8 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:36 -0800 Subject: [PATCH] uml: oS header cleanups This rearranges the OS declarations by moving some declarations into os.h. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 53dee8d..d3d1bc6 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -122,6 +122,7 @@ static inline struct openflags of_cloexec(struct openflags flags) return(flags); } +/* file.c */ extern int os_stat_file(const char *file_name, struct uml_stat *buf); extern int os_stat_fd(const int fd, struct uml_stat *buf); extern int os_access(const char *file, int mode); @@ -157,6 +158,15 @@ extern int os_connect_socket(char *name); extern int os_file_type(char *file); extern int os_file_mode(char *file, struct openflags *mode_out); extern int os_lock_file(int fd, int excl); +extern void os_flush_stdout(void); +extern int os_stat_filesystem(char *path, long *bsize_out, + long long *blocks_out, long long *bfree_out, + long long *bavail_out, long long *files_out, + long long *ffree_out, void *fsid_out, + int fsid_size, long *namelen_out, + long *spare_out); +extern int os_change_dir(char *dir); +extern int os_fchange_dir(int fd); /* start_up.c */ extern void os_early_checks(void); @@ -316,4 +326,8 @@ extern void write_sigio_workaround(void); extern int add_sigio_fd(int fd, int read); extern int ignore_sigio_fd(int fd); +/* skas/trap */ +extern void sig_handler_common_skas(int sig, void *sc_ptr); +extern void user_signal(int sig, union uml_pt_regs *regs, int pid); + #endif diff --git a/arch/um/include/skas/mode-skas.h b/arch/um/include/skas/mode-skas.h index 260065c..8bc6916 100644 --- a/arch/um/include/skas/mode-skas.h +++ b/arch/um/include/skas/mode-skas.h @@ -13,7 +13,6 @@ extern unsigned long exec_fp_regs[]; extern unsigned long exec_fpx_regs[]; extern int have_fpx_regs; -extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void kill_off_processes_skas(void); #endif diff --git a/arch/um/include/skas/skas.h b/arch/um/include/skas/skas.h index 8635728..853b26f 100644 --- a/arch/um/include/skas/skas.h +++ b/arch/um/include/skas/skas.h @@ -17,7 +17,6 @@ extern int user_thread(unsigned long stack, int flags); extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(unsigned long stack); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); -- cgit v0.10.2 From 6c29256c5703ad7c3cf62276dbc38d802a05669e Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:37 -0800 Subject: [PATCH] uml: allow ubd devices to be shared in a cluster This adds a 'c' option to the ubd switch which turns off host file locking so that the device can be shared, as with a cluster. There's also some whitespace cleanup while I was in this file. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index fa617e0..0336575 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -71,7 +71,7 @@ struct io_thread_req { int error; }; -extern int open_ubd_file(char *file, struct openflags *openflags, +extern int open_ubd_file(char *file, struct openflags *openflags, int shared, char **backing_file_out, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out, int *create_cow_out); @@ -137,7 +137,7 @@ static int fake_major = MAJOR_NR; static struct gendisk *ubd_gendisk[MAX_DEV]; static struct gendisk *fake_gendisk[MAX_DEV]; - + #ifdef CONFIG_BLK_DEV_UBD_SYNC #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ .cl = 1 }) @@ -168,6 +168,7 @@ struct ubd { __u64 size; struct openflags boot_openflags; struct openflags openflags; + int shared; int no_cow; struct cow cow; struct platform_device pdev; @@ -189,6 +190,7 @@ struct ubd { .boot_openflags = OPEN_FLAGS, \ .openflags = OPEN_FLAGS, \ .no_cow = 0, \ + .shared = 0, \ .cow = DEFAULT_COW, \ } @@ -305,7 +307,7 @@ static int ubd_setup_common(char *str, int *index_out) } major = simple_strtoul(str, &end, 0); if((*end != '\0') || (end == str)){ - printk(KERN_ERR + printk(KERN_ERR "ubd_setup : didn't parse major number\n"); return(1); } @@ -316,7 +318,7 @@ static int ubd_setup_common(char *str, int *index_out) printk(KERN_ERR "Can't assign a fake major twice\n"); goto out1; } - + fake_major = major; printk(KERN_INFO "Setting extra ubd major number to %d\n", @@ -351,7 +353,7 @@ static int ubd_setup_common(char *str, int *index_out) if (index_out) *index_out = n; - for (i = 0; i < 4; i++) { + for (i = 0; i < sizeof("rscd="); i++) { switch (*str) { case 'r': flags.w = 0; @@ -362,11 +364,14 @@ static int ubd_setup_common(char *str, int *index_out) case 'd': dev->no_cow = 1; break; + case 'c': + dev->shared = 1; + break; case '=': str++; goto break_loop; default: - printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n"); + printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n"); goto out; } str++; @@ -515,7 +520,7 @@ static void ubd_handler(void) spin_unlock(&ubd_io_lock); return; } - + ubd_finish(rq, req.error); reactivate_fd(thread_fd, UBD_IRQ); do_ubd_request(ubd_queue); @@ -532,7 +537,7 @@ static int io_pid = -1; void kill_io_thread(void) { - if(io_pid != -1) + if(io_pid != -1) os_kill_process(io_pid, 1); } @@ -567,14 +572,15 @@ static int ubd_open_dev(struct ubd *dev) create_cow = 0; create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; back_ptr = dev->no_cow ? NULL : &dev->cow.file; - dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, - &dev->cow.bitmap_offset, &dev->cow.bitmap_len, - &dev->cow.data_offset, create_ptr); + dev->fd = open_ubd_file(dev->file, &dev->openflags, dev->shared, + back_ptr, &dev->cow.bitmap_offset, + &dev->cow.bitmap_len, &dev->cow.data_offset, + create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, - &dev->cow.bitmap_offset, + &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset); if(dev->fd >= 0){ @@ -598,16 +604,16 @@ static int ubd_open_dev(struct ubd *dev) } flush_tlb_kernel_vm(); - err = read_cow_bitmap(dev->fd, dev->cow.bitmap, - dev->cow.bitmap_offset, + err = read_cow_bitmap(dev->fd, dev->cow.bitmap, + dev->cow.bitmap_offset, dev->cow.bitmap_len); if(err < 0) goto error; flags = dev->openflags; flags.w = 0; - err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, - NULL, NULL); + err = open_ubd_file(dev->cow.file, &flags, dev->shared, NULL, + NULL, NULL, NULL, NULL); if(err < 0) goto error; dev->cow.fd = err; } @@ -685,11 +691,11 @@ static int ubd_add(int n) dev->size = ROUND_BLOCK(dev->size); err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); - if(err) + if(err) goto out_close; - + if(fake_major != MAJOR_NR) - ubd_new_disk(fake_major, dev->size, n, + ubd_new_disk(fake_major, dev->size, n, &fake_gendisk[n]); /* perhaps this should also be under the "if (fake_major)" above */ @@ -854,7 +860,7 @@ int ubd_init(void) return -1; } platform_driver_register(&ubd_driver); - for (i = 0; i < MAX_DEV; i++) + for (i = 0; i < MAX_DEV; i++) ubd_add(i); return 0; } @@ -872,16 +878,16 @@ int ubd_driver_init(void){ * enough. So use anyway the io thread. */ } stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), &thread_fd); if(io_pid < 0){ - printk(KERN_ERR + printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " "falling back to synchronous I/O\n", -io_pid); io_pid = -1; return(0); } - err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, SA_INTERRUPT, "ubd", ubd_dev); if(err != 0) printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); @@ -978,7 +984,7 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, if(req->op == UBD_READ) { for(i = 0; i < req->length >> 9; i++){ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - ubd_set_bit(i, (unsigned char *) + ubd_set_bit(i, (unsigned char *) &req->sector_mask); } } @@ -999,7 +1005,7 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ - printk("Write attempted on readonly ubd device %s\n", + printk("Write attempted on readonly ubd device %s\n", disk->disk_name); end_request(req, 0); return(1); @@ -1182,7 +1188,7 @@ int read_cow_bitmap(int fd, void *buf, int offset, int len) return(0); } -int open_ubd_file(char *file, struct openflags *openflags, +int open_ubd_file(char *file, struct openflags *openflags, int shared, char **backing_file_out, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out, int *create_cow_out) @@ -1206,10 +1212,14 @@ int open_ubd_file(char *file, struct openflags *openflags, return fd; } - err = os_lock_file(fd, openflags->w); - if(err < 0){ - printk("Failed to lock '%s', err = %d\n", file, -err); - goto out_close; + if(shared) + printk("Not locking \"%s\" on the host\n", file); + else { + err = os_lock_file(fd, openflags->w); + if(err < 0){ + printk("Failed to lock '%s', err = %d\n", file, -err); + goto out_close; + } } /* Succesful return case! */ @@ -1260,7 +1270,7 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, int err, fd; flags.c = 1; - fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); + fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); if(fd < 0){ err = fd; printk("Open of COW file '%s' failed, errno = %d\n", cow_file, -- cgit v0.10.2 From 98c18238f146a9038098244d60a7d2b1f67ee790 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:38 -0800 Subject: [PATCH] uml: fix segfault on signal delivery This fixes a process segfault where a signal was being delivered such that a new stack page needed to be allocated to hold the signal frame. This was tripping some logic in the page fault handler which wouldn't allocate the page if the faulting address was more that 32 bytes lower than the current stack pointer. Since a signal frame is greater than 32 bytes, this exercised that case. It's fixed by updating the SP in the pt_regs before starting to copy the signal frame. Since those are the registers that will be copied on to the stack, we have to be careful to put the original SP, not the new one which points to the signal frame, on the stack. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 7cd1a82..33a40f5 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -58,7 +58,7 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, } int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, - struct pt_regs *regs) + struct pt_regs *regs, unsigned long sp) { struct sigcontext sc; unsigned long fpregs[HOST_FP_SIZE]; @@ -72,7 +72,7 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, sc.edi = REGS_EDI(regs->regs.skas.regs); sc.esi = REGS_ESI(regs->regs.skas.regs); sc.ebp = REGS_EBP(regs->regs.skas.regs); - sc.esp = REGS_SP(regs->regs.skas.regs); + sc.esp = sp; sc.ebx = REGS_EBX(regs->regs.skas.regs); sc.edx = REGS_EDX(regs->regs.skas.regs); sc.ecx = REGS_ECX(regs->regs.skas.regs); @@ -132,7 +132,7 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, } int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, - struct sigcontext *from, int fpsize) + struct sigcontext *from, int fpsize, unsigned long sp) { struct _fpstate *to_fp, *from_fp; int err; @@ -140,11 +140,18 @@ int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, to_fp = (fp ? fp : (struct _fpstate *) (to + 1)); from_fp = from->fpstate; err = copy_to_user(to, from, sizeof(*to)); + + /* The SP in the sigcontext is the updated one for the signal + * delivery. The sp passed in is the original, and this needs + * to be restored, so we stick it in separately. + */ + err |= copy_to_user(&SC_SP(to), sp, sizeof(sp)); + if(from_fp != NULL){ err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate)); err |= copy_to_user(to_fp, from_fp, fpsize); } - return(err); + return err; } #endif @@ -159,11 +166,11 @@ static int copy_sc_from_user(struct pt_regs *to, void __user *from) } static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, - struct pt_regs *from) + struct pt_regs *from, unsigned long sp) { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), - sizeof(*fp)), - copy_sc_to_user_skas(to, fp, from))); + sizeof(*fp), sp), + copy_sc_to_user_skas(to, fp, from, sp))); } static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, @@ -174,7 +181,7 @@ static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); - err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs); + err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, sp); err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); return(err); } @@ -207,6 +214,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, { struct sigframe __user *frame; void *restorer; + unsigned long save_sp = PT_REGS_SP(regs); int err = 0; stack_top &= -8UL; @@ -218,9 +226,19 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, if(ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + /* Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. + */ + PT_REGS_SP(regs) = (unsigned long) frame; + err |= __put_user(restorer, &frame->pretcode); err |= __put_user(sig, &frame->sig); - err |= copy_sc_to_user(&frame->sc, NULL, regs); + err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp); err |= __put_user(mask->sig[0], &frame->sc.oldmask); if (_NSIG_WORDS > 1) err |= __copy_to_user(&frame->extramask, &mask->sig[1], @@ -238,7 +256,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); if(err) - return(err); + goto err; PT_REGS_SP(regs) = (unsigned long) frame; PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; @@ -248,7 +266,11 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) ptrace_notify(SIGTRAP); - return(0); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; } int setup_signal_stack_si(unsigned long stack_top, int sig, @@ -257,6 +279,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, { struct rt_sigframe __user *frame; void *restorer; + unsigned long save_sp = PT_REGS_SP(regs); int err = 0; stack_top &= -8UL; @@ -268,13 +291,16 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, if(ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + /* See comment above about why this is here */ + PT_REGS_SP(regs) = (unsigned long) frame; + err |= __put_user(restorer, &frame->pretcode); err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, info); err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, - PT_REGS_SP(regs)); + save_sp); /* * This is movl $,%eax ; int $0x80 @@ -288,9 +314,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); if(err) - return(err); + goto err; - PT_REGS_SP(regs) = (unsigned long) frame; PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; PT_REGS_EAX(regs) = (unsigned long) sig; PT_REGS_EDX(regs) = (unsigned long) &frame->info; @@ -298,7 +323,11 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) ptrace_notify(SIGTRAP); - return(0); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; } long sys_sigreturn(struct pt_regs regs) diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index fe1d065..e75c4e1 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -55,7 +55,8 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, } int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, - struct pt_regs *regs, unsigned long mask) + struct pt_regs *regs, unsigned long mask, + unsigned long sp) { struct faultinfo * fi = ¤t->thread.arch.faultinfo; int err = 0; @@ -70,7 +71,11 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, err |= PUTREG(regs, RDI, to, rdi); err |= PUTREG(regs, RSI, to, rsi); err |= PUTREG(regs, RBP, to, rbp); - err |= PUTREG(regs, RSP, to, rsp); + /* Must use orignal RSP, which is passed in, rather than what's in + * the pt_regs, because that's already been updated to point at the + * signal frame. + */ + err |= __put_user(sp, &to->rsp); err |= PUTREG(regs, RBX, to, rbx); err |= PUTREG(regs, RDX, to, rdx); err |= PUTREG(regs, RCX, to, rcx); @@ -102,7 +107,7 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, #ifdef CONFIG_MODE_TT int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, - int fpsize) + int fpsize) { struct _fpstate *to_fp, *from_fp; unsigned long sigs; @@ -120,7 +125,7 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, } int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, - struct sigcontext *from, int fpsize) + struct sigcontext *from, int fpsize, unsigned long sp) { struct _fpstate *to_fp, *from_fp; int err; @@ -128,11 +133,17 @@ int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, to_fp = (fp ? fp : (struct _fpstate *) (to + 1)); from_fp = from->fpstate; err = copy_to_user(to, from, sizeof(*to)); + /* The SP in the sigcontext is the updated one for the signal + * delivery. The sp passed in is the original, and this needs + * to be restored, so we stick it in separately. + */ + err |= copy_to_user(&SC_SP(to), sp, sizeof(sp)); + if(from_fp != NULL){ err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate)); err |= copy_to_user(to_fp, from_fp, fpsize); } - return(err); + return err; } #endif @@ -148,11 +159,12 @@ static int copy_sc_from_user(struct pt_regs *to, void __user *from) } static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, - struct pt_regs *from, unsigned long mask) + struct pt_regs *from, unsigned long mask, + unsigned long sp) { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), - sizeof(*fp)), - copy_sc_to_user_skas(to, fp, from, mask))); + sizeof(*fp), sp), + copy_sc_to_user_skas(to, fp, from, mask, sp))); } struct rt_sigframe @@ -170,6 +182,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, { struct rt_sigframe __user *frame; struct _fpstate __user *fp = NULL; + unsigned long save_sp = PT_REGS_RSP(regs); int err = 0; struct task_struct *me = current; @@ -193,14 +206,25 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, goto out; } + /* Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. + */ + PT_REGS_RSP(regs) = (unsigned long) frame; + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)), + err |= __put_user(sas_ss_flags(save_sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= copy_sc_to_user(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= copy_sc_to_user(&frame->uc.uc_mcontext, fp, regs, set->sig[0], + save_sp); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); if (sizeof(*set) == 16) { __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); @@ -217,10 +241,10 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); else /* could use a vstub here */ - goto out; + goto restore_sp; if (err) - goto out; + goto restore_sp; /* Set up registers for signal handler */ { @@ -238,10 +262,12 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_RSI(regs) = (unsigned long) &frame->info; PT_REGS_RDX(regs) = (unsigned long) &frame->uc; PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler; - - PT_REGS_RSP(regs) = (unsigned long) frame; out: - return(err); + return err; + +restore_sp: + PT_REGS_RSP(regs) = save_sp; + return err; } long sys_rt_sigreturn(struct pt_regs *regs) -- cgit v0.10.2 From 1fbbd6844e6a84e5d166ab475dc298d3b89fa4bf Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:39 -0800 Subject: [PATCH] uml: prevent umid theft Behavior when booting two UMLs with the same umid was broken. The second one would steal the umid. This fixes that, making the second UML take a random umid instead. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index ecf107a..198e591 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -143,8 +143,10 @@ static int not_dead_yet(char *dir) goto out_close; } - if((kill(p, 0) == 0) || (errno != ESRCH)) + if((kill(p, 0) == 0) || (errno != ESRCH)){ + printk("umid \"%s\" is already in use by pid %d\n", umid, p); return 1; + } err = actually_do_remove(dir); if(err) @@ -234,33 +236,44 @@ int __init make_umid(void) err = mkdir(tmp, 0777); if(err < 0){ err = -errno; - if(errno != EEXIST) + if(err != -EEXIST) goto err; - if(not_dead_yet(tmp) < 0) + /* 1 -> this umid is already in use + * < 0 -> we couldn't remove the umid directory + * In either case, we can't use this umid, so return -EEXIST. + */ + if(not_dead_yet(tmp) != 0) goto err; err = mkdir(tmp, 0777); } - if(err < 0){ - printk("Failed to create '%s' - err = %d\n", umid, err); - goto err_rmdir; + if(err){ + err = -errno; + printk("Failed to create '%s' - err = %d\n", umid, -errno); + goto err; } umid_setup = 1; create_pid_file(); - return 0; - - err_rmdir: - rmdir(tmp); + err = 0; err: return err; } static int __init make_umid_init(void) { + if(!make_umid()) + return 0; + + /* If initializing with the given umid failed, then try again with + * a random one. + */ + printk("Failed to initialize umid \"%s\", trying with a random umid\n", + umid); + *umid = '\0'; make_umid(); return 0; -- cgit v0.10.2 From 5f4e8fd08f3993bc31a7dd91767fdb4da4fe6278 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:40 -0800 Subject: [PATCH] uml: fix thread startup race This fixes a race in the starting of write_sigio_thread. Previously, some of the data needed by the thread was initialized after the clone. If the thread ran immediately, it would see the uninitialized data, including an empty pollfds, which would cause it to hang. We move the data initialization to before the clone, and adjust the error paths and cleanup accordingly. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 7604c40..9ba9429 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -29,8 +29,10 @@ static int write_sigio_pid = -1; * the descriptors closed after it is killed. So, it can't see them change. * On the UML side, they are changed under the sigio_lock. */ -static int write_sigio_fds[2] = { -1, -1 }; -static int sigio_private[2] = { -1, -1 }; +#define SIGIO_FDS_INIT {-1, -1} + +static int write_sigio_fds[2] = SIGIO_FDS_INIT; +static int sigio_private[2] = SIGIO_FDS_INIT; struct pollfds { struct pollfd *poll; @@ -270,49 +272,46 @@ void write_sigio_workaround(void) /* Did we race? Don't try to optimize this, please, it's not so likely * to happen, and no more than once at the boot. */ if(write_sigio_pid != -1) - goto out_unlock; - - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, - CLONE_FILES | CLONE_VM, &stack, 0); + goto out_free; - if (write_sigio_pid < 0) - goto out_clear; + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); if (write_sigio_irq(l_write_sigio_fds[0])) - goto out_kill; + goto out_clear_poll; - /* Success, finally. */ memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, &stack, 0); - sigio_unlock(); - return; + if (write_sigio_pid < 0) + goto out_clear; - out_kill: - l_write_sigio_pid = write_sigio_pid; - write_sigio_pid = -1; sigio_unlock(); - /* Going to call waitpid, avoid holding the lock. */ - os_kill_process(l_write_sigio_pid, 1); - goto out_free; + return; - out_clear: +out_clear: write_sigio_pid = -1; - out_unlock: - sigio_unlock(); - out_free: + write_sigio_fds[0] = -1; + write_sigio_fds[1] = -1; + sigio_private[0] = -1; + sigio_private[1] = -1; +out_clear_poll: + current_poll = ((struct pollfds) { .poll = NULL, + .size = 0, + .used = 0 }); +out_free: kfree(p); - out_close2: + sigio_unlock(); +out_close2: close(l_sigio_private[0]); close(l_sigio_private[1]); - out_close1: +out_close1: close(l_write_sigio_fds[0]); close(l_write_sigio_fds[1]); - return; } void sigio_cleanup(void) -- cgit v0.10.2 From 86c79cbcee7d617c5aaf9b4f7e6cc093397d3451 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 27 Mar 2006 01:14:41 -0800 Subject: [PATCH] uml: fix hostfs stack corruption Noted by Oleg Drokin: We initialized an extra slot of struct kstatfs.spare, sometimes causing stack corruption. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Oleg Drokin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index b97809d..23b7cee 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -360,7 +360,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, spare_out[2] = buf.f_spare[2]; spare_out[3] = buf.f_spare[3]; spare_out[4] = buf.f_spare[4]; - spare_out[5] = buf.f_spare[5]; return(0); } -- cgit v0.10.2 From 718c604a28e35848754a65b839e4877ec34b2fca Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:42 -0800 Subject: [PATCH] autofs4: lookup white space cleanup Whitespace and formating changes to lookup code. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 62d8d4a..2c676bd 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -296,20 +296,20 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f { struct super_block *sb = mnt->mnt_sb; struct autofs_sb_info *sbi = autofs4_sbi(sb); - struct autofs_info *de_info = autofs4_dentry_ino(dentry); + struct autofs_info *ino = autofs4_dentry_ino(dentry); int status = 0; /* Block on any pending expiry here; invalidate the dentry when expiration is done to trigger mount request with a new dentry */ - if (de_info && (de_info->flags & AUTOFS_INF_EXPIRING)) { + if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { DPRINTK("waiting for expire %p name=%.*s", dentry, dentry->d_name.len, dentry->d_name.name); status = autofs4_wait(sbi, dentry, NFY_NONE); - + DPRINTK("expire done status=%d", status); - + /* * If the directory still exists the mount request must * continue otherwise it can't be followed at the right @@ -323,18 +323,21 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f DPRINTK("dentry=%p %.*s ino=%p", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - /* Wait for a pending mount, triggering one if there isn't one already */ + /* + * Wait for a pending mount, triggering one if there + * isn't one already + */ if (dentry->d_inode == NULL) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); status = autofs4_wait(sbi, dentry, NFY_MOUNT); - + DPRINTK("mount done status=%d", status); if (status && dentry->d_inode) return 0; /* Try to get the kernel to invalidate this dentry */ - + /* Turn this into a real negative dentry? */ if (status == -ENOENT) { dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT; @@ -367,8 +370,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f } } - /* We don't update the usages for the autofs daemon itself, this - is necessary for recursive autofs mounts */ + /* + * We don't update the usages for the autofs daemon itself, this + * is necessary for recursive autofs mounts + */ if (!autofs4_oz_mode(sbi)) autofs4_update_usage(mnt, dentry); @@ -384,9 +389,9 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd) +static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode * dir = dentry->d_parent->d_inode; + struct inode *dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); int oz_mode = autofs4_oz_mode(sbi); int flags = nd ? nd->flags : 0; @@ -462,12 +467,13 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name); + /* File name too long to exist */ if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */ + return ERR_PTR(-ENAMETOOLONG); sbi = autofs4_sbi(dir->i_sb); - oz_mode = autofs4_oz_mode(sbi); + DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, process_group(current), sbi->catatonic, oz_mode); @@ -519,7 +525,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * doesn't do the right thing for all system calls, but it should * be OK for the operations we permit from an autofs. */ - if ( dentry->d_inode && d_unhashed(dentry) ) + if (dentry->d_inode && d_unhashed(dentry)) return ERR_PTR(-ENOENT); return NULL; -- cgit v0.10.2 From f360ce3be466d50de153b001b24561ca7593042b Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:43 -0800 Subject: [PATCH] autofs4: use libfs routines for readdir Change readdir routines to use the cursor based routines in libfs.c. This removes reliance on old readdir code from 2.4 and should improve efficiency of readdir in autofs4. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2c676bd..af9a4c6 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -30,7 +30,6 @@ static int autofs4_dir_close(struct inode *inode, struct file *file); static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); -static int autofs4_dcache_readdir(struct file *, void *, filldir_t); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -82,7 +81,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent, DPRINTK("needs_reghost = %d", sbi->needs_reghost); - return autofs4_dcache_readdir(file, dirent, filldir); + return dcache_readdir(file, dirent, filldir); } /* Update usage from here to top of tree, so that scan of @@ -103,75 +102,21 @@ static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry) spin_unlock(&dcache_lock); } -/* - * From 2.4 kernel readdir.c - */ -static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) -{ - int i; - struct dentry *dentry = filp->f_dentry; - - i = filp->f_pos; - switch (i) { - case 0: - if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, DT_DIR) < 0) - break; - i++; - filp->f_pos++; - /* fallthrough */ - case 1: - if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) - break; - i++; - filp->f_pos++; - /* fallthrough */ - default: { - struct list_head *list; - int j = i-2; - - spin_lock(&dcache_lock); - list = dentry->d_subdirs.next; - - for (;;) { - if (list == &dentry->d_subdirs) { - spin_unlock(&dcache_lock); - return 0; - } - if (!j) - break; - j--; - list = list->next; - } - - while(1) { - struct dentry *de = list_entry(list, - struct dentry, d_u.d_child); - - if (!d_unhashed(de) && de->d_inode) { - spin_unlock(&dcache_lock); - if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0) - break; - spin_lock(&dcache_lock); - } - filp->f_pos++; - list = list->next; - if (list != &dentry->d_subdirs) - continue; - spin_unlock(&dcache_lock); - break; - } - } - } - return 0; -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; struct vfsmount *mnt = file->f_vfsmnt; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor; int status; + status = dcache_dir_open(inode, file); + if (status) + goto out; + + cursor = file->private_data; + cursor->d_fsdata = NULL; + DPRINTK("file=%p dentry=%p %.*s", file, dentry, dentry->d_name.len, dentry->d_name.name); @@ -180,12 +125,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (autofs4_ispending(dentry)) { DPRINTK("dentry busy"); - return -EBUSY; + dcache_dir_close(inode, file); + status = -EBUSY; + goto out; } + status = -ENOENT; if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { struct nameidata nd; - int empty; + int empty, ret; /* In case there are stale directory dentrys from a failed mount */ spin_lock(&dcache_lock); @@ -195,13 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (!empty) d_invalidate(dentry); - nd.dentry = dentry; - nd.mnt = mnt; nd.flags = LOOKUP_DIRECTORY; - status = (dentry->d_op->d_revalidate)(dentry, &nd); + ret = (dentry->d_op->d_revalidate)(dentry, &nd); - if (!status) - return -ENOENT; + if (!ret) { + dcache_dir_close(inode, file); + goto out; + } } if (d_mountpoint(dentry)) { @@ -212,25 +160,29 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { dput(fp_dentry); mntput(fp_mnt); - return -ENOENT; + dcache_dir_close(inode, file); + goto out; } fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); status = PTR_ERR(fp); if (IS_ERR(fp)) { - file->private_data = NULL; - return status; + dcache_dir_close(inode, file); + goto out; } - file->private_data = fp; + cursor->d_fsdata = fp; } -out: return 0; +out: + return status; } static int autofs4_dir_close(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor = file->private_data; + int status = 0; DPRINTK("file=%p dentry=%p %.*s", file, dentry, dentry->d_name.len, dentry->d_name.name); @@ -240,26 +192,28 @@ static int autofs4_dir_close(struct inode *inode, struct file *file) if (autofs4_ispending(dentry)) { DPRINTK("dentry busy"); - return -EBUSY; + status = -EBUSY; + goto out; } if (d_mountpoint(dentry)) { - struct file *fp = file->private_data; - - if (!fp) - return -ENOENT; - + struct file *fp = cursor->d_fsdata; + if (!fp) { + status = -ENOENT; + goto out; + } filp_close(fp, current->files); - file->private_data = NULL; } out: - return 0; + dcache_dir_close(inode, file); + return status; } static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir) { struct dentry *dentry = file->f_dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *cursor = file->private_data; int status; DPRINTK("file=%p dentry=%p %.*s", @@ -274,7 +228,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi } if (d_mountpoint(dentry)) { - struct file *fp = file->private_data; + struct file *fp = cursor->d_fsdata; if (!fp) return -ENOENT; @@ -289,7 +243,7 @@ static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldi return status; } out: - return autofs4_dcache_readdir(file, dirent, filldir); + return dcache_readdir(file, dirent, filldir); } static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags) -- cgit v0.10.2 From 2d753e62b87ab2fc72bb4ff5153791d32ff9c08e Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:44 -0800 Subject: [PATCH] autofs4: can't mount due to mount point dir not empty Addresse a problem where stale dentrys stop mounts from happening. When a mount point directory is pre-created and a non-existent entry within it is requested a dentry ends up being created within the mount point directory which stops future mounts. The problem is solved by ignoring negative, unhashed dentrys in the mount point d_subdirs list. Additionally the apparent cacheing of -ENOENT returns from requests is removed. The test on d_time is a tautology and d_time is not initialised and has an unexpected value. In short it doesn't do what it's meant to. The cacheing of failed requests to the daemon is important and will be followed up later. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f54c5b2..eea2593 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -41,14 +41,6 @@ #define AUTOFS_SUPER_MAGIC 0x0187 -/* - * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the - * kernel will keep the negative response cached for up to the time given - * here, although the time can be shorter if the kernel throws the dcache - * entry away. This probably should be settable from user space. - */ -#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */ - /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index af9a4c6..c7ff357 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -294,14 +294,13 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f /* Turn this into a real negative dentry? */ if (status == -ENOENT) { - dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT; spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 1; + return 0; } else if (status) { /* Return a negative dentry, but leave it "pending" */ - return 1; + return 0; } /* Trigger mount for path component or follow link */ } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || @@ -360,13 +359,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Negative dentry.. invalidate if "old" */ if (dentry->d_inode == NULL) - return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT); + return 0; /* Check for a non-mountpoint directory with no contents */ spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && - list_empty(&dentry->d_subdirs)) { + simple_empty_nolock(dentry)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); -- cgit v0.10.2 From 1f5f2c3059ae8cc23cb3303daf324b06ba79517a Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:44 -0800 Subject: [PATCH] autofs4: expire code readability cleanup Change the names of the boolean functions autofs4_check_mount and autofs4_check_tree to autofs4_mount_busy and autofs4_tree_busy respectively and alters their return codes to suit in order to aid code readabilty. A couple of white space cleanups are included as well. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index dc39589..bcc17f5 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -16,7 +16,7 @@ static unsigned long now; -/* Check if a dentry can be expired return 1 if it can else return 0 */ +/* Check if a dentry can be expired */ static inline int autofs4_can_expire(struct dentry *dentry, unsigned long timeout, int do_now) { @@ -41,14 +41,13 @@ static inline int autofs4_can_expire(struct dentry *dentry, attempts if expire fails the first time */ ino->last_used = now; } - return 1; } -/* Check a mount point for busyness return 1 if not busy, otherwise */ -static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) +/* Check a mount point for busyness */ +static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { - int status = 0; + int status = 1; DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); @@ -65,7 +64,7 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) /* The big question */ if (may_umount_tree(mnt) == 0) - status = 1; + status = 0; done: DPRINTK("returning = %d", status); mntput(mnt); @@ -75,30 +74,29 @@ done: /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy - * Return 1 if the tree is busy or 0 otherwise */ -static int autofs4_check_tree(struct vfsmount *mnt, - struct dentry *top, - unsigned long timeout, - int do_now) +static int autofs4_tree_busy(struct vfsmount *mnt, + struct dentry *top, + unsigned long timeout, + int do_now) { struct dentry *this_parent = top; struct list_head *next; - DPRINTK("parent %p %.*s", + DPRINTK("top %p %.*s", top, (int)top->d_name.len, top->d_name.name); /* Negative dentry - give up */ if (!simple_positive(top)) - return 0; + return 1; /* Timeout of a tree mount is determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) - return 0; + return 1; /* Is someone visiting anywhere in the tree ? */ if (may_umount_tree(mnt)) - return 0; + return 1; spin_lock(&dcache_lock); repeat: @@ -126,9 +124,9 @@ resume: if (d_mountpoint(dentry)) { /* First busy => tree busy */ - if (!autofs4_check_mount(mnt, dentry)) { + if (autofs4_mount_busy(mnt, dentry)) { dput(dentry); - return 0; + return 1; } } @@ -144,7 +142,7 @@ resume: } spin_unlock(&dcache_lock); - return 1; + return 0; } static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, @@ -188,7 +186,7 @@ resume: goto cont; /* Can we umount this guy */ - if (autofs4_check_mount(mnt, dentry)) + if (!autofs4_mount_busy(mnt, dentry)) return dentry; } @@ -241,7 +239,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ - if ( !simple_positive(dentry) ) { + if (!simple_positive(dentry)) { next = next->next; continue; } @@ -259,21 +257,21 @@ static struct dentry *autofs4_expire(struct super_block *sb, goto next; /* Can we umount this guy */ - if (autofs4_check_mount(mnt, dentry)) { + if (!autofs4_mount_busy(mnt, dentry)) { expired = dentry; break; } goto next; } - if ( simple_empty(dentry) ) + if (simple_empty(dentry)) goto next; /* Case 2: tree mount, expire iff entire tree is not busy */ if (!exp_leaves) { /* Lock the tree as we must expire as a whole */ spin_lock(&sbi->fs_lock); - if (autofs4_check_tree(mnt, dentry, timeout, do_now)) { + if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { struct autofs_info *inf = autofs4_dentry_ino(dentry); /* Set this flag early to catch sys_chdir and the like */ @@ -297,7 +295,7 @@ next: next = next->next; } - if ( expired ) { + if (expired) { DPRINTK("returning %p %.*s", expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); @@ -352,16 +350,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, return -EFAULT; if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) { - struct autofs_info *de_info = autofs4_dentry_ino(dentry); + struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a little easier */ - de_info->flags |= AUTOFS_INF_EXPIRING; + ino->flags |= AUTOFS_INF_EXPIRING; ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); - de_info->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~AUTOFS_INF_EXPIRING; dput(dentry); } - + return ret; } -- cgit v0.10.2 From 1ce12bad85863478619688c0c7363f93a9e5edb8 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:45 -0800 Subject: [PATCH] autofs4: simplify expire tree traversal Simplify the expire tree traversal code by using a function from namespace.c to calculate the next entry in the top down tree traversals carried out during the expire operation. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index bcc17f5..165fe9e 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -72,6 +72,27 @@ done: return status; } +/* + * Calculate next entry in top down tree traversal. + * From next_mnt in namespace.c - elegant. + */ +static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +{ + struct list_head *next = p->d_subdirs.next; + + if (next == &p->d_subdirs) { + while (1) { + if (p == root) + return NULL; + next = p->d_u.d_child.next; + if (next != &p->d_parent->d_subdirs) + break; + p = p->d_parent; + } + } + return list_entry(next, struct dentry, d_u.d_child); +} + /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy */ @@ -80,8 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct dentry *this_parent = top; - struct list_head *next; + struct dentry *p; DPRINTK("top %p %.*s", top, (int)top->d_name.len, top->d_name.name); @@ -99,49 +119,28 @@ static int autofs4_tree_busy(struct vfsmount *mnt, return 1; spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - + for (p = top; p; p = next_dentry(p, top)) { /* Negative dentry - give up */ - if (!simple_positive(dentry)) { - next = next->next; + if (!simple_positive(p)) continue; - } DPRINTK("dentry %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); - - if (!simple_empty_nolock(dentry)) { - this_parent = dentry; - goto repeat; - } + p, (int) p->d_name.len, p->d_name.name); - dentry = dget(dentry); + p = dget(p); spin_unlock(&dcache_lock); - if (d_mountpoint(dentry)) { + if (d_mountpoint(p)) { /* First busy => tree busy */ - if (autofs4_mount_busy(mnt, dentry)) { - dput(dentry); + if (autofs4_mount_busy(mnt, p)) { + dput(p); return 1; } } - - dput(dentry); + dput(p); spin_lock(&dcache_lock); - next = next->next; - } - - if (this_parent != top) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; - goto resume; } spin_unlock(&dcache_lock); - return 0; } @@ -150,59 +149,38 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct dentry *this_parent = parent; - struct list_head *next; + struct dentry *p; DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); spin_lock(&dcache_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - + for (p = parent; p; p = next_dentry(p, parent)) { /* Negative dentry - give up */ - if (!simple_positive(dentry)) { - next = next->next; + if (!simple_positive(p)) continue; - } DPRINTK("dentry %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); + p, (int) p->d_name.len, p->d_name.name); - if (!list_empty(&dentry->d_subdirs)) { - this_parent = dentry; - goto repeat; - } - - dentry = dget(dentry); + p = dget(p); spin_unlock(&dcache_lock); - if (d_mountpoint(dentry)) { + if (d_mountpoint(p)) { /* Can we expire this guy */ - if (!autofs4_can_expire(dentry, timeout, do_now)) + if (!autofs4_can_expire(p, timeout, do_now)) goto cont; /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, dentry)) - return dentry; + if (!autofs4_mount_busy(mnt, p)) + return p; } cont: - dput(dentry); + dput(p); spin_lock(&dcache_lock); - next = next->next; - } - - if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; - goto resume; } spin_unlock(&dcache_lock); - return NULL; } -- cgit v0.10.2 From 1aff3c8b0511b5bb54acf7859e0c6ec9ae7287a9 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:46 -0800 Subject: [PATCH] autofs4: fix false negative return from expire Fix the case where an expire returns busy on a tree mount when it is in fact not busy. This case was overlooked when the patch to prevent the expiring away of "scaffolding" directories for tree mounts was applied. The problem arises when a tree of mounts is a member of a map with other keys. The current logic will not expire the tree if any other mount in the map is busy. The solution is to maintain a "minimum" use count for each autofs dentry and compare this to the actual dentry usage count during expire. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index eea2593..00da71d 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -55,6 +55,7 @@ struct autofs_info { struct autofs_sb_info *sbi; unsigned long last_used; + atomic_t count; mode_t mode; size_t size; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 165fe9e..053d92a 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -101,6 +101,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { + struct autofs_info *ino; struct dentry *p; DPRINTK("top %p %.*s", @@ -110,14 +111,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - /* Timeout of a tree mount is determined by its top dentry */ - if (!autofs4_can_expire(top, timeout, do_now)) - return 1; - - /* Is someone visiting anywhere in the tree ? */ - if (may_umount_tree(mnt)) - return 1; - spin_lock(&dcache_lock); for (p = top; p; p = next_dentry(p, top)) { /* Negative dentry - give up */ @@ -130,17 +123,40 @@ static int autofs4_tree_busy(struct vfsmount *mnt, p = dget(p); spin_unlock(&dcache_lock); + /* + * Is someone visiting anywhere in the subtree ? + * If there's no mount we need to check the usage + * count for the autofs dentry. + */ + ino = autofs4_dentry_ino(p); if (d_mountpoint(p)) { - /* First busy => tree busy */ if (autofs4_mount_busy(mnt, p)) { dput(p); return 1; } + } else { + unsigned int ino_count = atomic_read(&ino->count); + + /* allow for dget above and top is already dgot */ + if (p == top) + ino_count += 2; + else + ino_count++; + + if (atomic_read(&p->d_count) > ino_count) { + dput(p); + return 1; + } } dput(p); spin_lock(&dcache_lock); } spin_unlock(&dcache_lock); + + /* Timeout of a tree mount is ultimately determined by its top dentry */ + if (!autofs4_can_expire(top, timeout, do_now)) + return 1; + return 0; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 1ad98d4..2335b1d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -46,6 +46,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->size = 0; ino->last_used = jiffies; + atomic_set(&ino->count, 0); ino->sbi = sbi; @@ -64,10 +65,19 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, void autofs4_free_ino(struct autofs_info *ino) { + struct autofs_info *p_ino; + if (ino->dentry) { ino->dentry->d_fsdata = NULL; - if (ino->dentry->d_inode) + if (ino->dentry->d_inode) { + struct dentry *parent = ino->dentry->d_parent; + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(parent); + if (p_ino && parent != ino->dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); + } ino->dentry = NULL; } if (ino->free) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c7ff357..d196712 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -490,6 +490,7 @@ static int autofs4_dir_symlink(struct inode *dir, { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; struct inode *inode; char *cp; @@ -523,6 +524,10 @@ static int autofs4_dir_symlink(struct inode *dir, dentry->d_fsdata = ino; ino->dentry = dget(dentry); + atomic_inc(&ino->count); + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_inc(&p_ino->count); ino->inode = inode; dir->i_mtime = CURRENT_TIME; @@ -549,11 +554,17 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; /* This allows root to remove symlinks */ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) return -EACCES; + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); dentry->d_inode->i_size = 0; @@ -570,6 +581,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; if (!autofs4_oz_mode(sbi)) return -EACCES; @@ -584,8 +596,12 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + if (atomic_dec_and_test(&ino->count)) { + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_dec(&p_ino->count); + } dput(ino->dentry); - dentry->d_inode->i_size = 0; dentry->d_inode->i_nlink = 0; @@ -599,6 +615,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *p_ino; struct inode *inode; if ( !autofs4_oz_mode(sbi) ) @@ -621,6 +638,10 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_fsdata = ino; ino->dentry = dget(dentry); + atomic_inc(&ino->count); + p_ino = autofs4_dentry_ino(dentry->d_parent); + if (p_ino && dentry->d_parent != dentry) + atomic_inc(&p_ino->count); ino->inode = inode; dir->i_nlink++; dir->i_mtime = CURRENT_TIME; -- cgit v0.10.2 From e0a7aae94030b878601eb67686b696de4a3764f0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:47 -0800 Subject: [PATCH] autofs4: expire mounts that hold no (extra) references only Alter the expire semantics that define how "busyness" is determined. Currently a last_used counter is updated on every revalidate from processes other than the mount owner process group. This patch changes that so that an expire candidate is busy only if it has a reference count greater than the expected minimum, such as when there is an open file or working directory in use. This method is the only way that busyness can be established for direct mounts within the new implementation. For consistency the expire semantic is made the same for all mounts. A side effect of the patch is that mounts which remain mounted unessessarily in the presence of some GUI programs that scan the filesystem should now expire. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 053d92a..6ae2fc8 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -47,6 +47,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, /* Check a mount point for busyness */ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { + struct dentry *top = dentry; int status = 1; DPRINTK("dentry %p %.*s", @@ -62,9 +63,14 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) if (is_autofs4_dentry(dentry)) goto done; - /* The big question */ - if (may_umount_tree(mnt) == 0) - status = 0; + /* Update the expiry counter if fs is busy */ + if (may_umount_tree(mnt)) { + struct autofs_info *ino = autofs4_dentry_ino(top); + ino->last_used = jiffies; + goto done; + } + + status = 0; done: DPRINTK("returning = %d", status); mntput(mnt); @@ -101,7 +107,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { - struct autofs_info *ino; + struct autofs_info *top_ino = autofs4_dentry_ino(top); struct dentry *p; DPRINTK("top %p %.*s", @@ -127,14 +133,16 @@ static int autofs4_tree_busy(struct vfsmount *mnt, * Is someone visiting anywhere in the subtree ? * If there's no mount we need to check the usage * count for the autofs dentry. + * If the fs is busy update the expiry counter. */ - ino = autofs4_dentry_ino(p); if (d_mountpoint(p)) { if (autofs4_mount_busy(mnt, p)) { + top_ino->last_used = jiffies; dput(p); return 1; } } else { + struct autofs_info *ino = autofs4_dentry_ino(p); unsigned int ino_count = atomic_read(&ino->count); /* allow for dget above and top is already dgot */ @@ -144,6 +152,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, ino_count++; if (atomic_read(&p->d_count) > ino_count) { + top_ino->last_used = jiffies; dput(p); return 1; } @@ -183,14 +192,13 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, spin_unlock(&dcache_lock); if (d_mountpoint(p)) { - /* Can we expire this guy */ - if (!autofs4_can_expire(p, timeout, do_now)) + /* Can we umount this guy */ + if (autofs4_mount_busy(mnt, p)) goto cont; - /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, p)) + /* Can we expire this guy */ + if (autofs4_can_expire(p, timeout, do_now)) return p; - } cont: dput(p); @@ -246,12 +254,12 @@ static struct dentry *autofs4_expire(struct super_block *sb, DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - /* Can we expire this guy */ - if (!autofs4_can_expire(dentry, timeout, do_now)) + /* Can we umount this guy */ + if (autofs4_mount_busy(mnt, dentry)) goto next; - /* Can we umount this guy */ - if (!autofs4_mount_busy(mnt, dentry)) { + /* Can we expire this guy */ + if (autofs4_can_expire(dentry, timeout, do_now)) { expired = dentry; break; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d196712..3a4a5b4 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -330,6 +330,10 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f if (!autofs4_oz_mode(sbi)) autofs4_update_usage(mnt, dentry); + /* Initialize expiry counter after successful mount */ + if (ino) + ino->last_used = jiffies; + spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); -- cgit v0.10.2 From 862b110f0132401e422783bf68521ade3dc67578 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:48 -0800 Subject: [PATCH] autofs4: remove update_atime unused function Remove the update of i_atime from autofs4 in favour of having VFS update it. i_atime is never used for expire in autofs4. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3a4a5b4..72dca33 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -84,24 +84,6 @@ static int autofs4_root_readdir(struct file *file, void *dirent, return dcache_readdir(file, dirent, filldir); } -/* Update usage from here to top of tree, so that scan of - top-level directories will give a useful result */ -static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry) -{ - struct dentry *top = dentry->d_sb->s_root; - - spin_lock(&dcache_lock); - for(; dentry != top; dentry = dentry->d_parent) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); - - if (ino) { - touch_atime(mnt, dentry); - ino->last_used = jiffies; - } - } - spin_unlock(&dcache_lock); -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_dentry; @@ -246,10 +228,9 @@ out: return dcache_readdir(file, dirent, filldir); } -static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags) +static int try_to_fill_dentry(struct dentry *dentry, int flags) { - struct super_block *sb = mnt->mnt_sb; - struct autofs_sb_info *sbi = autofs4_sbi(sb); + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); int status = 0; @@ -323,13 +304,6 @@ static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int f } } - /* - * We don't update the usages for the autofs daemon itself, this - * is necessary for recursive autofs mounts - */ - if (!autofs4_oz_mode(sbi)) - autofs4_update_usage(mnt, dentry); - /* Initialize expiry counter after successful mount */ if (ino) ino->last_used = jiffies; @@ -357,7 +331,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Pending dentry */ if (autofs4_ispending(dentry)) { if (!oz_mode) - status = try_to_fill_dentry(nd->mnt, dentry, flags); + status = try_to_fill_dentry(dentry, flags); return status; } @@ -374,15 +348,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); if (!oz_mode) - status = try_to_fill_dentry(nd->mnt, dentry, flags); + status = try_to_fill_dentry(dentry, flags); return status; } spin_unlock(&dcache_lock); - /* Update the usage list */ - if (!oz_mode) - autofs4_update_usage(nd->mnt, dentry); - return 1; } -- cgit v0.10.2 From d7c4a5f1080a61fd4a3a00ba5f741986f8fb71f0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:49 -0800 Subject: [PATCH] autofs4: add a show mount options for proc filesystem Add show_options method to display autofs4 mount options in the proc filesystem. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 00da71d..d82a019 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -87,11 +87,14 @@ struct autofs_wait_queue { struct autofs_sb_info { u32 magic; struct dentry *root; + int pipefd; struct file *pipe; pid_t oz_pgrp; int catatonic; int version; int sub_version; + int min_proto; + int max_proto; unsigned long exp_timeout; int reghost_enabled; int needs_reghost; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2335b1d..d9a71da 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -163,9 +164,26 @@ static void autofs4_put_super(struct super_block *sb) DPRINTK("shutting down"); } +static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb); + + if (!sbi) + return 0; + + seq_printf(m, ",fd=%d", sbi->pipefd); + seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); + seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); + seq_printf(m, ",minproto=%d", sbi->min_proto); + seq_printf(m, ",maxproto=%d", sbi->max_proto); + + return 0; +} + static struct super_operations autofs4_sops = { .put_super = autofs4_put_super, .statfs = simple_statfs, + .show_options = autofs4_show_options, }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; @@ -261,7 +279,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; - int minproto, maxproto; sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL); if ( !sbi ) @@ -273,12 +290,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; sbi->root = NULL; + sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; sbi->oz_pgrp = process_group(current); sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->min_proto = 0; + sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; @@ -311,22 +331,26 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, &sbi->oz_pgrp, - &minproto, &maxproto)) { + &sbi->min_proto, &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } /* Couldn't this be tested earlier? */ - if (maxproto < AUTOFS_MIN_PROTO_VERSION || - minproto > AUTOFS_MAX_PROTO_VERSION) { + if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || + sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { printk("autofs: kernel does not match daemon version " "daemon (%d, %d) kernel (%d, %d)\n", - minproto, maxproto, + sbi->min_proto, sbi->max_proto, AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); goto fail_dput; } - sbi->version = maxproto > AUTOFS_MAX_PROTO_VERSION ? AUTOFS_MAX_PROTO_VERSION : maxproto; + /* Establish highest kernel protocol version */ + if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) + sbi->version = AUTOFS_MAX_PROTO_VERSION; + else + sbi->version = sbi->max_proto; sbi->sub_version = AUTOFS_PROTO_SUBVERSION; DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); @@ -339,6 +363,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if ( !pipe->f_op || !pipe->f_op->write ) goto fail_fput; sbi->pipe = pipe; + sbi->pipefd = pipefd; /* * Take a reference to the root dentry so we get a chance to -- cgit v0.10.2 From e77fbddf77c071a5735a4bc63a30649bd64baf14 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:49 -0800 Subject: [PATCH] autofs4: white space cleanup for waitq.c Whitespace and formating changes to waitq code. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index be78e93..b0bb9d4 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -33,7 +33,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) sbi->catatonic = 1; wq = sbi->queues; sbi->queues = NULL; /* Erase all wait queues */ - while ( wq ) { + while (wq) { nwq = wq->next; wq->status = -ENOENT; /* Magic is gone - report failure */ kfree(wq->name); @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } @@ -165,7 +164,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, int len, status; /* In catatonic mode, we don't wait for nobody */ - if ( sbi->catatonic ) + if (sbi->catatonic) return -ENOENT; name = kmalloc(NAME_MAX + 1, GFP_KERNEL); @@ -190,7 +189,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, break; } - if ( !wq ) { + if (!wq) { /* Can't wait for an expire if there's no mount */ if (notify == NFY_NONE && !d_mountpoint(dentry)) { kfree(name); @@ -200,7 +199,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); - if ( !wq ) { + if (!wq) { kfree(name); mutex_unlock(&sbi->wq_mutex); return -ENOMEM; @@ -240,14 +239,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, /* wq->name is NULL if and only if the lock is already released */ - if ( sbi->catatonic ) { + if (sbi->catatonic) { /* We might have slept, so check again for catatonic mode */ wq->status = -ENOENT; kfree(wq->name); wq->name = NULL; } - if ( wq->name ) { + if (wq->name) { /* Block all but "shutdown" signals while waiting */ sigset_t oldset; unsigned long irqflags; @@ -283,12 +282,12 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); - for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) { - if ( wq->wait_queue_token == wait_queue_token ) + for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) { + if (wq->wait_queue_token == wait_queue_token) break; } - if ( !wq ) { + if (!wq) { mutex_unlock(&sbi->wq_mutex); return -EINVAL; } -- cgit v0.10.2 From 90a59c7cf5dd68b41ffab61dd30eba1ef5746e66 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:50 -0800 Subject: [PATCH] autofs4: rename simple_empty_nolock function Rename the function simple_empty_nolock to __simple_empty in line with kernel naming conventions. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d82a019..bc1b054 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -201,7 +201,7 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } -static inline int simple_empty_nolock(struct dentry *dentry) +static inline int __simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 72dca33..dcd4802 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -343,7 +343,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && - simple_empty_nolock(dentry)) { + __simple_empty(dentry)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); -- cgit v0.10.2 From e3474a8eb38e48dea6690d1fabd75f3c7fd2f93f Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:51 -0800 Subject: [PATCH] autofs4: change may_umount* functions to boolean Change the functions may_umount and may_umount_tree to boolean functions to aid code readability. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 5ccfcf2..3fded38 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -92,7 +92,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, ; dput(dentry); - if ( may_umount(mnt) == 0 ) { + if ( may_umount(mnt) ) { mntput(mnt); DPRINTK(("autofs: signaling expire on %s\n", ent->name)); return ent; /* Expirable! */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 6ae2fc8..02a218f 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -64,7 +64,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) goto done; /* Update the expiry counter if fs is busy */ - if (may_umount_tree(mnt)) { + if (!may_umount_tree(mnt)) { struct autofs_info *ino = autofs4_dentry_ino(top); ino->last_used = jiffies; goto done; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index dcd4802..26eb1f0 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -699,7 +699,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) { int status = 0; - if (may_umount(mnt) == 0) + if (may_umount(mnt)) status = 1; DPRINTK("returning %d", status); diff --git a/fs/namespace.c b/fs/namespace.c index e069a4c..bf478ad 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -459,9 +459,9 @@ int may_umount_tree(struct vfsmount *mnt) spin_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) - return -EBUSY; + return 0; - return 0; + return 1; } EXPORT_SYMBOL(may_umount_tree); @@ -481,10 +481,10 @@ EXPORT_SYMBOL(may_umount_tree); */ int may_umount(struct vfsmount *mnt) { - int ret = 0; + int ret = 1; spin_lock(&vfsmount_lock); if (propagate_mount_busy(mnt, 2)) - ret = -EBUSY; + ret = 0; spin_unlock(&vfsmount_lock); return ret; } -- cgit v0.10.2 From f75ba3ade8a4599d67040a9493d75a864e7b329c Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:52 -0800 Subject: [PATCH] autofs4: increase module version Update autofs4 version. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 9343c89..d998ddc 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 4 -#define AUTOFS_PROTO_SUBVERSION 7 +#define AUTOFS_PROTO_SUBVERSION 10 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 -- cgit v0.10.2 From 051d381259eb57d6074d02a6ba6e90e744f1a29f Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:53 -0800 Subject: [PATCH] autofs4: nameidata needs to be up to date for follow_link In order to be able to trigger a mount using the follow_link inode method the nameidata struct that is passed in needs to have the vfsmount of the autofs trigger not its parent. During a path walk if an autofs trigger is mounted on a dentry, when the follow_link method is called, the nameidata struct contains the vfsmount and mountpoint dentry of the parent mount while the dentry that is passed in is the root of the autofs trigger mount. I believe it is impossible to get the vfsmount of the trigger mount, within the follow_link method, when only the parent vfsmount and the root dentry of the trigger mount are known. This patch updates the nameidata struct on entry to __do_follow_link if it detects that it is out of date. It moves the path_to_nameidata to above __do_follow_link to facilitate calling it from there. The dput_path is moved as well as that seemed sensible. No changes are made to these two functions. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/namei.c b/fs/namei.c index 98dc2e1..22f6e8d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -546,6 +546,22 @@ struct path { struct dentry *dentry; }; +static inline void dput_path(struct path *path, struct nameidata *nd) +{ + dput(path->dentry); + if (path->mnt != nd->mnt) + mntput(path->mnt); +} + +static inline void path_to_nameidata(struct path *path, struct nameidata *nd) +{ + dput(nd->dentry); + if (nd->mnt != path->mnt) + mntput(nd->mnt); + nd->mnt = path->mnt; + nd->dentry = path->dentry; +} + static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) { int error; @@ -555,8 +571,11 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata touch_atime(path->mnt, dentry); nd_set_link(nd, NULL); - if (path->mnt == nd->mnt) - mntget(path->mnt); + if (path->mnt != nd->mnt) { + path_to_nameidata(path, nd); + dget(dentry); + } + mntget(path->mnt); cookie = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(cookie); if (!IS_ERR(cookie)) { @@ -573,22 +592,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata return error; } -static inline void dput_path(struct path *path, struct nameidata *nd) -{ - dput(path->dentry); - if (path->mnt != nd->mnt) - mntput(path->mnt); -} - -static inline void path_to_nameidata(struct path *path, struct nameidata *nd) -{ - dput(nd->dentry); - if (nd->mnt != path->mnt) - mntput(nd->mnt); - nd->mnt = path->mnt; - nd->dentry = path->dentry; -} - /* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. -- cgit v0.10.2 From 34ca959cfc15cf09ad4da4f31ab034691e51af78 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:54 -0800 Subject: [PATCH] autofs4: add v5 follow_link mount trigger method This patch adds a follow_link inode method for the root of an autofs direct mount trigger. It also adds the corresponding mount options and updates the show_mount method. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index bc1b054..ed388a1 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -3,6 +3,7 @@ * linux/fs/autofs/autofs_i.h * * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -84,6 +85,10 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d +#define AUTOFS_TYP_INDIRECT 0x0001 +#define AUTOFS_TYP_DIRECT 0x0002 +#define AUTOFS_TYP_OFFSET 0x0004 + struct autofs_sb_info { u32 magic; struct dentry *root; @@ -96,6 +101,7 @@ struct autofs_sb_info { int min_proto; int max_proto; unsigned long exp_timeout; + unsigned int type; int reghost_enabled; int needs_reghost; struct super_block *sb; @@ -162,6 +168,8 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *, extern struct inode_operations autofs4_symlink_inode_operations; extern struct inode_operations autofs4_dir_inode_operations; extern struct inode_operations autofs4_root_inode_operations; +extern struct inode_operations autofs4_indirect_root_inode_operations; +extern struct inode_operations autofs4_direct_root_inode_operations; extern struct file_operations autofs4_dir_operations; extern struct file_operations autofs4_root_operations; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d9a71da..3801bed 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -3,6 +3,7 @@ * linux/fs/autofs/inode.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -177,6 +178,13 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); + if (sbi->type & AUTOFS_TYP_OFFSET) + seq_printf(m, ",offset"); + else if (sbi->type & AUTOFS_TYP_DIRECT) + seq_printf(m, ",direct"); + else + seq_printf(m, ",indirect"); + return 0; } @@ -186,7 +194,8 @@ static struct super_operations autofs4_sops = { .show_options = autofs4_show_options, }; -enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; +enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, + Opt_indirect, Opt_direct, Opt_offset}; static match_table_t tokens = { {Opt_fd, "fd=%u"}, @@ -195,11 +204,15 @@ static match_table_t tokens = { {Opt_pgrp, "pgrp=%u"}, {Opt_minproto, "minproto=%u"}, {Opt_maxproto, "maxproto=%u"}, + {Opt_indirect, "indirect"}, + {Opt_direct, "direct"}, + {Opt_offset, "offset"}, {Opt_err, NULL} }; static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, - pid_t *pgrp, int *minproto, int *maxproto) + pid_t *pgrp, unsigned int *type, + int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -253,6 +266,15 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, return 1; *maxproto = option; break; + case Opt_indirect: + *type = AUTOFS_TYP_INDIRECT; + break; + case Opt_direct: + *type = AUTOFS_TYP_DIRECT; + break; + case Opt_offset: + *type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET; + break; default: return 1; } @@ -271,6 +293,11 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) return ino; } +void autofs4_dentry_release(struct dentry *); +static struct dentry_operations autofs4_sb_dentry_operations = { + .d_release = autofs4_dentry_release, +}; + int autofs4_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; @@ -297,6 +324,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->type = 0; sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); @@ -315,27 +343,31 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if (!ino) goto fail_free; root_inode = autofs4_get_inode(s, ino); - kfree(ino); if (!root_inode) - goto fail_free; + goto fail_ino; - root_inode->i_op = &autofs4_root_inode_operations; - root_inode->i_fop = &autofs4_root_operations; root = d_alloc_root(root_inode); - pipe = NULL; - if (!root) goto fail_iput; + pipe = NULL; + + root->d_op = &autofs4_sb_dentry_operations; + root->d_fsdata = ino; /* Can this call block? */ if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &sbi->oz_pgrp, + &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } + root_inode->i_fop = &autofs4_root_operations; + root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ? + &autofs4_direct_root_inode_operations : + &autofs4_indirect_root_inode_operations; + /* Couldn't this be tested earlier? */ if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { @@ -391,6 +423,8 @@ fail_dput: fail_iput: printk("autofs: get root dentry failed\n"); iput(root_inode); +fail_ino: + kfree(ino); fail_free: kfree(sbi); fail_unlock: diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 26eb1f0..3f00485 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -4,7 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -30,6 +30,7 @@ static int autofs4_dir_close(struct inode *inode, struct file *file); static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); +static void *autofs4_follow_link(struct dentry *, struct nameidata *); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -46,7 +47,7 @@ struct file_operations autofs4_dir_operations = { .readdir = autofs4_dir_readdir, }; -struct inode_operations autofs4_root_inode_operations = { +struct inode_operations autofs4_indirect_root_inode_operations = { .lookup = autofs4_lookup, .unlink = autofs4_dir_unlink, .symlink = autofs4_dir_symlink, @@ -54,6 +55,11 @@ struct inode_operations autofs4_root_inode_operations = { .rmdir = autofs4_dir_rmdir, }; +struct inode_operations autofs4_direct_root_inode_operations = { + .lookup = autofs4_lookup, + .follow_link = autofs4_follow_link, +}; + struct inode_operations autofs4_dir_inode_operations = { .lookup = autofs4_lookup, .unlink = autofs4_dir_unlink, @@ -252,7 +258,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) */ status = d_invalidate(dentry); if (status != -EBUSY) - return 0; + return -ENOENT; } DPRINTK("dentry=%p %.*s ino=%p", @@ -271,17 +277,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) DPRINTK("mount done status=%d", status); if (status && dentry->d_inode) - return 0; /* Try to get the kernel to invalidate this dentry */ + return status; /* Try to get the kernel to invalidate this dentry */ /* Turn this into a real negative dentry? */ if (status == -ENOENT) { spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 0; + return status; } else if (status) { /* Return a negative dentry, but leave it "pending" */ - return 0; + return status; } /* Trigger mount for path component or follow link */ } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || @@ -300,7 +306,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 0; + return status; } } @@ -311,7 +317,41 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - return 1; + return status; +} + +/* For autofs direct mounts the follow link triggers the mount */ +static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + int oz_mode = autofs4_oz_mode(sbi); + unsigned int lookup_type; + int status; + + DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", + dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, + nd->flags); + + /* If it's our master or we shouldn't trigger a mount we're done */ + lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); + if (oz_mode || !lookup_type) + goto done; + + status = try_to_fill_dentry(dentry, 0); + if (status) + goto out_error; + + if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { + status = -ENOENT; + goto out_error; + } + +done: + return NULL; + +out_error: + path_release(nd); + return ERR_PTR(status); } /* @@ -326,13 +366,13 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); int oz_mode = autofs4_oz_mode(sbi); int flags = nd ? nd->flags : 0; - int status = 1; + int status = 0; /* Pending dentry */ if (autofs4_ispending(dentry)) { if (!oz_mode) status = try_to_fill_dentry(dentry, flags); - return status; + return !status; } /* Negative dentry.. invalidate if "old" */ @@ -349,14 +389,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) spin_unlock(&dcache_lock); if (!oz_mode) status = try_to_fill_dentry(dentry, flags); - return status; + return !status; } spin_unlock(&dcache_lock); return 1; } -static void autofs4_dentry_release(struct dentry *de) +void autofs4_dentry_release(struct dentry *de) { struct autofs_info *inf; -- cgit v0.10.2 From 3a15e2ab5d6e79a79291734ac24f33d51c0ae389 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add v5 expire logic This patch adds expire logic for autofs direct mounts. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 02a218f..8fd92ea 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -4,7 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -99,6 +99,39 @@ static struct dentry *next_dentry(struct dentry *p, struct dentry *root) return list_entry(next, struct dentry, d_u.d_child); } +/* + * Check a direct mount point for busyness. + * Direct mounts have similar expiry semantics to tree mounts. + * The tree is not busy iff no mountpoints are busy and there are no + * autofs submounts. + */ +static int autofs4_direct_busy(struct vfsmount *mnt, + struct dentry *top, + unsigned long timeout, + int do_now) +{ + DPRINTK("top %p %.*s", + top, (int) top->d_name.len, top->d_name.name); + + /* Not a mountpoint - give up */ + if (!d_mountpoint(top)) + return 1; + + /* If it's busy update the expiry counters */ + if (!may_umount_tree(mnt)) { + struct autofs_info *ino = autofs4_dentry_ino(top); + if (ino) + ino->last_used = jiffies; + return 1; + } + + /* Timeout of a direct mount is determined by its top dentry */ + if (!autofs4_can_expire(top, timeout, do_now)) + return 1; + + return 0; +} + /* Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy */ @@ -208,16 +241,48 @@ cont: return NULL; } +/* Check if we can expire a direct mount (possibly a tree) */ +static struct dentry *autofs4_expire_direct(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) +{ + unsigned long timeout; + struct dentry *root = dget(sb->s_root); + int do_now = how & AUTOFS_EXP_IMMEDIATE; + + if (!sbi->exp_timeout || !root) + return NULL; + + now = jiffies; + timeout = sbi->exp_timeout; + + /* Lock the tree as we must expire as a whole */ + spin_lock(&sbi->fs_lock); + if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { + struct autofs_info *ino = autofs4_dentry_ino(root); + + /* Set this flag early to catch sys_chdir and the like */ + ino->flags |= AUTOFS_INF_EXPIRING; + spin_unlock(&sbi->fs_lock); + return root; + } + spin_unlock(&sbi->fs_lock); + dput(root); + + return NULL; +} + /* * Find an eligible tree to time-out * A tree is eligible if :- * - it is unused by any user process * - it has been unused for exp_timeout time */ -static struct dentry *autofs4_expire(struct super_block *sb, - struct vfsmount *mnt, - struct autofs_sb_info *sbi, - int how) +static struct dentry *autofs4_expire_indirect(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) { unsigned long timeout; struct dentry *root = sb->s_root; @@ -249,7 +314,12 @@ static struct dentry *autofs4_expire(struct super_block *sb, dentry = dget(dentry); spin_unlock(&dcache_lock); - /* Case 1: indirect mount or top level direct mount */ + /* + * Case 1: (i) indirect mount or top level pseudo direct mount + * (autofs-4.1). + * (ii) indirect mount with offset mount, check the "/" + * offset (autofs-5.0+). + */ if (d_mountpoint(dentry)) { DPRINTK("checking mountpoint %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); @@ -283,7 +353,10 @@ static struct dentry *autofs4_expire(struct super_block *sb, break; } spin_unlock(&sbi->fs_lock); - /* Case 3: direct mount, expire individual leaves */ + /* + * Case 3: pseudo direct mount, expire individual leaves + * (autofs-4.1). + */ } else { expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); if (expired) { @@ -325,7 +398,7 @@ int autofs4_expire_run(struct super_block *sb, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = autofs_ptype_expire; - if ((dentry = autofs4_expire(sb, mnt, sbi, 0)) == NULL) + if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) return -EAGAIN; pkt.len = dentry->d_name.len; @@ -351,7 +424,12 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) { + if (sbi->type & AUTOFS_TYP_DIRECT) + dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); + else + dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); + + if (dentry) { struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a -- cgit v0.10.2 From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add new packet type for v5 communications This patch define a new autofs packet for autofs v5 and updates the waitq.c functions to handle the additional packet type. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index ed388a1..37c8d90 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -77,6 +77,12 @@ struct autofs_wait_queue { int hash; int len; char *name; + u32 dev; + u64 ino; + uid_t uid; + gid_t gid; + pid_t pid; + pid_t tgid; /* This is for status reporting upon return */ int status; atomic_t notified; @@ -180,13 +186,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info /* Queue management functions */ -enum autofs_notify -{ - NFY_NONE, - NFY_MOUNT, - NFY_EXPIRE -}; - int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); @@ -204,6 +203,16 @@ static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **de return res; } +static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) +{ + return new_encode_dev(sbi->sb->s_dev); +} + +static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) +{ + return sbi->sb->s_root->d_inode->i_ino; +} + static inline int simple_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index b0bb9d4..12da2c9 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -3,7 +3,7 @@ * linux/fs/autofs/waitq.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 2001-2003 Ian Kent + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -97,7 +97,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; - if (type == autofs_ptype_missing) { + switch (type) { + /* Kernel protocol v4 missing and expire packets */ + case autofs_ptype_missing: + { struct autofs_packet_missing *mp = &pkt.missing; pktsz = sizeof(*mp); @@ -106,7 +109,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mp->len = wq->len; memcpy(mp->name, wq->name, wq->len); mp->name[wq->len] = '\0'; - } else if (type == autofs_ptype_expire_multi) { + break; + } + case autofs_ptype_expire_multi: + { struct autofs_packet_expire_multi *ep = &pkt.expire_multi; pktsz = sizeof(*ep); @@ -115,7 +121,34 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, ep->len = wq->len; memcpy(ep->name, wq->name, wq->len); ep->name[wq->len] = '\0'; - } else { + break; + } + /* + * Kernel protocol v5 packet for handling indirect and direct + * mount missing and expire requests + */ + case autofs_ptype_missing_indirect: + case autofs_ptype_expire_indirect: + case autofs_ptype_missing_direct: + case autofs_ptype_expire_direct: + { + struct autofs_v5_packet *packet = &pkt.v5_packet; + + pktsz = sizeof(*packet); + + packet->wait_queue_token = wq->wait_queue_token; + packet->len = wq->len; + memcpy(packet->name, wq->name, wq->len); + packet->name[wq->len] = '\0'; + packet->dev = wq->dev; + packet->ino = wq->ino; + packet->uid = wq->uid; + packet->gid = wq->gid; + packet->pid = wq->pid; + packet->tgid = wq->tgid; + break; + } + default: printk("autofs4_notify_daemon: bad type %d!\n", type); return; } @@ -161,7 +194,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, { struct autofs_wait_queue *wq; char *name; - int len, status; + unsigned int len = 0; + unsigned int hash = 0; + int status; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) @@ -171,11 +206,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, if (!name) return -ENOMEM; - len = autofs4_getpath(sbi, dentry, &name); - if (!len) { - kfree(name); - return -ENOENT; + /* If this is a direct mount request create a dummy name */ + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT)) + len = sprintf(name, "%p", dentry); + else { + len = autofs4_getpath(sbi, dentry, &name); + if (!len) { + kfree(name); + return -ENOENT; + } } + hash = full_name_hash(name, len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); @@ -211,9 +252,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->next = sbi->queues; sbi->queues = wq; init_waitqueue_head(&wq->queue); - wq->hash = dentry->d_name.hash; + wq->hash = hash; wq->name = name; wq->len = len; + wq->dev = autofs4_get_dev(sbi); + wq->ino = autofs4_get_ino(sbi); + wq->uid = current->uid; + wq->gid = current->gid; + wq->pid = current->pid; + wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); atomic_set(&wq->notified, 1); @@ -227,8 +274,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { - int type = (notify == NFY_MOUNT ? - autofs_ptype_missing : autofs_ptype_expire_multi); + int type; + + if (sbi->version < 5) { + if (notify == NFY_MOUNT) + type = autofs_ptype_missing; + else + type = autofs_ptype_expire_multi; + } else { + if (notify == NFY_MOUNT) + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_missing_direct : + autofs_ptype_missing_indirect; + else + type = (sbi->type & AUTOFS_TYP_DIRECT) ? + autofs_ptype_expire_direct : + autofs_ptype_expire_indirect; + } DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index d998ddc..0a6bc52 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -19,18 +19,37 @@ #undef AUTOFS_MIN_PROTO_VERSION #undef AUTOFS_MAX_PROTO_VERSION -#define AUTOFS_PROTO_VERSION 4 +#define AUTOFS_PROTO_VERSION 5 #define AUTOFS_MIN_PROTO_VERSION 3 -#define AUTOFS_MAX_PROTO_VERSION 4 +#define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 10 +#define AUTOFS_PROTO_SUBVERSION 0 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -/* New message type */ -#define autofs_ptype_expire_multi 2 /* Expire entry (umount request) */ +/* Daemon notification packet types */ +enum autofs_notify { + NFY_NONE, + NFY_MOUNT, + NFY_EXPIRE +}; + +/* Kernel protocol version 4 packet types */ + +/* Expire entry (umount request) */ +#define autofs_ptype_expire_multi 2 + +/* Kernel protocol version 5 packet types */ + +/* Indirect mount missing and expire requests. */ +#define autofs_ptype_missing_indirect 3 +#define autofs_ptype_expire_indirect 4 + +/* Direct mount missing and expire requests */ +#define autofs_ptype_missing_direct 5 +#define autofs_ptype_expire_direct 6 /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { @@ -40,14 +59,36 @@ struct autofs_packet_expire_multi { char name[NAME_MAX+1]; }; +/* autofs v5 common packet struct */ +struct autofs_v5_packet { + struct autofs_packet_hdr hdr; + autofs_wqt_t wait_queue_token; + __u32 dev; + __u64 ino; + __u32 uid; + __u32 gid; + __u32 pid; + __u32 tgid; + __u32 len; + char name[NAME_MAX+1]; +}; + +typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; +typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; +typedef struct autofs_v5_packet autofs_packet_missing_direct_t; +typedef struct autofs_v5_packet autofs_packet_expire_direct_t; + union autofs_packet_union { struct autofs_packet_hdr hdr; struct autofs_packet_missing missing; struct autofs_packet_expire expire; struct autofs_packet_expire_multi expire_multi; + struct autofs_v5_packet v5_packet; }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI +#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) #define AUTOFS_IOC_ASKREGHOST _IOR(0x93,0x68,int) #define AUTOFS_IOC_TOGGLEREGHOST _IOR(0x93,0x69,int) -- cgit v0.10.2 From 44d53eb041d901620b1090590a549a705767fd10 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:56 -0800 Subject: [PATCH] autofs4: change AUTOFS_TYP_* AUTOFS_TYPE_* Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 37c8d90..ff6239d 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -91,9 +91,9 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d -#define AUTOFS_TYP_INDIRECT 0x0001 -#define AUTOFS_TYP_DIRECT 0x0002 -#define AUTOFS_TYP_OFFSET 0x0004 +#define AUTOFS_TYPE_INDIRECT 0x0001 +#define AUTOFS_TYPE_DIRECT 0x0002 +#define AUTOFS_TYPE_OFFSET 0x0004 struct autofs_sb_info { u32 magic; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8fd92ea..08e3321 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -424,7 +424,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if (sbi->type & AUTOFS_TYP_DIRECT) + if (sbi->type & AUTOFS_TYPE_DIRECT) dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); else dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3801bed..9438889 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -178,9 +178,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); - if (sbi->type & AUTOFS_TYP_OFFSET) + if (sbi->type & AUTOFS_TYPE_OFFSET) seq_printf(m, ",offset"); - else if (sbi->type & AUTOFS_TYP_DIRECT) + else if (sbi->type & AUTOFS_TYPE_DIRECT) seq_printf(m, ",direct"); else seq_printf(m, ",indirect"); @@ -267,13 +267,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *maxproto = option; break; case Opt_indirect: - *type = AUTOFS_TYP_INDIRECT; + *type = AUTOFS_TYPE_INDIRECT; break; case Opt_direct: - *type = AUTOFS_TYP_DIRECT; + *type = AUTOFS_TYPE_DIRECT; break; case Opt_offset: - *type = AUTOFS_TYP_DIRECT | AUTOFS_TYP_OFFSET; + *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; break; default: return 1; @@ -364,7 +364,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode->i_fop = &autofs4_root_operations; - root_inode->i_op = sbi->type & AUTOFS_TYP_DIRECT ? + root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? &autofs4_direct_root_inode_operations : &autofs4_indirect_root_inode_operations; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 12da2c9..894d746 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -207,7 +207,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -ENOMEM; /* If this is a direct mount request create a dummy name */ - if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYP_DIRECT)) + if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) len = sprintf(name, "%p", dentry); else { len = autofs4_getpath(sbi, dentry, &name); @@ -283,11 +283,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, type = autofs_ptype_expire_multi; } else { if (notify == NFY_MOUNT) - type = (sbi->type & AUTOFS_TYP_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_DIRECT) ? autofs_ptype_missing_direct : autofs_ptype_missing_indirect; else - type = (sbi->type & AUTOFS_TYP_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_DIRECT) ? autofs_ptype_expire_direct : autofs_ptype_expire_indirect; } -- cgit v0.10.2 From 3370c74b2e147169d5bba6665e03d3281f5795ed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 27 Mar 2006 01:14:57 -0800 Subject: [PATCH] Remove redundant check from autofs4_put_super We have to have a valid sbi here, or we'd have oopsed already. (There's a dereference of sbi->catatonic a few lines above) Coverity #740 Signed-off-by: Dave Jones Cc: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 9438889..4eddee4e 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -157,8 +157,7 @@ static void autofs4_put_super(struct super_block *sb) autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ /* Clean up and release dangling references */ - if (sbi) - autofs4_force_release(sbi); + autofs4_force_release(sbi); kfree(sbi); -- cgit v0.10.2 From 871f94344cea36b2ce91231f442f9f9298529712 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:58 -0800 Subject: [PATCH] autofs4: follow_link missing functionality This functionality is also need for operation of autofs v5 direct mounts. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 08e3321..b8ce026 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -113,10 +113,6 @@ static int autofs4_direct_busy(struct vfsmount *mnt, DPRINTK("top %p %.*s", top, (int) top->d_name.len, top->d_name.name); - /* Not a mountpoint - give up */ - if (!d_mountpoint(top)) - return 1; - /* If it's busy update the expiry counters */ if (!may_umount_tree(mnt)) { struct autofs_info *ino = autofs4_dentry_ino(top); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3f00485..c8fe43a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -57,6 +57,9 @@ struct inode_operations autofs4_indirect_root_inode_operations = { struct inode_operations autofs4_direct_root_inode_operations = { .lookup = autofs4_lookup, + .unlink = autofs4_dir_unlink, + .mkdir = autofs4_dir_mkdir, + .rmdir = autofs4_dir_rmdir, .follow_link = autofs4_follow_link, }; @@ -337,15 +340,50 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (oz_mode || !lookup_type) goto done; - status = try_to_fill_dentry(dentry, 0); - if (status) - goto out_error; + /* + * If a request is pending wait for it. + * If it's a mount then it won't be expired till at least + * a liitle later and if it's an expire then we might need + * to mount it again. + */ + if (autofs4_ispending(dentry)) { + DPRINTK("waiting for active request %p name=%.*s", + dentry, dentry->d_name.len, dentry->d_name.name); - if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { - status = -ENOENT; - goto out_error; + status = autofs4_wait(sbi, dentry, NFY_NONE); + + DPRINTK("request done status=%d", status); } + /* + * If the dentry contains directories then it is an + * autofs multi-mount with no root mount offset. So + * don't try to mount it again. + */ + spin_lock(&dcache_lock); + if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + spin_unlock(&dcache_lock); + + status = try_to_fill_dentry(dentry, 0); + if (status) + goto out_error; + + /* + * The mount succeeded but if there is no root mount + * it must be an autofs multi-mount with no root offset + * so we don't need to follow the mount. + */ + if (d_mountpoint(dentry)) { + if (!autofs4_follow_mount(&nd->mnt, &nd->dentry)) { + status = -ENOENT; + goto out_error; + } + } + + goto done; + } + spin_unlock(&dcache_lock); + done: return NULL; -- cgit v0.10.2 From 3e7b19198003fc25b11838e709f17d4fa173b2d7 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] autofs4: atomic var underflow Fix accidental underflow of the atomic counter. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index ff6239d..617fd7b 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -85,7 +85,7 @@ struct autofs_wait_queue { pid_t tgid; /* This is for status reporting upon return */ int status; - atomic_t notified; + atomic_t notify; atomic_t wait_ctr; }; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 894d746..142ab6a 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -263,7 +263,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); - atomic_set(&wq->notified, 1); + atomic_set(&wq->notify, 1); mutex_unlock(&sbi->wq_mutex); } else { atomic_inc(&wq->wait_ctr); @@ -273,9 +273,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); } - if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { + if (notify != NFY_NONE && atomic_read(&wq->notify)) { int type; + atomic_dec(&wq->notify); + if (sbi->version < 5) { if (notify == NFY_MOUNT) type = autofs_ptype_missing; -- cgit v0.10.2 From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache' The 'auth_domain's are simply handles on internal data structures. They do not cache information from user-space, and forcing them into the mold of a 'cache' misrepresents their true nature and causes confusion. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 417ec02..ac09977 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -242,7 +242,7 @@ static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_client->h); + kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; new->ek_fsid[0] = item->ek_fsid[0]; @@ -474,7 +474,7 @@ static inline int svc_export_match(struct svc_export *a, struct svc_export *b) } static inline void svc_export_init(struct svc_export *new, struct svc_export *item) { - cache_get(&item->ex_client->h); + kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); @@ -1129,7 +1129,6 @@ exp_delclient(struct nfsctl_client *ncp) */ if (dom) { err = auth_unix_forget_old(dom); - dom->h.expiry_time = get_seconds(); auth_domain_put(dom); } diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c119ce7..2fe2087 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -45,9 +45,10 @@ struct svc_rqst; /* forward decl */ * of ip addresses to the given client. */ struct auth_domain { - struct cache_head h; + struct kref ref; + struct hlist_node hash; char *name; - int flavour; + struct auth_ops *flavour; }; /* @@ -86,6 +87,9 @@ struct auth_domain { * * domain_release() * This call releases a domain. + * set_client() + * Givens a pending request (struct svc_rqst), finds and assigns + * an appropriate 'auth_domain' as the client. */ struct auth_ops { char * name; @@ -117,7 +121,7 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); -extern struct auth_domain *auth_domain_lookup(struct auth_domain *item, int set); +extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); @@ -160,8 +164,6 @@ static inline unsigned long hash_mem(char *buf, int length, int bits) return hash >> (BITS_PER_LONG - bits); } -extern struct cache_detail auth_domain_cache, ip_map_cache; - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 23632d8..6b073c2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -645,6 +645,8 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) return auth_domain_find(name); } +static struct auth_ops svcauthops_gss; + int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { @@ -655,20 +657,18 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) goto out; - cache_init(&new->h.h); + kref_init(&new->h.ref); new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); if (!new->h.name) goto out_free_dom; strcpy(new->h.name, name); - new->h.flavour = RPC_AUTH_GSS; + new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; - new->h.h.expiry_time = NEVER; - test = auth_domain_lookup(&new->h, 1); - if (test == &new->h) { - BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); - } else { /* XXX Duplicate registration? */ + test = auth_domain_lookup(name, &new->h); + if (test != &new->h) { /* XXX Duplicate registration? */ auth_domain_put(&new->h); + /* dangling ref-count... */ goto out; } return 0; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9f73732..4040119 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -142,6 +142,7 @@ EXPORT_SYMBOL(nlm_debug); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); +extern struct cache_detail ip_map_cache; static int __init init_sunrpc(void) @@ -158,7 +159,6 @@ init_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif - cache_register(&auth_domain_cache); cache_register(&ip_map_cache); out: return err; @@ -169,8 +169,6 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - if (cache_unregister(&auth_domain_cache)) - printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); if (cache_unregister(&ip_map_cache)) printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index dda4f0c..5b28c61 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -106,112 +106,56 @@ svc_auth_unregister(rpc_authflavor_t flavor) EXPORT_SYMBOL(svc_auth_unregister); /************************************************** - * cache for domain name to auth_domain - * Entries are only added by flavours which will normally - * have a structure that 'inherits' from auth_domain. - * e.g. when an IP -> domainname is given to auth_unix, - * and the domain name doesn't exist, it will create a - * auth_unix_domain and add it to this hash table. - * If it finds the name does exist, but isn't AUTH_UNIX, - * it will complain. + * 'auth_domains' are stored in a hash table indexed by name. + * When the last reference to an 'auth_domain' is dropped, + * the object is unhashed and freed. + * If auth_domain_lookup fails to find an entry, it will return + * it's second argument 'new'. If this is non-null, it will + * have been atomically linked into the table. */ -/* - * Auth auth_domain cache is somewhat different to other caches, - * largely because the entries are possibly of different types: - * each auth flavour has it's own type. - * One consequence of this that DefineCacheLookup cannot - * allocate a new structure as it cannot know the size. - * Notice that the "INIT" code fragment is quite different - * from other caches. When auth_domain_lookup might be - * creating a new domain, the new domain is passed in - * complete and it is used as-is rather than being copied into - * another structure. - */ #define DN_HASHBITS 6 #define DN_HASHMAX (1<flavour]->domain_release(dom); -} - - -struct cache_detail auth_domain_cache = { - .owner = THIS_MODULE, - .hash_size = DN_HASHMAX, - .hash_table = auth_domain_table, - .name = "auth.domain", - .cache_put = auth_domain_drop, -}; +static struct hlist_head auth_domain_table[DN_HASHMAX]; +static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; void auth_domain_put(struct auth_domain *dom) { - auth_domain_drop(&dom->h, &auth_domain_cache); -} - -static inline int auth_domain_hash(struct auth_domain *item) -{ - return hash_str(item->name, DN_HASHBITS); -} -static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item) -{ - return strcmp(tmp->name, item->name) == 0; + if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { + hlist_del(&dom->hash); + dom->flavour->domain_release(dom); + } } struct auth_domain * -auth_domain_lookup(struct auth_domain *item, int set) +auth_domain_lookup(char *name, struct auth_domain *new) { - struct auth_domain *tmp = NULL; - struct cache_head **hp, **head; - head = &auth_domain_cache.hash_table[auth_domain_hash(item)]; - - if (set) - write_lock(&auth_domain_cache.hash_lock); - else - read_lock(&auth_domain_cache.hash_lock); - for (hp=head; *hp != NULL; hp = &tmp->h.next) { - tmp = container_of(*hp, struct auth_domain, h); - if (!auth_domain_match(tmp, item)) - continue; - if (!set) { - cache_get(&tmp->h); - goto out_noset; + struct auth_domain *hp; + struct hlist_head *head; + struct hlist_node *np; + + head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; + + spin_lock(&auth_domain_lock); + + hlist_for_each_entry(hp, np, head, hash) { + if (strcmp(hp->name, name)==0) { + kref_get(&hp->ref); + spin_unlock(&auth_domain_lock); + return hp; } - *hp = tmp->h.next; - tmp->h.next = NULL; - auth_domain_drop(&tmp->h, &auth_domain_cache); - goto out_set; } - /* Didn't find anything */ - if (!set) - goto out_nada; - auth_domain_cache.entries++; -out_set: - item->h.next = *head; - *head = &item->h; - cache_get(&item->h); - write_unlock(&auth_domain_cache.hash_lock); - cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time); - cache_get(&item->h); - return item; -out_nada: - tmp = NULL; -out_noset: - read_unlock(&auth_domain_cache.hash_lock); - return tmp; + if (new) { + hlist_add_head(&new->hash, head); + kref_get(&new->ref); + } + spin_unlock(&auth_domain_lock); + return new; } struct auth_domain *auth_domain_find(char *name) { - struct auth_domain *rv, ad; - - ad.name = name; - rv = auth_domain_lookup(&ad, 0); - return rv; + return auth_domain_lookup(name, NULL); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3e6c694..17e8b2a 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -27,41 +27,35 @@ struct unix_domain { /* other stuff later */ }; +extern struct auth_ops svcauth_unix; + struct auth_domain *unix_domain_find(char *name) { - struct auth_domain *rv, ud; - struct unix_domain *new; - - ud.name = name; - - rv = auth_domain_lookup(&ud, 0); - - foundit: - if (rv && rv->flavour != RPC_AUTH_UNIX) { - auth_domain_put(rv); - return NULL; - } - if (rv) - return rv; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) - return NULL; - cache_init(&new->h.h); - new->h.name = kstrdup(name, GFP_KERNEL); - new->h.flavour = RPC_AUTH_UNIX; - new->addr_changes = 0; - new->h.h.expiry_time = NEVER; - - rv = auth_domain_lookup(&new->h, 2); - if (rv == &new->h) { - if (atomic_dec_and_test(&new->h.h.refcnt)) BUG(); - } else { - auth_domain_put(&new->h); - goto foundit; + struct auth_domain *rv; + struct unix_domain *new = NULL; + + rv = auth_domain_lookup(name, NULL); + while(1) { + if (rv != &new->h) { + if (new) auth_domain_put(&new->h); + return rv; + } + if (rv && rv->flavour != &svcauth_unix) { + auth_domain_put(rv); + return NULL; + } + if (rv) + return rv; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) + return NULL; + kref_init(&new->h.ref); + new->h.name = kstrdup(name, GFP_KERNEL); + new->h.flavour = &svcauth_unix; + new->addr_changes = 0; + rv = auth_domain_lookup(name, &new->h); } - - return rv; } static void svcauth_unix_domain_release(struct auth_domain *dom) @@ -130,7 +124,7 @@ static inline void ip_map_init(struct ip_map *new, struct ip_map *item) } static inline void ip_map_update(struct ip_map *new, struct ip_map *item) { - cache_get(&item->m_client->h.h); + kref_get(&item->m_client->h.ref); new->m_client = item->m_client; new->m_add_change = item->m_add_change; } @@ -272,7 +266,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) struct unix_domain *udom; struct ip_map ip, *ipmp; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); strcpy(ip.m_class, "nfsd"); @@ -295,7 +289,7 @@ int auth_unix_forget_old(struct auth_domain *dom) { struct unix_domain *udom; - if (dom->flavour != RPC_AUTH_UNIX) + if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); udom->addr_changes++; @@ -323,7 +317,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = NULL; } else { rv = &ipm->m_client->h; - cache_get(&rv->h); + kref_get(&rv->ref); } ip_map_put(&ipm->h, &ip_map_cache); return rv; @@ -332,7 +326,6 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) void svcauth_unix_purge(void) { cache_purge(&ip_map_cache); - cache_purge(&auth_domain_cache); } static int @@ -361,7 +354,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_DENIED; case 0: rqstp->rq_client = &ipm->m_client->h; - cache_get(&rqstp->rq_client->h); + kref_get(&rqstp->rq_client->ref); ip_map_put(&ipm->h, &ip_map_cache); break; } -- cgit v0.10.2 From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:00 -0800 Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export Current svc_expkey holds a pointer to the svc_export structure, so updates to that structure have to be in-place, which is a wart on the whole cache infrastruct. So we break that linkage and just do a second lookup. If this became a performance issue, it would be possible to put a direct link back in which was only used conditionally. i.e. when an object is replaced in the cache, we set a flag in the old object. When dereferencing the link from svc_expkey, if the flag is set, we drop the reference and do a fresh lookup. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ac09977..587829e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -73,8 +73,10 @@ void expkey_put(struct cache_head *item, struct cache_detail *cd) if (cache_put(item, cd)) { struct svc_expkey *key = container_of(item, struct svc_expkey, h); if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - exp_put(key->ek_export); + !test_bit(CACHE_NEGATIVE, &item->flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); + } auth_domain_put(key->ek_client); kfree(key); } @@ -164,26 +166,18 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) } else { struct nameidata nd; struct svc_expkey *ek; - struct svc_export *exp; err = path_lookup(buf, 0, &nd); if (err) goto out; dprintk("Found the path %s\n", buf); - exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); - - err = -ENOENT; - if (!exp) - goto out_nd; - key.ek_export = exp; - dprintk("And found export\n"); + key.ek_mnt = nd.mnt; + key.ek_dentry = nd.dentry; ek = svc_expkey_lookup(&key, 1); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - exp_put(exp); err = 0; - out_nd: path_release(&nd); } cache_flush(); @@ -214,7 +208,7 @@ static int expkey_show(struct seq_file *m, if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { seq_printf(m, " "); - seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n"); + seq_path(m, ek->ek_mnt, ek->ek_dentry, "\\ \t\n"); } seq_printf(m, "\n"); return 0; @@ -252,8 +246,8 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) { - cache_get(&item->ek_export->h); - new->ek_export = item->ek_export; + new->ek_mnt = mntget(item->ek_mnt); + new->ek_dentry = dget(item->ek_dentry); } static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ @@ -519,7 +513,8 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.ek_client = clp; key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_export = exp; + key.ek_mnt = exp->ex_mnt; + key.ek_dentry = exp->ex_dentry; key.h.expiry_time = NEVER; key.h.flags = 0; @@ -741,8 +736,8 @@ exp_export(struct nfsctl_export *nxp) if ((nxp->ex_flags & NFSEXP_FSID) && (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && !IS_ERR(fsid_key) && - fsid_key->ek_export && - fsid_key->ek_export != exp) + fsid_key->ek_mnt && + (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) goto finish; if (exp) { @@ -912,6 +907,24 @@ out: return err; } +struct svc_export * +exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, + struct cache_req *reqp) +{ + struct svc_export *exp; + struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + if (!ek || IS_ERR(ek)) + return ERR_PTR(PTR_ERR(ek)); + + exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); + expkey_put(&ek->h, &svc_expkey_cache); + + if (!exp || IS_ERR(exp)) + return ERR_PTR(PTR_ERR(exp)); + return exp; +} + + /* * Called when we need the filehandle for the root of the pseudofs, * for a given NFSv4 client. The root is defined to be the @@ -922,6 +935,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, struct cache_req *creq) { struct svc_expkey *fsid_key; + struct svc_export *exp; int rv; u32 fsidv[2]; @@ -933,8 +947,14 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, if (!fsid_key || IS_ERR(fsid_key)) return nfserr_perm; - rv = fh_compose(fhp, fsid_key->ek_export, - fsid_key->ek_export->ex_dentry, NULL); + exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq); + if (exp == NULL) + rv = nfserr_perm; + else if (IS_ERR(exp)) + rv = nfserrno(PTR_ERR(exp)); + else + rv = fh_compose(fhp, exp, + fsid_key->ek_dentry, NULL); expkey_put(&fsid_key->h, &svc_expkey_cache); return rv; } diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 6bad476..d52e0b7 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -67,7 +67,8 @@ struct svc_expkey { int ek_fsidtype; u32 ek_fsid[3]; - struct svc_export * ek_export; + struct vfsmount * ek_mnt; + struct dentry * ek_dentry; }; #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) @@ -114,22 +115,9 @@ static inline void exp_get(struct svc_export *exp) { cache_get(&exp->h); } -static inline struct svc_export * +extern struct svc_export * exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, - struct cache_req *reqp) -{ - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); - if (ek && !IS_ERR(ek)) { - struct svc_export *exp = ek->ek_export; - int err; - exp_get(exp); - expkey_put(&ek->h, &svc_expkey_cache); - if ((err = cache_check(&svc_export_cache, &exp->h, reqp))) - exp = ERR_PTR(err); - return exp; - } else - return ERR_PTR(PTR_ERR(ek)); -} + struct cache_req *reqp); #endif /* __KERNEL__ */ -- cgit v0.10.2 From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:01 -0800 Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches These were an unnecessary wart. Also only have one 'DefineSimpleCache..' instead of two. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 587829e..c591761 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -250,7 +250,7 @@ static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey * new->ek_dentry = dget(item->ek_dentry); } -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ +static DefineSimpleCacheLookup(svc_expkey, svc_expkey) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -482,7 +482,7 @@ static inline void svc_export_update(struct svc_export *new, struct svc_export * new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ +static DefineSimpleCacheLookup(svc_export, svc_export) struct svc_expkey * diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 1336965..dea690a 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -76,12 +76,6 @@ struct ent { char authname[IDMAP_NAMESZ]; }; -#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) - /* Common entry handling */ #define ENT_HASHBITS 8 @@ -264,7 +258,7 @@ out: return error; } -static DefineSimpleCacheLookupMap(ent, idtoname); +static DefineSimpleCacheLookup(ent, idtoname); /* * Name -> ID cache @@ -390,7 +384,7 @@ out: return (error); } -static DefineSimpleCacheLookupMap(ent, nametoid); +static DefineSimpleCacheLookup(ent, nametoid); /* * Exported API diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c4e3ea7..405ac14 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -133,14 +133,11 @@ struct cache_deferred_req { * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1 : * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -159,13 +156,8 @@ struct cache_deferred_req { * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure - * - * WARNING: any substantial changes to this must be reflected in - * net/sunrpc/svcauth.c(auth_domain_lookup) - * which is a similar routine that is open-coded. */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ @@ -179,13 +171,13 @@ RTN *FNAME ARGS \ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -206,7 +198,7 @@ RTN *FNAME ARGS \ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -239,10 +231,12 @@ RTN *FNAME ARGS \ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ - & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) +#define DefineSimpleCacheLookup(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 6b073c2..aadb4e8 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -259,7 +259,7 @@ static struct cache_detail rsi_cache = { .cache_parse = rsi_parse, }; -static DefineSimpleCacheLookup(rsi, 0) +static DefineSimpleCacheLookup(rsi, rsi) /* * The rpcsec_context cache is used to store a context that is @@ -446,7 +446,7 @@ static struct cache_detail rsc_cache = { .cache_parse = rsc_parse, }; -static DefineSimpleCacheLookup(rsc, 0); +static DefineSimpleCacheLookup(rsc, rsc); static struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 17e8b2a..7ddf068 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -258,7 +258,7 @@ struct cache_detail ip_map_cache = { .cache_show = ip_map_show, }; -static DefineSimpleCacheLookup(ip_map, 0) +static DefineSimpleCacheLookup(ip_map, ip_map) int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) -- cgit v0.10.2 From 15a5f6bd23eddd5b3be80366f364be04fb1c1c99 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:02 -0800 Subject: [PATCH] knfsd: Create cache_lookup function instead of using a macro to declare one The C++-like 'template' approach proves to be too ugly and hard to work with. The old 'template' won't go away until all users are updated. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 405ac14..3e17a5f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -81,6 +81,11 @@ struct cache_detail { struct cache_detail *cd, struct cache_head *h); + struct cache_head * (*alloc)(void); + int (*match)(struct cache_head *orig, struct cache_head *new); + void (*init)(struct cache_head *orig, struct cache_head *new); + void (*update)(struct cache_head *orig, struct cache_head *new); + /* fields below this comment are for internal use * and should not be touched by cache owners */ @@ -237,6 +242,13 @@ RTN *FNAME ARGS \ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ STRUCT##_init(new, item), STRUCT##_update(tmp, item)) +extern struct cache_head * +sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash); +extern struct cache_head * +sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash); + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 0acccfe..4449dc5 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -47,6 +47,104 @@ void cache_init(struct cache_head *h) h->last_refresh = now; } +struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash) +{ + struct cache_head **head, **hp; + struct cache_head *new = NULL; + + head = &detail->hash_table[hash]; + + read_lock(&detail->hash_lock); + + for (hp=head; *hp != NULL ; hp = &(*hp)->next) { + struct cache_head *tmp = *hp; + if (detail->match(tmp, key)) { + cache_get(tmp); + read_unlock(&detail->hash_lock); + return tmp; + } + } + read_unlock(&detail->hash_lock); + /* Didn't find anything, insert an empty entry */ + + new = detail->alloc(); + if (!new) + return NULL; + cache_init(new); + + write_lock(&detail->hash_lock); + + /* check if entry appeared while we slept */ + for (hp=head; *hp != NULL ; hp = &(*hp)->next) { + struct cache_head *tmp = *hp; + if (detail->match(tmp, key)) { + cache_get(tmp); + write_unlock(&detail->hash_lock); + detail->cache_put(new, detail); + return tmp; + } + } + detail->init(new, key); + new->next = *head; + *head = new; + detail->entries++; + cache_get(new); + write_unlock(&detail->hash_lock); + + return new; +} +EXPORT_SYMBOL(sunrpc_cache_lookup); + +struct cache_head *sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash) +{ + /* The 'old' entry is to be replaced by 'new'. + * If 'old' is not VALID, we update it directly, + * otherwise we need to replace it + */ + struct cache_head **head; + struct cache_head *tmp; + + if (!test_bit(CACHE_VALID, &old->flags)) { + write_lock(&detail->hash_lock); + if (!test_bit(CACHE_VALID, &old->flags)) { + if (test_bit(CACHE_NEGATIVE, &new->flags)) + set_bit(CACHE_NEGATIVE, &old->flags); + else + detail->update(old, new); + /* FIXME cache_fresh should come first */ + write_unlock(&detail->hash_lock); + cache_fresh(detail, old, new->expiry_time); + return old; + } + write_unlock(&detail->hash_lock); + } + /* We need to insert a new entry */ + tmp = detail->alloc(); + if (!tmp) { + detail->cache_put(old, detail); + return NULL; + } + cache_init(tmp); + detail->init(tmp, old); + head = &detail->hash_table[hash]; + + write_lock(&detail->hash_lock); + if (test_bit(CACHE_NEGATIVE, &new->flags)) + set_bit(CACHE_NEGATIVE, &tmp->flags); + else + detail->update(tmp, new); + tmp->next = *head; + *head = tmp; + cache_get(tmp); + write_unlock(&detail->hash_lock); + cache_fresh(detail, tmp, new->expiry_time); + cache_fresh(detail, old, 0); + detail->cache_put(old, detail); + return tmp; +} +EXPORT_SYMBOL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); /* -- cgit v0.10.2 From 1a9917c2da5b81fa536e560be0c431435e2c965b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:02 -0800 Subject: [PATCH] knfsd: Convert ip_map cache to use the new lookup routine Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7ddf068..7e38621 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -106,28 +106,38 @@ static inline int hash_ip(unsigned long ip) return (hash ^ (hash>>8)) & 0xff; } #endif - -static inline int ip_map_hash(struct ip_map *item) -{ - return hash_str(item->m_class, IP_HASHBITS) ^ - hash_ip((unsigned long)item->m_addr.s_addr); -} -static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp) +static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { - return strcmp(tmp->m_class, item->m_class) == 0 - && tmp->m_addr.s_addr == item->m_addr.s_addr; + struct ip_map *orig = container_of(corig, struct ip_map, h); + struct ip_map *new = container_of(cnew, struct ip_map, h); + return strcmp(orig->m_class, new->m_class) == 0 + && orig->m_addr.s_addr == new->m_addr.s_addr; } -static inline void ip_map_init(struct ip_map *new, struct ip_map *item) +static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { + struct ip_map *new = container_of(cnew, struct ip_map, h); + struct ip_map *item = container_of(citem, struct ip_map, h); + strcpy(new->m_class, item->m_class); new->m_addr.s_addr = item->m_addr.s_addr; } -static inline void ip_map_update(struct ip_map *new, struct ip_map *item) +static void update(struct cache_head *cnew, struct cache_head *citem) { + struct ip_map *new = container_of(cnew, struct ip_map, h); + struct ip_map *item = container_of(citem, struct ip_map, h); + kref_get(&item->m_client->h.ref); new->m_client = item->m_client; new->m_add_change = item->m_add_change; } +static struct cache_head *ip_map_alloc(void) +{ + struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} static void ip_map_request(struct cache_detail *cd, struct cache_head *h, @@ -148,7 +158,8 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct ip_map *ip_map_lookup(struct ip_map *, int); +static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); +static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -160,7 +171,11 @@ static int ip_map_parse(struct cache_detail *cd, int len; int b1,b2,b3,b4; char c; - struct ip_map ipm, *ipmp; + char class[8]; + struct in_addr addr; + int err; + + struct ip_map *ipmp; struct auth_domain *dom; time_t expiry; @@ -169,7 +184,7 @@ static int ip_map_parse(struct cache_detail *cd, mesg[mlen-1] = 0; /* class */ - len = qword_get(&mesg, ipm.m_class, sizeof(ipm.m_class)); + len = qword_get(&mesg, class, sizeof(class)); if (len <= 0) return -EINVAL; /* ip address */ @@ -194,25 +209,22 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - ipm.m_addr.s_addr = + addr.s_addr = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - ipm.h.flags = 0; - if (dom) { - ipm.m_client = container_of(dom, struct unix_domain, h); - ipm.m_add_change = ipm.m_client->addr_changes; + + ipmp = ip_map_lookup(class,addr); + if (ipmp) { + err = ip_map_update(ipmp, + container_of(dom, struct unix_domain, h), + expiry); } else - set_bit(CACHE_NEGATIVE, &ipm.h.flags); - ipm.h.expiry_time = expiry; + err = -ENOMEM; - ipmp = ip_map_lookup(&ipm, 1); - if (ipmp) - ip_map_put(&ipmp->h, &ip_map_cache); if (dom) auth_domain_put(dom); - if (!ipmp) - return -ENOMEM; + cache_flush(); - return 0; + return err; } static int ip_map_show(struct seq_file *m, @@ -256,32 +268,70 @@ struct cache_detail ip_map_cache = { .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, + .match = ip_map_match, + .init = ip_map_init, + .update = update, + .alloc = ip_map_alloc, }; -static DefineSimpleCacheLookup(ip_map, ip_map) +static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) +{ + struct ip_map ip; + struct cache_head *ch; + + strcpy(ip.m_class, class); + ip.m_addr = addr; + ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, + hash_str(class, IP_HASHBITS) ^ + hash_ip((unsigned long)addr.s_addr)); + + if (ch) + return container_of(ch, struct ip_map, h); + else + return NULL; +} +static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) +{ + struct ip_map ip; + struct cache_head *ch; + + ip.m_client = udom; + ip.h.flags = 0; + if (!udom) + set_bit(CACHE_NEGATIVE, &ip.h.flags); + else { + ip.m_add_change = udom->addr_changes; + /* if this is from the legacy set_client system call, + * we need m_add_change to be one higher + */ + if (expiry == NEVER) + ip.m_add_change++; + } + ip.h.expiry_time = expiry; + ch = sunrpc_cache_update(&ip_map_cache, + &ip.h, &ipm->h, + hash_str(ipm->m_class, IP_HASHBITS) ^ + hash_ip((unsigned long)ipm->m_addr.s_addr)); + if (!ch) + return -ENOMEM; + ip_map_put(ch, &ip_map_cache); + return 0; +} int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) { struct unix_domain *udom; - struct ip_map ip, *ipmp; + struct ip_map *ipmp; if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); - strcpy(ip.m_class, "nfsd"); - ip.m_addr = addr; - ip.m_client = udom; - ip.m_add_change = udom->addr_changes+1; - ip.h.flags = 0; - ip.h.expiry_time = NEVER; - - ipmp = ip_map_lookup(&ip, 1); + ipmp = ip_map_lookup("nfsd", addr); - if (ipmp) { - ip_map_put(&ipmp->h, &ip_map_cache); - return 0; - } else + if (ipmp) + return ip_map_update(ipmp, udom, NEVER); + else return -ENOMEM; } @@ -304,7 +354,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) strcpy(key.m_class, "nfsd"); key.m_addr = addr; - ipm = ip_map_lookup(&key, 0); + ipm = ip_map_lookup("nfsd", addr); if (!ipm) return NULL; @@ -331,16 +381,14 @@ void svcauth_unix_purge(void) static int svcauth_unix_set_client(struct svc_rqst *rqstp) { - struct ip_map key, *ipm; + struct ip_map *ipm; rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) return SVC_OK; - strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class); - key.m_addr = rqstp->rq_addr.sin_addr; - - ipm = ip_map_lookup(&key, 0); + ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, + rqstp->rq_addr.sin_addr); if (ipm == NULL) return SVC_DENIED; -- cgit v0.10.2 From 4f7774c3a0558526c0faaf525581f2029480b313 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:03 -0800 Subject: [PATCH] knfsd: Use new cache_lookup for svc_export Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c591761..2ebd77d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -258,16 +258,6 @@ static DefineSimpleCacheLookup(svc_expkey, svc_expkey) static struct cache_head *export_table[EXPORT_HASHMAX]; -static inline int svc_export_hash(struct svc_export *item) -{ - int rv; - - rv = hash_ptr(item->ex_client, EXPORT_HASHBITS); - rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS); - rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS); - return rv; -} - void svc_export_put(struct cache_head *item, struct cache_detail *cd) { if (cache_put(item, cd)) { @@ -298,7 +288,8 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_export *svc_export_lookup(struct svc_export *, int); +struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); +static struct svc_export *svc_export_lookup(struct svc_export *); static int check_export(struct inode *inode, int flags) { @@ -411,11 +402,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out; } - expp = svc_export_lookup(&exp, 1); + expp = svc_export_lookup(&exp); if (expp) - exp_put(expp); - err = 0; + expp = svc_export_update(&exp, expp); + else + err = -ENOMEM; cache_flush(); + if (expp == NULL) + err = -ENOMEM; + else + exp_put(expp); out: if (nd.dentry) path_release(&nd); @@ -449,40 +445,95 @@ static int svc_export_show(struct seq_file *m, seq_puts(m, ")\n"); return 0; } -struct cache_detail svc_export_cache = { - .owner = THIS_MODULE, - .hash_size = EXPORT_HASHMAX, - .hash_table = export_table, - .name = "nfsd.export", - .cache_put = svc_export_put, - .cache_request = svc_export_request, - .cache_parse = svc_export_parse, - .cache_show = svc_export_show, -}; - -static inline int svc_export_match(struct svc_export *a, struct svc_export *b) +static int svc_export_match(struct cache_head *a, struct cache_head *b) { - return a->ex_client == b->ex_client && - a->ex_dentry == b->ex_dentry && - a->ex_mnt == b->ex_mnt; + struct svc_export *orig = container_of(a, struct svc_export, h); + struct svc_export *new = container_of(b, struct svc_export, h); + return orig->ex_client == new->ex_client && + orig->ex_dentry == new->ex_dentry && + orig->ex_mnt == new->ex_mnt; } -static inline void svc_export_init(struct svc_export *new, struct svc_export *item) + +static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) { + struct svc_export *new = container_of(cnew, struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); } -static inline void svc_export_update(struct svc_export *new, struct svc_export *item) +static void export_update(struct cache_head *cnew, struct cache_head *citem) { + struct svc_export *new = container_of(cnew, struct svc_export, h); + struct svc_export *item = container_of(citem, struct svc_export, h); + new->ex_flags = item->ex_flags; new->ex_anon_uid = item->ex_anon_uid; new->ex_anon_gid = item->ex_anon_gid; new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export, svc_export) +static struct cache_head *svc_export_alloc(void) +{ + struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +struct cache_detail svc_export_cache = { + .owner = THIS_MODULE, + .hash_size = EXPORT_HASHMAX, + .hash_table = export_table, + .name = "nfsd.export", + .cache_put = svc_export_put, + .cache_request = svc_export_request, + .cache_parse = svc_export_parse, + .cache_show = svc_export_show, + .match = svc_export_match, + .init = svc_export_init, + .update = export_update, + .alloc = svc_export_alloc, +}; + +static struct svc_export * +svc_export_lookup(struct svc_export *exp) +{ + struct cache_head *ch; + int hash; + hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(exp->ex_mnt, EXPORT_HASHBITS); + + ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} + +struct svc_export * +svc_export_update(struct svc_export *new, struct svc_export *old) +{ + struct cache_head *ch; + int hash; + hash = hash_ptr(old->ex_client, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_dentry, EXPORT_HASHBITS); + hash ^= hash_ptr(old->ex_mnt, EXPORT_HASHBITS); + + ch = sunrpc_cache_update(&svc_export_cache, &new->h, + &old->h, + hash); + if (ch) + return container_of(ch, struct svc_export, h); + else + return NULL; +} struct svc_expkey * @@ -568,7 +619,7 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, key.ex_mnt = mnt; key.ex_dentry = dentry; - exp = svc_export_lookup(&key, 0); + exp = svc_export_lookup(&key); if (exp != NULL) switch (cache_check(&svc_export_cache, &exp->h, reqp)) { case 0: break; @@ -770,13 +821,13 @@ exp_export(struct nfsctl_export *nxp) new.ex_anon_gid = nxp->ex_anon_gid; new.ex_fsid = nxp->ex_dev; - exp = svc_export_lookup(&new, 1); + exp = svc_export_lookup(&new); + if (exp) + exp = svc_export_update(&new, exp); - if (exp == NULL) + if (!exp) goto finish; - err = 0; - if (exp_hash(clp, exp) || exp_fsid_hash(clp, exp)) { /* failed to create at least one index */ -- cgit v0.10.2 From 8d270f7f4cdab792d7263926ac59f747258735ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:04 -0800 Subject: [PATCH] knfsd: Use new cache_lookup for svc_expkey cache Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 2ebd77d..abd6896 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,17 +57,6 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -static inline int svc_expkey_hash(struct svc_expkey *item) -{ - int hash = item->ek_fsidtype; - char * cp = (char*)item->ek_fsid; - int len = key_len(item->ek_fsidtype); - - hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); - hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); - return hash & EXPKEY_HASHMASK; -} - void expkey_put(struct cache_head *item, struct cache_detail *cd) { if (cache_put(item, cd)) { @@ -97,7 +86,8 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int); +static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); +static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client fsidtype fsid [path] */ @@ -108,6 +98,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) int fsidtype; char *ep; struct svc_expkey key; + struct svc_expkey *ek; if (mesg[mlen-1] != '\n') return -EINVAL; @@ -152,20 +143,25 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_fsidtype = fsidtype; memcpy(key.ek_fsid, buf, len); + ek = svc_expkey_lookup(&key); + err = -ENOMEM; + if (!ek) + goto out; + /* now we want a pathname, or empty meaning NEGATIVE */ + err = -EINVAL; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) goto out; dprintk("Path seems to be <%s>\n", buf); err = 0; if (len == 0) { - struct svc_expkey *ek; set_bit(CACHE_NEGATIVE, &key.h.flags); - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_update(&key, ek); if (ek) expkey_put(&ek->h, &svc_expkey_cache); + else err = -ENOMEM; } else { struct nameidata nd; - struct svc_expkey *ek; err = path_lookup(buf, 0, &nd); if (err) goto out; @@ -174,10 +170,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_mnt = nd.mnt; key.ek_dentry = nd.dentry; - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_update(&key, ek); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - err = 0; + else + err = -ENOMEM; path_release(&nd); } cache_flush(); @@ -213,29 +210,25 @@ static int expkey_show(struct seq_file *m, seq_printf(m, "\n"); return 0; } - -struct cache_detail svc_expkey_cache = { - .owner = THIS_MODULE, - .hash_size = EXPKEY_HASHMAX, - .hash_table = expkey_table, - .name = "nfsd.fh", - .cache_put = expkey_put, - .cache_request = expkey_request, - .cache_parse = expkey_parse, - .cache_show = expkey_show, -}; -static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) +static inline int expkey_match (struct cache_head *a, struct cache_head *b) { - if (a->ek_fsidtype != b->ek_fsidtype || - a->ek_client != b->ek_client || - memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0) + struct svc_expkey *orig = container_of(a, struct svc_expkey, h); + struct svc_expkey *new = container_of(b, struct svc_expkey, h); + + if (orig->ek_fsidtype != new->ek_fsidtype || + orig->ek_client != new->ek_client || + memcmp(orig->ek_fsid, new->ek_fsid, key_len(orig->ek_fsidtype)) != 0) return 0; return 1; } -static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) +static inline void expkey_init(struct cache_head *cnew, + struct cache_head *citem) { + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + kref_get(&item->ek_client->ref); new->ek_client = item->ek_client; new->ek_fsidtype = item->ek_fsidtype; @@ -244,13 +237,80 @@ static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *it new->ek_fsid[2] = item->ek_fsid[2]; } -static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) +static inline void expkey_update(struct cache_head *cnew, + struct cache_head *citem) { + struct svc_expkey *new = container_of(cnew, struct svc_expkey, h); + struct svc_expkey *item = container_of(citem, struct svc_expkey, h); + new->ek_mnt = mntget(item->ek_mnt); new->ek_dentry = dget(item->ek_dentry); } -static DefineSimpleCacheLookup(svc_expkey, svc_expkey) +static struct cache_head *expkey_alloc(void) +{ + struct svc_expkey *i = kmalloc(sizeof(*i), GFP_KERNEL); + if (i) + return &i->h; + else + return NULL; +} + +struct cache_detail svc_expkey_cache = { + .owner = THIS_MODULE, + .hash_size = EXPKEY_HASHMAX, + .hash_table = expkey_table, + .name = "nfsd.fh", + .cache_put = expkey_put, + .cache_request = expkey_request, + .cache_parse = expkey_parse, + .cache_show = expkey_show, + .match = expkey_match, + .init = expkey_init, + .update = expkey_update, + .alloc = expkey_alloc, +}; + +static struct svc_expkey * +svc_expkey_lookup(struct svc_expkey *item) +{ + struct cache_head *ch; + int hash = item->ek_fsidtype; + char * cp = (char*)item->ek_fsid; + int len = key_len(item->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); + hash &= EXPKEY_HASHMASK; + + ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, + hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + +static struct svc_expkey * +svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) +{ + struct cache_head *ch; + int hash = new->ek_fsidtype; + char * cp = (char*)new->ek_fsid; + int len = key_len(new->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS); + hash &= EXPKEY_HASHMASK; + + ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, + &old->h, hash); + if (ch) + return container_of(ch, struct svc_expkey, h); + else + return NULL; +} + #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -549,7 +609,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - ek = svc_expkey_lookup(&key, 0); + ek = svc_expkey_lookup(&key); if (ek != NULL) if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp))) ek = ERR_PTR(err); @@ -569,7 +629,9 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, key.h.expiry_time = NEVER; key.h.flags = 0; - ek = svc_expkey_lookup(&key, 1); + ek = svc_expkey_lookup(&key); + if (ek) + ek = svc_expkey_update(&key,ek); if (ek) { expkey_put(&ek->h, &svc_expkey_cache); return 0; -- cgit v0.10.2 From d4d11ea9d6d6fcfaa66fbd10fd4a8d43aa575597 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:04 -0800 Subject: [PATCH] knfsd: Use new sunrpc cache for rsi cache Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index aadb4e8..237d935 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -78,7 +78,8 @@ struct rsi { static struct cache_head *rsi_table[RSI_HASHMAX]; static struct cache_detail rsi_cache; -static struct rsi *rsi_lookup(struct rsi *item, int set); +static struct rsi *rsi_update(struct rsi *new, struct rsi *old); +static struct rsi *rsi_lookup(struct rsi *item); static void rsi_free(struct rsi *rsii) { @@ -103,8 +104,10 @@ static inline int rsi_hash(struct rsi *item) ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS); } -static inline int rsi_match(struct rsi *item, struct rsi *tmp) +static int rsi_match(struct cache_head *a, struct cache_head *b) { + struct rsi *item = container_of(a, struct rsi, h); + struct rsi *tmp = container_of(b, struct rsi, h); return netobj_equal(&item->in_handle, &tmp->in_handle) && netobj_equal(&item->in_token, &tmp->in_token); } @@ -125,8 +128,11 @@ static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src) return dup_to_netobj(dst, src->data, src->len); } -static inline void rsi_init(struct rsi *new, struct rsi *item) +static void rsi_init(struct cache_head *cnew, struct cache_head *citem) { + struct rsi *new = container_of(cnew, struct rsi, h); + struct rsi *item = container_of(citem, struct rsi, h); + new->out_handle.data = NULL; new->out_handle.len = 0; new->out_token.data = NULL; @@ -141,8 +147,11 @@ static inline void rsi_init(struct rsi *new, struct rsi *item) item->in_token.data = NULL; } -static inline void rsi_update(struct rsi *new, struct rsi *item) +static void update_rsi(struct cache_head *cnew, struct cache_head *citem) { + struct rsi *new = container_of(cnew, struct rsi, h); + struct rsi *item = container_of(citem, struct rsi, h); + BUG_ON(new->out_handle.data || new->out_token.data); new->out_handle.len = item->out_handle.len; item->out_handle.len = 0; @@ -157,6 +166,15 @@ static inline void rsi_update(struct rsi *new, struct rsi *item) new->minor_status = item->minor_status; } +static struct cache_head *rsi_alloc(void) +{ + struct rsi *rsii = kmalloc(sizeof(*rsii), GFP_KERNEL); + if (rsii) + return &rsii->h; + else + return NULL; +} + static void rsi_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -198,6 +216,10 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.in_token, buf, len)) goto out; + rsip = rsi_lookup(&rsii); + if (!rsip) + goto out; + rsii.h.flags = 0; /* expiry */ expiry = get_expiry(&mesg); @@ -240,12 +262,14 @@ static int rsi_parse(struct cache_detail *cd, goto out; } rsii.h.expiry_time = expiry; - rsip = rsi_lookup(&rsii, 1); + rsip = rsi_update(&rsii, rsip); status = 0; out: rsi_free(&rsii); if (rsip) rsi_put(&rsip->h, &rsi_cache); + else + status = -ENOMEM; return status; } @@ -257,9 +281,37 @@ static struct cache_detail rsi_cache = { .cache_put = rsi_put, .cache_request = rsi_request, .cache_parse = rsi_parse, + .match = rsi_match, + .init = rsi_init, + .update = update_rsi, + .alloc = rsi_alloc, }; -static DefineSimpleCacheLookup(rsi, rsi) +static struct rsi *rsi_lookup(struct rsi *item) +{ + struct cache_head *ch; + int hash = rsi_hash(item); + + ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash); + if (ch) + return container_of(ch, struct rsi, h); + else + return NULL; +} + +static struct rsi *rsi_update(struct rsi *new, struct rsi *old) +{ + struct cache_head *ch; + int hash = rsi_hash(new); + + ch = sunrpc_cache_update(&rsi_cache, &new->h, + &old->h, hash); + if (ch) + return container_of(ch, struct rsi, h); + else + return NULL; +} + /* * The rpcsec_context cache is used to store a context that is @@ -895,7 +947,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto drop; } - rsip = rsi_lookup(&rsikey, 0); + rsip = rsi_lookup(&rsikey); rsi_free(&rsikey); if (!rsip) { goto drop; -- cgit v0.10.2 From 17f834b6d2e0b102ab53d73ba44d5dbb34c38f90 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:05 -0800 Subject: [PATCH] knfsd: Use new cache code for rsc cache Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 237d935..3801526 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -345,7 +345,8 @@ struct rsc { static struct cache_head *rsc_table[RSC_HASHMAX]; static struct cache_detail rsc_cache; -static struct rsc *rsc_lookup(struct rsc *item, int set); +static struct rsc *rsc_update(struct rsc *new, struct rsc *old); +static struct rsc *rsc_lookup(struct rsc *item); static void rsc_free(struct rsc *rsci) { @@ -372,15 +373,21 @@ rsc_hash(struct rsc *rsci) return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS); } -static inline int -rsc_match(struct rsc *new, struct rsc *tmp) +static int +rsc_match(struct cache_head *a, struct cache_head *b) { + struct rsc *new = container_of(a, struct rsc, h); + struct rsc *tmp = container_of(b, struct rsc, h); + return netobj_equal(&new->handle, &tmp->handle); } -static inline void -rsc_init(struct rsc *new, struct rsc *tmp) +static void +rsc_init(struct cache_head *cnew, struct cache_head *ctmp) { + struct rsc *new = container_of(cnew, struct rsc, h); + struct rsc *tmp = container_of(ctmp, struct rsc, h); + new->handle.len = tmp->handle.len; tmp->handle.len = 0; new->handle.data = tmp->handle.data; @@ -389,9 +396,12 @@ rsc_init(struct rsc *new, struct rsc *tmp) new->cred.cr_group_info = NULL; } -static inline void -rsc_update(struct rsc *new, struct rsc *tmp) +static void +update_rsc(struct cache_head *cnew, struct cache_head *ctmp) { + struct rsc *new = container_of(cnew, struct rsc, h); + struct rsc *tmp = container_of(ctmp, struct rsc, h); + new->mechctx = tmp->mechctx; tmp->mechctx = NULL; memset(&new->seqdata, 0, sizeof(new->seqdata)); @@ -400,6 +410,16 @@ rsc_update(struct rsc *new, struct rsc *tmp) tmp->cred.cr_group_info = NULL; } +static struct cache_head * +rsc_alloc(void) +{ + struct rsc *rsci = kmalloc(sizeof(*rsci), GFP_KERNEL); + if (rsci) + return &rsci->h; + else + return NULL; +} + static int rsc_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -425,6 +445,10 @@ static int rsc_parse(struct cache_detail *cd, if (expiry == 0) goto out; + rscp = rsc_lookup(&rsci); + if (!rscp) + goto out; + /* uid, or NEGATIVE */ rv = get_int(&mesg, &rsci.cred.cr_uid); if (rv == -EINVAL) @@ -480,12 +504,14 @@ static int rsc_parse(struct cache_detail *cd, gss_mech_put(gm); } rsci.h.expiry_time = expiry; - rscp = rsc_lookup(&rsci, 1); + rscp = rsc_update(&rsci, rscp); status = 0; out: rsc_free(&rsci); if (rscp) rsc_put(&rscp->h, &rsc_cache); + else + status = -ENOMEM; return status; } @@ -496,9 +522,37 @@ static struct cache_detail rsc_cache = { .name = "auth.rpcsec.context", .cache_put = rsc_put, .cache_parse = rsc_parse, + .match = rsc_match, + .init = rsc_init, + .update = update_rsc, + .alloc = rsc_alloc, }; -static DefineSimpleCacheLookup(rsc, rsc); +static struct rsc *rsc_lookup(struct rsc *item) +{ + struct cache_head *ch; + int hash = rsc_hash(item); + + ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash); + if (ch) + return container_of(ch, struct rsc, h); + else + return NULL; +} + +static struct rsc *rsc_update(struct rsc *new, struct rsc *old) +{ + struct cache_head *ch; + int hash = rsc_hash(new); + + ch = sunrpc_cache_update(&rsc_cache, &new->h, + &old->h, hash); + if (ch) + return container_of(ch, struct rsc, h); + else + return NULL; +} + static struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) @@ -509,7 +563,7 @@ gss_svc_searchbyctx(struct xdr_netobj *handle) memset(&rsci, 0, sizeof(rsci)); if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) return NULL; - found = rsc_lookup(&rsci, 0); + found = rsc_lookup(&rsci); rsc_free(&rsci); if (!found) return NULL; -- cgit v0.10.2 From f9ecc921b5b5e135050e7f22fef873c249aee3fc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:06 -0800 Subject: [PATCH] knfsd: Use new cache code for name/id lookup caches Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index dea690a..75cfbb6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -82,9 +82,12 @@ struct ent { #define ENT_HASHMAX (1 << ENT_HASHBITS) #define ENT_HASHMASK (ENT_HASHMAX - 1) -static inline void -ent_init(struct ent *new, struct ent *itm) +static void +ent_init(struct cache_head *cnew, struct cache_head *citm) { + struct ent *new = container_of(cnew, struct ent, h); + struct ent *itm = container_of(citm, struct ent, h); + new->id = itm->id; new->type = itm->type; @@ -92,12 +95,6 @@ ent_init(struct ent *new, struct ent *itm) strlcpy(new->authname, itm->authname, sizeof(new->name)); } -static inline void -ent_update(struct ent *new, struct ent *itm) -{ - ent_init(new, itm); -} - static void ent_put(struct cache_head *ch, struct cache_detail *cd) { @@ -107,6 +104,16 @@ ent_put(struct cache_head *ch, struct cache_detail *cd) } } +static struct cache_head * +ent_alloc(void) +{ + struct ent *e = kmalloc(sizeof(*e), GFP_KERNEL); + if (e) + return &e->h; + else + return NULL; +} + /* * ID -> Name cache */ @@ -143,9 +150,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static inline int -idtoname_match(struct ent *a, struct ent *b) +static int +idtoname_match(struct cache_head *ca, struct cache_head *cb) { + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + return (a->id == b->id && a->type == b->type && strcmp(a->authname, b->authname) == 0); } @@ -178,7 +188,8 @@ warn_no_idmapd(struct cache_detail *detail) static int idtoname_parse(struct cache_detail *, char *, int); -static struct ent *idtoname_lookup(struct ent *, int); +static struct ent *idtoname_lookup(struct ent *); +static struct ent *idtoname_update(struct ent *, struct ent *); static struct cache_detail idtoname_cache = { .owner = THIS_MODULE, @@ -190,6 +201,10 @@ static struct cache_detail idtoname_cache = { .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, + .match = idtoname_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, }; int @@ -232,6 +247,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) if (ent.h.expiry_time == 0) goto out; + error = -ENOMEM; + res = idtoname_lookup(&ent); + if (!res) + goto out; + /* Name */ error = qword_get(&buf, buf1, PAGE_SIZE); if (error == -EINVAL) @@ -246,7 +266,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) memcpy(ent.name, buf1, sizeof(ent.name)); } error = -ENOMEM; - if ((res = idtoname_lookup(&ent, 1)) == NULL) + res = idtoname_update(&ent, res); + if (res == NULL) goto out; ent_put(&res->h, &idtoname_cache); @@ -258,7 +279,31 @@ out: return error; } -static DefineSimpleCacheLookup(ent, idtoname); + +static struct ent * +idtoname_lookup(struct ent *item) +{ + struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache, + &item->h, + idtoname_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +idtoname_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&idtoname_cache, + &new->h, &old->h, + idtoname_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + /* * Name -> ID cache @@ -285,9 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static inline int -nametoid_match(struct ent *a, struct ent *b) +static int +nametoid_match(struct cache_head *ca, struct cache_head *cb) { + struct ent *a = container_of(ca, struct ent, h); + struct ent *b = container_of(cb, struct ent, h); + return (a->type == b->type && strcmp(a->name, b->name) == 0 && strcmp(a->authname, b->authname) == 0); } @@ -311,7 +359,8 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) return 0; } -static struct ent *nametoid_lookup(struct ent *, int); +static struct ent *nametoid_lookup(struct ent *); +static struct ent *nametoid_update(struct ent *, struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); static struct cache_detail nametoid_cache = { @@ -324,6 +373,10 @@ static struct cache_detail nametoid_cache = { .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, + .match = nametoid_match, + .init = ent_init, + .update = ent_init, + .alloc = ent_alloc, }; static int @@ -373,7 +426,11 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) set_bit(CACHE_NEGATIVE, &ent.h.flags); error = -ENOMEM; - if ((res = nametoid_lookup(&ent, 1)) == NULL) + res = nametoid_lookup(&ent); + if (res == NULL) + goto out; + res = nametoid_update(&ent, res); + if (res == NULL) goto out; ent_put(&res->h, &nametoid_cache); @@ -384,7 +441,30 @@ out: return (error); } -static DefineSimpleCacheLookup(ent, nametoid); + +static struct ent * +nametoid_lookup(struct ent *item) +{ + struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache, + &item->h, + nametoid_hash(item)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} + +static struct ent * +nametoid_update(struct ent *new, struct ent *old) +{ + struct cache_head *ch = sunrpc_cache_update(&nametoid_cache, + &new->h, &old->h, + nametoid_hash(new)); + if (ch) + return container_of(ch, struct ent, h); + else + return NULL; +} /* * Exported API @@ -452,24 +532,24 @@ idmap_defer(struct cache_req *req) } static inline int -do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, +do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item, struct idmap_defer_req *mdr) { - *item = lookup_fn(key, 0); + *item = lookup_fn(key); if (!*item) return -ENOMEM; return cache_check(detail, &(*item)->h, &mdr->req); } static inline int -do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int), +do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { int ret = -ENOMEM; - *item = lookup_fn(key, 0); + *item = lookup_fn(key); if (!*item) goto out_err; ret = -ETIMEDOUT; @@ -490,7 +570,7 @@ out_err: static int idmap_lookup(struct svc_rqst *rqstp, - struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, + struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { struct idmap_defer_req *mdr; -- cgit v0.10.2 From 4013edea9a0b6cdcb1fdf5d4011e47e068fd6efb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:07 -0800 Subject: [PATCH] knfsd: An assortment of little fixes to the sunrpc cache code - in cache_check, h must be non-NULL as it has been de-referenced, so don't bother checking for NULL. - When a cache-item is updated, we need to call cache_revisit_request to see if there is a pending request waiting for that item. We were using a transition to CACHE_VALID to see if that was needed, however that is wrong as an expired entry will still be marked 'valid' (as the data is valid and will need to be released). So instead use an off transition for CACHE_PENDING which is exactly the right thing to test. - Add a little bit more debugging info. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4449dc5..b242f49 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -208,7 +208,7 @@ int cache_check(struct cache_detail *detail, if (rv == -EAGAIN) cache_defer_req(rqstp, h); - if (rv && h) + if (rv) detail->cache_put(h, detail); return rv; } @@ -223,8 +223,10 @@ void cache_fresh(struct cache_detail *detail, head->last_refresh = get_seconds(); if (!test_and_set_bit(CACHE_VALID, &head->flags)) cache_revisit_request(head); - if (test_and_clear_bit(CACHE_PENDING, &head->flags)) + if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { + cache_revisit_request(head); queue_loose(detail, head); + } } /* @@ -551,7 +553,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* there was one too many */ dreq->revisit(dreq, 1); } - if (test_bit(CACHE_VALID, &item->flags)) { + if (!test_bit(CACHE_PENDING, &item->flags)) { /* must have just been validated... */ cache_revisit_request(item); } @@ -892,7 +894,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch) if (cr->item != ch) continue; if (cr->readers != 0) - break; + continue; list_del(&cr->q.list); spin_unlock(&queue_lock); detail->cache_put(cr->item, detail); @@ -1180,8 +1182,8 @@ static int c_show(struct seq_file *m, void *p) return cd->cache_show(m, cd, NULL); ifdebug(CACHE) - seq_printf(m, "# expiry=%ld refcnt=%d\n", - cp->expiry_time, atomic_read(&cp->refcnt)); + seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", + cp->expiry_time, atomic_read(&cp->refcnt), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ -- cgit v0.10.2 From 4d90452cb23b08a9a9dd001010f0ee6b1ee83a45 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:07 -0800 Subject: [PATCH] knfsd: Remove DefineCacheLookup This has been replaced by more traditional code. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3e17a5f..afc481d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -128,119 +128,6 @@ struct cache_deferred_req { int too_many); }; -/* - * just like a template in C++, this macro does cache lookup - * for us. - * The function is passed some sort of HANDLE from which a cache_detail - * structure can be determined (via SETUP, DETAIL), a template - * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the - * TEST, the function will try to find a matching cache entry in the cache. - * If "set" == 0 : - * If an entry is found, it is returned - * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 : - * If no entry is found a new one is inserted with data from "template" - * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE - * If a CACHE_VALID entry is found, a new entry is swapped in with data - * from "template" - * - * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not - * run but insteead CACHE_NEGATIVE is set in any new item. - - * In any case, the new entry is returned with a reference count. - * - * - * RTN is a struct type for a cache entry - * MEMBER is the member of the cache which is cache_head, which must be first - * FNAME is the name for the function - * ARGS are arguments to function and must contain RTN *item, int set. May - * also contain something to be usedby SETUP or DETAIL to find cache_detail. - * SETUP locates the cache detail and makes it available as... - * DETAIL identifies the cache detail, possibly set up by SETUP - * HASHFN returns a hash value of the cache entry "item" - * TEST tests if "tmp" matches "item" - * INIT copies key information from "item" to "new" - * UPDATE copies content information from "item" to "tmp" - */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ -RTN *FNAME ARGS \ -{ \ - RTN *tmp, *new=NULL; \ - struct cache_head **hp, **head; \ - SETUP; \ - head = &(DETAIL)->hash_table[HASHFN]; \ - retry: \ - if (set||new) write_lock(&(DETAIL)->hash_lock); \ - else read_lock(&(DETAIL)->hash_lock); \ - for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \ - tmp = container_of(*hp, RTN, MEMBER); \ - if (TEST) { /* found a match */ \ - \ - if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ - break; \ - \ - if (new) \ - {INIT;} \ - if (set) { \ - if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ - { /* need to swap in new */ \ - RTN *t2; \ - \ - new->MEMBER.next = tmp->MEMBER.next; \ - *hp = &new->MEMBER; \ - tmp->MEMBER.next = NULL; \ - t2 = tmp; tmp = new; new = t2; \ - } \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else { \ - UPDATE; \ - clear_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - } \ - } \ - cache_get(&tmp->MEMBER); \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (set) \ - cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ - if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ - return tmp; \ - } \ - } \ - /* Didn't find anything */ \ - if (new) { \ - INIT; \ - new->MEMBER.next = *head; \ - *head = &new->MEMBER; \ - (DETAIL)->entries ++; \ - cache_get(&new->MEMBER); \ - if (set) { \ - tmp = new; \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else {UPDATE;} \ - } \ - } \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (new && set) \ - cache_fresh(DETAIL, &new->MEMBER, item->MEMBER.expiry_time); \ - if (new) \ - return new; \ - new = kmalloc(sizeof(*new), GFP_KERNEL); \ - if (new) { \ - cache_init(&new->MEMBER); \ - goto retry; \ - } \ - return NULL; \ -} - -#define DefineSimpleCacheLookup(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item)) extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, -- cgit v0.10.2 From ebd0cb1af3be2729cc1f574681dfba01fcf458d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:08 -0800 Subject: [PATCH] knfsd: Unexport cache_fresh and fix a small race Cache_fresh is now only used in cache.c, so unexport it. Part of cache_fresh (setting CACHE_VALID) should really be done under the lock, while part (calling cache_revisit_request etc) must be done outside the lock. So we split it up appropriately. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index afc481d..a37fead 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -165,8 +165,6 @@ static inline int cache_put(struct cache_head *h, struct cache_detail *cd) } extern void cache_init(struct cache_head *h); -extern void cache_fresh(struct cache_detail *detail, - struct cache_head *head, time_t expiry); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b242f49..edcda4f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -96,6 +96,27 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, } EXPORT_SYMBOL(sunrpc_cache_lookup); + +static void queue_loose(struct cache_detail *detail, struct cache_head *ch); + +static int cache_fresh_locked(struct cache_head *head, time_t expiry) +{ + head->expiry_time = expiry; + head->last_refresh = get_seconds(); + return !test_and_set_bit(CACHE_VALID, &head->flags); +} + +static void cache_fresh_unlocked(struct cache_head *head, + struct cache_detail *detail, int new) +{ + if (new) + cache_revisit_request(head); + if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { + cache_revisit_request(head); + queue_loose(detail, head); + } +} + struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash) { @@ -105,6 +126,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, */ struct cache_head **head; struct cache_head *tmp; + int is_new; if (!test_bit(CACHE_VALID, &old->flags)) { write_lock(&detail->hash_lock); @@ -113,9 +135,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - /* FIXME cache_fresh should come first */ + is_new = cache_fresh_locked(old, new->expiry_time); write_unlock(&detail->hash_lock); - cache_fresh(detail, old, new->expiry_time); + cache_fresh_unlocked(old, detail, is_new); return old; } write_unlock(&detail->hash_lock); @@ -138,9 +160,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, tmp->next = *head; *head = tmp; cache_get(tmp); + is_new = cache_fresh_locked(tmp, new->expiry_time); + cache_fresh_locked(old, 0); write_unlock(&detail->hash_lock); - cache_fresh(detail, tmp, new->expiry_time); - cache_fresh(detail, old, 0); + cache_fresh_unlocked(tmp, detail, is_new); + cache_fresh_unlocked(old, detail, 0); detail->cache_put(old, detail); return tmp; } @@ -192,7 +216,8 @@ int cache_check(struct cache_detail *detail, clear_bit(CACHE_PENDING, &h->flags); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_unlocked(h, detail, + cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); rv = -ENOENT; } break; @@ -213,22 +238,6 @@ int cache_check(struct cache_detail *detail, return rv; } -static void queue_loose(struct cache_detail *detail, struct cache_head *ch); - -void cache_fresh(struct cache_detail *detail, - struct cache_head *head, time_t expiry) -{ - - head->expiry_time = expiry; - head->last_refresh = get_seconds(); - if (!test_and_set_bit(CACHE_VALID, &head->flags)) - cache_revisit_request(head); - if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { - cache_revisit_request(head); - queue_loose(detail, head); - } -} - /* * caches need to be periodically cleaned. * For this we maintain a list of cache_detail and diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 4040119..69b8238 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); -EXPORT_SYMBOL(cache_fresh); EXPORT_SYMBOL(cache_init); EXPORT_SYMBOL(cache_register); EXPORT_SYMBOL(cache_unregister); -- cgit v0.10.2 From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:09 -0800 Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs .. it makes some of the code nicer. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index abd6896..cc811a1 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,18 +57,17 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct cache_head *item, struct cache_detail *cd) +void expkey_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_expkey *key = container_of(item, struct svc_expkey, h); - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) { - dput(key->ek_dentry); - mntput(key->ek_mnt); - } - auth_domain_put(key->ek_client); - kfree(key); + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + if (test_bit(CACHE_VALID, &key->h.flags) && + !test_bit(CACHE_NEGATIVE, &key->h.flags)) { + dput(key->ek_dentry); + mntput(key->ek_mnt); } + auth_domain_put(key->ek_client); + kfree(key); } static void expkey_request(struct cache_detail *cd, @@ -158,7 +157,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; } else { struct nameidata nd; @@ -172,7 +171,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) ek = svc_expkey_update(&key, ek); if (ek) - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); else err = -ENOMEM; path_release(&nd); @@ -318,15 +317,13 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) static struct cache_head *export_table[EXPORT_HASHMAX]; -void svc_export_put(struct cache_head *item, struct cache_detail *cd) +static void svc_export_put(struct kref *ref) { - if (cache_put(item, cd)) { - struct svc_export *exp = container_of(item, struct svc_export, h); - dput(exp->ex_dentry); - mntput(exp->ex_mnt); - auth_domain_put(exp->ex_client); - kfree(exp); - } + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + dput(exp->ex_dentry); + mntput(exp->ex_mnt); + auth_domain_put(exp->ex_client); + kfree(exp); } static void svc_export_request(struct cache_detail *cd, @@ -633,7 +630,7 @@ static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, if (ek) ek = svc_expkey_update(&key,ek); if (ek) { - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); return 0; } return -ENOMEM; @@ -762,7 +759,7 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -800,7 +797,7 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (ek && !IS_ERR(ek)) { ek->h.expiry_time = get_seconds()-1; - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); } svc_expkey_cache.nextcheck = get_seconds(); } @@ -902,7 +899,7 @@ finish: if (exp) exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); if (clp) auth_domain_put(clp); path_release(&nd); @@ -1030,7 +1027,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, return ERR_PTR(PTR_ERR(ek)); exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); - expkey_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, &svc_expkey_cache); if (!exp || IS_ERR(exp)) return ERR_PTR(PTR_ERR(exp)); @@ -1068,7 +1065,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, else rv = fh_compose(fhp, exp, fsid_key->ek_dentry, NULL); - expkey_put(&fsid_key->h, &svc_expkey_cache); + cache_put(&fsid_key->h, &svc_expkey_cache); return rv; } @@ -1187,7 +1184,7 @@ static int e_show(struct seq_file *m, void *p) cache_get(&exp->h); if (cache_check(&svc_export_cache, &exp->h, NULL)) return 0; - if (cache_put(&exp->h, &svc_export_cache)) BUG(); + cache_put(&exp->h, &svc_export_cache); return svc_export_show(m, &svc_export_cache, cp); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 75cfbb6..4b6aa60 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -96,12 +96,10 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) } static void -ent_put(struct cache_head *ch, struct cache_detail *cd) +ent_put(struct kref *ref) { - if (cache_put(ch, cd)) { - struct ent *map = container_of(ch, struct ent, h); - kfree(map); - } + struct ent *map = container_of(ref, struct ent, h.ref); + kfree(map); } static struct cache_head * @@ -270,7 +268,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - ent_put(&res->h, &idtoname_cache); + cache_put(&res->h, &idtoname_cache); error = 0; out: @@ -433,7 +431,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) if (res == NULL) goto out; - ent_put(&res->h, &nametoid_cache); + cache_put(&res->h, &nametoid_cache); error = 0; out: kfree(buf1); @@ -562,7 +560,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_put; return 0; out_put: - ent_put(&(*item)->h, detail); + cache_put(&(*item)->h, detail); out_err: *item = NULL; return ret; @@ -613,7 +611,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen if (ret) return ret; *id = item->id; - ent_put(&item->h, &nametoid_cache); + cache_put(&item->h, &nametoid_cache); return 0; } @@ -635,7 +633,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) ret = strlen(item->name); BUG_ON(ret > IDMAP_NAMESZ); memcpy(name, item->name, ret); - ent_put(&item->h, &idtoname_cache); + cache_put(&item->h, &idtoname_cache); return ret; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7a3e397..3f2ec2e 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -506,7 +506,7 @@ fh_put(struct svc_fh *fhp) nfsd_nr_put++; } if (exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); fhp->fh_export = NULL; } return; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index d52e0b7..a6c08a4 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -102,13 +102,11 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern void expkey_put(struct cache_head *item, struct cache_detail *cd); -extern void svc_export_put(struct cache_head *item, struct cache_detail *cd); extern struct cache_detail svc_export_cache, svc_expkey_cache; static inline void exp_put(struct svc_export *exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); } static inline void exp_get(struct svc_export *exp) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index a37fead..ad3f5cb 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -50,7 +50,7 @@ struct cache_head { time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received */ - atomic_t refcnt; + struct kref ref; unsigned long flags; }; #define CACHE_VALID 0 /* Entry contains valid data */ @@ -68,8 +68,7 @@ struct cache_detail { atomic_t inuse; /* active user-space update or lookup */ char *name; - void (*cache_put)(struct cache_head *, - struct cache_detail*); + void (*cache_put)(struct kref *); void (*cache_request)(struct cache_detail *cd, struct cache_head *h, @@ -151,17 +150,17 @@ extern void cache_clean_deferred(void *owner); static inline struct cache_head *cache_get(struct cache_head *h) { - atomic_inc(&h->refcnt); + kref_get(&h->ref); return h; } -static inline int cache_put(struct cache_head *h, struct cache_detail *cd) +static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->refcnt) <= 2 && + if (atomic_read(&h->ref.refcount) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; - return atomic_dec_and_test(&h->refcnt); + kref_put(&h->ref, cd->cache_put); } extern void cache_init(struct cache_head *h); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 3801526..4d7eb9e 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -89,13 +89,11 @@ static void rsi_free(struct rsi *rsii) kfree(rsii->out_token.data); } -static void rsi_put(struct cache_head *item, struct cache_detail *cd) +static void rsi_put(struct kref *ref) { - struct rsi *rsii = container_of(item, struct rsi, h); - if (cache_put(item, cd)) { - rsi_free(rsii); - kfree(rsii); - } + struct rsi *rsii = container_of(ref, struct rsi, h.ref); + rsi_free(rsii); + kfree(rsii); } static inline int rsi_hash(struct rsi *item) @@ -267,7 +265,7 @@ static int rsi_parse(struct cache_detail *cd, out: rsi_free(&rsii); if (rsip) - rsi_put(&rsip->h, &rsi_cache); + cache_put(&rsip->h, &rsi_cache); else status = -ENOMEM; return status; @@ -357,14 +355,12 @@ static void rsc_free(struct rsc *rsci) put_group_info(rsci->cred.cr_group_info); } -static void rsc_put(struct cache_head *item, struct cache_detail *cd) +static void rsc_put(struct kref *ref) { - struct rsc *rsci = container_of(item, struct rsc, h); + struct rsc *rsci = container_of(ref, struct rsc, h.ref); - if (cache_put(item, cd)) { - rsc_free(rsci); - kfree(rsci); - } + rsc_free(rsci); + kfree(rsci); } static inline int @@ -509,7 +505,7 @@ static int rsc_parse(struct cache_detail *cd, out: rsc_free(&rsci); if (rscp) - rsc_put(&rscp->h, &rsc_cache); + cache_put(&rscp->h, &rsc_cache); else status = -ENOMEM; return status; @@ -1076,7 +1072,7 @@ drop: ret = SVC_DROP; out: if (rsci) - rsc_put(&rsci->h, &rsc_cache); + cache_put(&rsci->h, &rsc_cache); return ret; } @@ -1168,7 +1164,7 @@ out_err: put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - rsc_put(&gsd->rsci->h, &rsc_cache); + cache_put(&gsd->rsci->h, &rsc_cache); gsd->rsci = NULL; return stat; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index edcda4f..dd81e59 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ void cache_init(struct cache_head *h) time_t now = get_seconds(); h->next = NULL; h->flags = 0; - atomic_set(&h->refcnt, 1); + kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; h->last_refresh = now; } @@ -81,7 +81,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (detail->match(tmp, key)) { cache_get(tmp); write_unlock(&detail->hash_lock); - detail->cache_put(new, detail); + cache_put(new, detail); return tmp; } } @@ -145,7 +145,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, /* We need to insert a new entry */ tmp = detail->alloc(); if (!tmp) { - detail->cache_put(old, detail); + cache_put(old, detail); return NULL; } cache_init(tmp); @@ -165,7 +165,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail, is_new); cache_fresh_unlocked(old, detail, 0); - detail->cache_put(old, detail); + cache_put(old, detail); return tmp; } EXPORT_SYMBOL(sunrpc_cache_update); @@ -234,7 +234,7 @@ int cache_check(struct cache_detail *detail, cache_defer_req(rqstp, h); if (rv) - detail->cache_put(h, detail); + cache_put(h, detail); return rv; } @@ -431,7 +431,7 @@ static int cache_clean(void) if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) queue_loose(current_detail, ch); - if (atomic_read(&ch->refcnt) == 1) + if (atomic_read(&ch->ref.refcount) == 1) break; } if (ch) { @@ -446,7 +446,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) - d->cache_put(ch, d); + cache_put(ch, d); } else spin_unlock(&cache_list_lock); @@ -723,7 +723,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); spin_unlock(&queue_lock); - cd->cache_put(rq->item, cd); + cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else @@ -906,7 +906,7 @@ static void queue_loose(struct cache_detail *detail, struct cache_head *ch) continue; list_del(&cr->q.list); spin_unlock(&queue_lock); - detail->cache_put(cr->item, detail); + cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); return; @@ -1192,7 +1192,7 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->refcnt), cp->flags); + cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e38621..11020c0 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -84,15 +84,15 @@ struct ip_map { }; static struct cache_head *ip_table[IP_HASHMAX]; -static void ip_map_put(struct cache_head *item, struct cache_detail *cd) +static void ip_map_put(struct kref *kref) { + struct cache_head *item = container_of(kref, struct cache_head, ref); struct ip_map *im = container_of(item, struct ip_map,h); - if (cache_put(item, cd)) { - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - auth_domain_put(&im->m_client->h); - kfree(im); - } + + if (test_bit(CACHE_VALID, &item->flags) && + !test_bit(CACHE_NEGATIVE, &item->flags)) + auth_domain_put(&im->m_client->h); + kfree(im); } #if IP_HASHBITS == 8 @@ -315,7 +315,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex hash_ip((unsigned long)ipm->m_addr.s_addr)); if (!ch) return -ENOMEM; - ip_map_put(ch, &ip_map_cache); + cache_put(ch, &ip_map_cache); return 0; } @@ -369,7 +369,7 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) rv = &ipm->m_client->h; kref_get(&rv->ref); } - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); return rv; } @@ -403,7 +403,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - ip_map_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, &ip_map_cache); break; } return SVC_OK; -- cgit v0.10.2 From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Mar 2006 01:15:10 -0800 Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global code static We can now make some code static. Signed-off-by: Adrian Bunk Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cc811a1..c340be0 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -57,7 +57,7 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) static struct cache_head *expkey_table[EXPKEY_HASHMAX]; -void expkey_put(struct kref *ref) +static void expkey_put(struct kref *ref) { struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); @@ -87,6 +87,8 @@ static void expkey_request(struct cache_detail *cd, static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); +static struct cache_detail svc_expkey_cache; + static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client fsidtype fsid [path] */ @@ -255,7 +257,7 @@ static struct cache_head *expkey_alloc(void) return NULL; } -struct cache_detail svc_expkey_cache = { +static struct cache_detail svc_expkey_cache = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .hash_table = expkey_table, @@ -345,7 +347,8 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); +static struct svc_export *svc_export_update(struct svc_export *new, + struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); static int check_export(struct inode *inode, int flags) @@ -574,7 +577,7 @@ svc_export_lookup(struct svc_export *exp) return NULL; } -struct svc_export * +static struct svc_export * svc_export_update(struct svc_export *new, struct svc_export *old) { struct cache_head *ch; @@ -593,7 +596,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) } -struct svc_expkey * +static struct svc_expkey * exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a6c08a4..d2a8abb 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -86,9 +86,6 @@ void nfsd_export_shutdown(void); void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); -struct svc_expkey * exp_find_key(struct auth_domain *clp, - int fsid_type, u32 *fsidv, - struct cache_req *reqp); struct svc_export * exp_get_by_name(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, @@ -102,7 +99,7 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern struct cache_detail svc_export_cache, svc_expkey_cache; +extern struct cache_detail svc_export_cache; static inline void exp_put(struct svc_export *exp) { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ad3f5cb..b5612c9 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -163,7 +163,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -extern void cache_init(struct cache_head *h); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index dd81e59..3ac4193 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h) { time_t now = get_seconds(); h->next = NULL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 69b8238..769114f 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(auth_unix_lookup); EXPORT_SYMBOL(cache_check); EXPORT_SYMBOL(cache_flush); EXPORT_SYMBOL(cache_purge); -EXPORT_SYMBOL(cache_init); EXPORT_SYMBOL(cache_register); EXPORT_SYMBOL(cache_unregister); EXPORT_SYMBOL(qword_add); -- cgit v0.10.2 From ad1b5229def92b71631a927895b034ceec06c991 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:11 -0800 Subject: [PATCH] knfsd: Tidy up unix_domain_find We shouldn't really compare &new->h with anything when new ==NULL, and gather three different if statements that all start if (rv ... into one large if. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 11020c0..7e5707e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -36,16 +36,16 @@ struct auth_domain *unix_domain_find(char *name) rv = auth_domain_lookup(name, NULL); while(1) { - if (rv != &new->h) { - if (new) auth_domain_put(&new->h); + if (rv) { + if (new && rv != &new->h) + auth_domain_put(&new->h); + + if (rv->flavour != &svcauth_unix) { + auth_domain_put(rv); + return NULL; + } return rv; } - if (rv && rv->flavour != &svcauth_unix) { - auth_domain_put(rv); - return NULL; - } - if (rv) - return rv; new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) -- cgit v0.10.2 From 491214716755894986b99e0cf0a08b830d244577 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:12 -0800 Subject: [PATCH] knfsd: Update rpc-cache.txt to match recent changes Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt index 2b5d443..5f757c8 100644 --- a/Documentation/rpc-cache.txt +++ b/Documentation/rpc-cache.txt @@ -1,4 +1,4 @@ -This document gives a brief introduction to the caching + This document gives a brief introduction to the caching mechanisms in the sunrpc layer that is used, in particular, for NFS authentication. @@ -25,25 +25,17 @@ The common code handles such things as: - supporting 'NEGATIVE' as well as positive entries - allowing an EXPIRED time on cache items, and removing items after they expire, and are no longe in-use. - - Future code extensions are expect to handle - making requests to user-space to fill in cache entries - allowing user-space to directly set entries in the cache - delaying RPC requests that depend on as-yet incomplete cache entries, and replaying those requests when the cache entry is complete. - - maintaining last-access times on cache entries - - clean out old entries when the caches become full - -The code for performing a cache lookup is also common, but in the form -of a template. i.e. a #define. -Each cache defines a lookup function by using the DefineCacheLookup -macro, or the simpler DefineSimpleCacheLookup macro + - clean out old entries as they expire. Creating a Cache ---------------- -1/ A cache needs a datum to cache. This is in the form of a +1/ A cache needs a datum to store. This is in the form of a structure definition that must contain a struct cache_head as an element, usually the first. @@ -51,35 +43,69 @@ Creating a Cache Each cache element is reference counted and contains expiry and update times for use in cache management. 2/ A cache needs a "cache_detail" structure that - describes the cache. This stores the hash table, and some - parameters for cache management. -3/ A cache needs a lookup function. This is created using - the DefineCacheLookup macro. This lookup function is used both - to find entries and to update entries. The normal mode for - updating an entry is to replace the old entry with a new - entry. However it is possible to allow update-in-place - for those caches where it makes sense (no atomicity issues - or indirect reference counting issue) -4/ A cache needs to be registered using cache_register(). This - includes in on a list of caches that will be regularly - cleaned to discard old data. For this to work, some - thread must periodically call cache_clean - + describes the cache. This stores the hash table, some + parameters for cache management, and some operations detailing how + to work with particular cache items. + The operations requires are: + struct cache_head *alloc(void) + This simply allocates appropriate memory and returns + a pointer to the cache_detail embedded within the + structure + void cache_put(struct kref *) + This is called when the last reference to an item is + is dropped. The pointer passed is to the 'ref' field + in the cache_head. cache_put should release any + references create by 'cache_init' and, if CACHE_VALID + is set, any references created by cache_update. + It should then release the memory allocated by + 'alloc'. + int match(struct cache_head *orig, struct cache_head *new) + test if the keys in the two structures match. Return + 1 if they do, 0 if they don't. + void init(struct cache_head *orig, struct cache_head *new) + Set the 'key' fields in 'new' from 'orig'. This may + include taking references to shared objects. + void update(struct cache_head *orig, struct cache_head *new) + Set the 'content' fileds in 'new' from 'orig'. + int cache_show(struct seq_file *m, struct cache_detail *cd, + struct cache_head *h) + Optional. Used to provide a /proc file that lists the + contents of a cache. This should show one item, + usually on just one line. + int cache_request(struct cache_detail *cd, struct cache_head *h, + char **bpp, int *blen) + Format a request to be send to user-space for an item + to be instantiated. *bpp is a buffer of size *blen. + bpp should be moved forward over the encoded message, + and *blen should be reduced to show how much free + space remains. Return 0 on success or <0 if not + enough room or other problem. + int cache_parse(struct cache_detail *cd, char *buf, int len) + A message from user space has arrived to fill out a + cache entry. It is in 'buf' of length 'len'. + cache_parse should parse this, find the item in the + cache with sunrpc_cache_lookup, and update the item + with sunrpc_cache_update. + + +3/ A cache needs to be registered using cache_register(). This + includes it on a list of caches that will be regularly + cleaned to discard old data. + Using a cache ------------- -To find a value in a cache, call the lookup function passing it a the -datum which contains key, and possibly content, and a flag saying -whether to update the cache with new data from the datum. Depending -on how the cache lookup function was defined, it may take an extra -argument to identify the particular cache in question. +To find a value in a cache, call sunrpc_cache_lookup passing a pointer +to the cache_head in a sample item with the 'key' fields filled in. +This will be passed to ->match to identify the target entry. If no +entry is found, a new entry will be create, added to the cache, and +marked as not containing valid data. -Except in cases of kmalloc failure, the lookup function -will return a new datum which will store the key and -may contain valid content, or may not. -This datum is typically passed to cache_check which determines the -validity of the datum and may later initiate an upcall to fill -in the data. +The item returned is typically passed to cache_check which will check +if the data is valid, and may initiate an up-call to get fresh data. +cache_check will return -ENOENT in the entry is negative or if an up +call is needed but not possible, -EAGAIN if an upcall is pending, +or 0 if the data is valid; cache_check can be passed a "struct cache_req *". This structure is typically embedded in the actual request and can be used to create a @@ -90,6 +116,13 @@ item does become valid, the deferred copy of the request will be revisited (->revisit). It is expected that this method will reschedule the request for processing. +The value returned by sunrpc_cache_lookup can also be passed to +sunrpc_cache_update to set the content for the item. A second item is +passed which should hold the content. If the item found by _lookup +has valid data, then it is discarded and a new item is created. This +saves any user of an item from worrying about content changing while +it is being inspected. If the item found by _lookup does not contain +valid data, then the content is copied across and CACHE_VALID is set. Populating a cache ------------------ @@ -114,8 +147,8 @@ should be create or updated to have the given content, and the expiry time should be set on that item. Reading from a channel is a bit more interesting. When a cache -lookup fail, or when it suceeds but finds an entry that may soon -expiry, a request is lodged for that cache item to be updated by +lookup fails, or when it succeeds but finds an entry that may soon +expire, a request is lodged for that cache item to be updated by user-space. These requests appear in the channel file. Successive reads will return successive requests. @@ -130,7 +163,7 @@ Thus a user-space helper is likely to: write a response loop. -If it dies and needs to be restarted, any requests that have not be +If it dies and needs to be restarted, any requests that have not been answered will still appear in the file and will be read by the new instance of the helper. @@ -142,10 +175,9 @@ Each cache should also define a "cache_request" method which takes a cache item and encodes a request into the buffer provided. - Note: If a cache has no active readers on the channel, and has had not active readers for more than 60 seconds, further requests will not be -added to the channel but instead all looks that do not find a valid +added to the channel but instead all lookups that do not find a valid entry will fail. This is partly for backward compatibility: The previous nfs exports table was deemed to be authoritative and a failed lookup meant a definite 'no'. @@ -154,18 +186,17 @@ request/response format ----------------------- While each cache is free to use it's own format for requests -and responses over channel, the following is recommended are +and responses over channel, the following is recommended as appropriate and support routines are available to help: Each request or response record should be printable ASCII with precisely one newline character which should be at the end. Fields within the record should be separated by spaces, normally one. If spaces, newlines, or nul characters are needed in a field they -much be quotes. two mechanisms are available: +much be quoted. two mechanisms are available: 1/ If a field begins '\x' then it must contain an even number of hex digits, and pairs of these digits provide the bytes in the field. 2/ otherwise a \ in the field must be followed by 3 octal digits which give the code for a byte. Other characters are treated - as them selves. At the very least, space, newlines nul, and + as them selves. At the very least, space, newline, nul, and '\' must be quoted in this way. - -- cgit v0.10.2 From 013d3868143387be99bb0373d49452eaa3c55d41 Mon Sep 17 00:00:00 2001 From: Martin Andersson Date: Mon, 27 Mar 2006 01:15:18 -0800 Subject: [PATCH] sched: fix task interactivity calculation Is a truncation error in kernel/sched.c triggered when the nice value is negative. The affected code is used in the TASK_INTERACTIVE macro. The code is: #define SCALE(v1,v1_max,v2_max) \ (v1) * (v2_max) / (v1_max) which is used in this way: SCALE(TASK_NICE(p), 40, MAX_BONUS) Comments in the code says: * This part scales the interactivity limit depending on niceness. * * We scale it linearly, offset by the INTERACTIVE_DELTA delta. * Here are a few examples of different nice levels: * * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] * * (the X axis represents the possible -5 ... 0 ... +5 dynamic * priority range a task can explore, a value of '1' means the * task is rated interactive.) However, the current code does not scale it linearly and the result differs from the given examples. If the mathematical function "floor" is used when the nice value is negative instead of the truncation one gets when using integer division, the result conforms to the documentation. Output of TASK_INTERACTIVE when using the kernel code: nice dynamic priorities -20 1 1 1 1 1 1 1 1 1 0 0 -19 1 1 1 1 1 1 1 1 0 0 0 -18 1 1 1 1 1 1 1 1 0 0 0 -17 1 1 1 1 1 1 1 1 0 0 0 -16 1 1 1 1 1 1 1 1 0 0 0 -15 1 1 1 1 1 1 1 0 0 0 0 -14 1 1 1 1 1 1 1 0 0 0 0 -13 1 1 1 1 1 1 1 0 0 0 0 -12 1 1 1 1 1 1 1 0 0 0 0 -11 1 1 1 1 1 1 0 0 0 0 0 -10 1 1 1 1 1 1 0 0 0 0 0 -9 1 1 1 1 1 1 0 0 0 0 0 -8 1 1 1 1 1 1 0 0 0 0 0 -7 1 1 1 1 1 0 0 0 0 0 0 -6 1 1 1 1 1 0 0 0 0 0 0 -5 1 1 1 1 1 0 0 0 0 0 0 -4 1 1 1 1 1 0 0 0 0 0 0 -3 1 1 1 1 0 0 0 0 0 0 0 -2 1 1 1 1 0 0 0 0 0 0 0 -1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 2 1 1 1 1 0 0 0 0 0 0 0 3 1 1 1 1 0 0 0 0 0 0 0 4 1 1 1 0 0 0 0 0 0 0 0 5 1 1 1 0 0 0 0 0 0 0 0 6 1 1 1 0 0 0 0 0 0 0 0 7 1 1 1 0 0 0 0 0 0 0 0 8 1 1 0 0 0 0 0 0 0 0 0 9 1 1 0 0 0 0 0 0 0 0 0 10 1 1 0 0 0 0 0 0 0 0 0 11 1 1 0 0 0 0 0 0 0 0 0 12 1 0 0 0 0 0 0 0 0 0 0 13 1 0 0 0 0 0 0 0 0 0 0 14 1 0 0 0 0 0 0 0 0 0 0 15 1 0 0 0 0 0 0 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0 0 0 Output of TASK_INTERACTIVE when using "floor" nice dynamic priorities -20 1 1 1 1 1 1 1 1 1 0 0 -19 1 1 1 1 1 1 1 1 1 0 0 -18 1 1 1 1 1 1 1 1 1 0 0 -17 1 1 1 1 1 1 1 1 1 0 0 -16 1 1 1 1 1 1 1 1 0 0 0 -15 1 1 1 1 1 1 1 1 0 0 0 -14 1 1 1 1 1 1 1 1 0 0 0 -13 1 1 1 1 1 1 1 1 0 0 0 -12 1 1 1 1 1 1 1 0 0 0 0 -11 1 1 1 1 1 1 1 0 0 0 0 -10 1 1 1 1 1 1 1 0 0 0 0 -9 1 1 1 1 1 1 1 0 0 0 0 -8 1 1 1 1 1 1 0 0 0 0 0 -7 1 1 1 1 1 1 0 0 0 0 0 -6 1 1 1 1 1 1 0 0 0 0 0 -5 1 1 1 1 1 1 0 0 0 0 0 -4 1 1 1 1 1 0 0 0 0 0 0 -3 1 1 1 1 1 0 0 0 0 0 0 -2 1 1 1 1 1 0 0 0 0 0 0 -1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 2 1 1 1 1 0 0 0 0 0 0 0 3 1 1 1 1 0 0 0 0 0 0 0 4 1 1 1 0 0 0 0 0 0 0 0 5 1 1 1 0 0 0 0 0 0 0 0 6 1 1 1 0 0 0 0 0 0 0 0 7 1 1 1 0 0 0 0 0 0 0 0 8 1 1 0 0 0 0 0 0 0 0 0 9 1 1 0 0 0 0 0 0 0 0 0 10 1 1 0 0 0 0 0 0 0 0 0 11 1 1 0 0 0 0 0 0 0 0 0 12 1 0 0 0 0 0 0 0 0 0 0 13 1 0 0 0 0 0 0 0 0 0 0 14 1 0 0 0 0 0 0 0 0 0 0 15 1 0 0 0 0 0 0 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0 0 0 Signed-off-by: Martin Andersson Acked-by: Ingo Molnar Cc: Nick Piggin Cc: Mike Galbraith Cc: Peter Williams Cc: Con Kolivas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sched.c b/kernel/sched.c index 78acdef..dc599c8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -145,7 +145,8 @@ (v1) * (v2_max) / (v1_max) #define DELTA(p) \ - (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) + (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ + INTERACTIVE_DELTA) #define TASK_INTERACTIVE(p) \ ((p)->prio <= (p)->static_prio - DELTA(p)) -- cgit v0.10.2 From 77e4bfbcf071f795b54862455dce8902b3fc29c2 Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Mon, 27 Mar 2006 01:15:20 -0800 Subject: [PATCH] Small schedule() optimization small schedule() microoptimization. Signed-off-by: Andreas Mohr Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sched.c b/kernel/sched.c index dc599c8..a96a05d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2879,13 +2879,11 @@ asmlinkage void __sched schedule(void) * schedule() atomically, we ignore that path for now. * Otherwise, whine if we are scheduling when we should not be. */ - if (likely(!current->exit_state)) { - if (unlikely(in_atomic())) { - printk(KERN_ERR "BUG: scheduling while atomic: " - "%s/0x%08x/%d\n", - current->comm, preempt_count(), current->pid); - dump_stack(); - } + if (unlikely(in_atomic() && !current->exit_state)) { + printk(KERN_ERR "BUG: scheduling while atomic: " + "%s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + dump_stack(); } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -- cgit v0.10.2 From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:22 -0800 Subject: [PATCH] sched: new sched domain for representing multi-core Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f7db71d..f17bd1d 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -231,6 +231,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config X86_UP_APIC diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6..a06a490 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921..7e7fd4e 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l1i) l1i = new_l1i; - if (new_l2) + if (new_l2) { l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) + if (new_l3) { l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } if ( trace ) printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d8..a696990 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 45efe0c..1cb4aa2 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -250,6 +250,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config NUMA diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a57eec8..d1f3e92 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id(0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e9865..ea48fa6 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* Last level cache ID of each logical CPU */ +u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index feca5d9..af4bfd0 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -67,6 +68,9 @@ struct cpuinfo_x86 { char pad0; int x86_power; unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ +#endif unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char booted_cores; /* number of cores as seen by OS */ unsigned char apicid; @@ -103,6 +107,7 @@ extern struct cpuinfo_x86 cpu_data[]; extern int phys_proc_id[NR_CPUS]; extern int cpu_core_id[NR_CPUS]; +extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; extern void identify_cpu(struct cpuinfo_x86 *); diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index aa958c6..b94e5ee 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -112,4 +112,6 @@ extern unsigned long node_remap_size[]; #endif /* CONFIG_NUMA */ +extern cpumask_t cpu_coregroup_map(int cpu); + #endif /* _ASM_I386_TOPOLOGY_H */ diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 8c8d88c..1aa2cee 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include #define TF_MASK 0x00000100 #define IF_MASK 0x00000200 @@ -65,6 +66,9 @@ struct cpuinfo_x86 { __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; +#ifdef CONFIG_SMP + cpumask_t llc_shared_map; /* cpus sharing the last level cache */ +#endif __u8 apicid; __u8 booted_cores; /* number of cores as seen by OS */ } ____cacheline_aligned; diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 9ccbb2c..a4fdaeb 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -56,6 +56,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; extern u8 phys_proc_id[NR_CPUS]; extern u8 cpu_core_id[NR_CPUS]; +extern u8 cpu_llc_id[NR_CPUS]; #define SMP_TRAMPOLINE_BASE 0x6000 diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index c642f5d..9db54e9 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -68,4 +68,6 @@ extern int __node_distance(int, int); #include +extern cpumask_t cpu_coregroup_map(int cpu); + #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index e8eb004..a305ae2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -164,6 +164,15 @@ .nr_balance_failed = 0, \ } +#ifdef CONFIG_SCHED_MC +#ifndef SD_MC_INIT +/* for now its same as SD_CPU_INIT. + * TBD: Tune Domain parameters! + */ +#define SD_MC_INIT SD_CPU_INIT +#endif +#endif + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index a96a05d..8a8b71b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu) } #endif +#ifdef CONFIG_SCHED_MC +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static struct sched_group sched_group_core[NR_CPUS]; +#endif + +#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) +static int cpu_to_core_group(int cpu) +{ + return first_cpu(cpu_sibling_map[cpu]); +} +#elif defined(CONFIG_SCHED_MC) +static int cpu_to_core_group(int cpu) +{ + return cpu; +} +#endif + static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; static int cpu_to_phys_group(int cpu) { -#ifdef CONFIG_SCHED_SMT +#if defined(CONFIG_SCHED_MC) + cpumask_t mask = cpu_coregroup_map(cpu); + return first_cpu(mask); +#elif defined(CONFIG_SCHED_SMT) return first_cpu(cpu_sibling_map[cpu]); #else return cpu; @@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map) sd->parent = p; sd->groups = &sched_group_phys[group]; +#ifdef CONFIG_SCHED_MC + p = sd; + sd = &per_cpu(core_domains, i); + group = cpu_to_core_group(i); + *sd = SD_MC_INIT; + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); + sd->parent = p; + sd->groups = &sched_group_core[group]; +#endif + #ifdef CONFIG_SCHED_SMT p = sd; sd = &per_cpu(cpu_domains, i); @@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map) } #endif +#ifdef CONFIG_SCHED_MC + /* Set up multi-core groups */ + for_each_cpu_mask(i, *cpu_map) { + cpumask_t this_core_map = cpu_coregroup_map(i); + cpus_and(this_core_map, this_core_map, *cpu_map); + if (i != first_cpu(this_core_map)) + continue; + init_sched_build_groups(sched_group_core, this_core_map, + &cpu_to_core_group); + } +#endif + + /* Set up physical groups */ for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); @@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map) power = SCHED_LOAD_SCALE; sd->groups->cpu_power = power; #endif +#ifdef CONFIG_SCHED_MC + sd = &per_cpu(core_domains, i); + power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) + * SCHED_LOAD_SCALE / 10; + sd->groups->cpu_power = power; + + sd = &per_cpu(phys_domains, i); + /* + * This has to be < 2 * SCHED_LOAD_SCALE + * Lets keep it SCHED_LOAD_SCALE, so that + * while calculating NUMA group's cpu_power + * we can simply do + * numa_group->cpu_power += phys_group->cpu_power; + * + * See "only add power once for each physical pkg" + * comment below + */ + sd->groups->cpu_power = SCHED_LOAD_SCALE; +#else sd = &per_cpu(phys_domains, i); power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (cpus_weight(sd->groups->cpumask)-1) / 10; sd->groups->cpu_power = power; +#endif #ifdef CONFIG_NUMA sd = &per_cpu(allnodes_domains, i); @@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map) next_sg: for_each_cpu_mask(j, sg->cpumask) { struct sched_domain *sd; - int power; sd = &per_cpu(phys_domains, j); if (j != first_cpu(sd->groups->cpumask)) { @@ -5833,10 +5896,8 @@ next_sg: */ continue; } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sg->cpu_power += power; + sg->cpu_power += sd->groups->cpu_power; } sg = sg->next; if (sg != sched_group_nodes[i]) @@ -5849,6 +5910,8 @@ next_sg: struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); +#elif defined(CONFIG_SCHED_MC) + sd = &per_cpu(core_domains, i); #else sd = &per_cpu(phys_domains, i); #endif -- cgit v0.10.2 From 0806903316d516a3b3851c51cea5c71724d9051d Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:23 -0800 Subject: [PATCH] sched: fix group power for allnodes_domains Current sched groups power calculation for allnodes_domains is wrong. We should really be using cumulative power of the physical packages in that group (similar to the calculation in node_domains) Signed-off-by: Suresh Siddha Cc: Ingo Molnar Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sched.c b/kernel/sched.c index 8a8b71b..7854ee5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5621,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu) { return cpu_to_node(cpu); } +static void init_numa_sched_groups_power(struct sched_group *group_head) +{ + struct sched_group *sg = group_head; + int j; + + if (!sg) + return; +next_sg: + for_each_cpu_mask(j, sg->cpumask) { + struct sched_domain *sd; + + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { + /* + * Only add "power" once for each + * physical package. + */ + continue; + } + + sg->cpu_power += sd->groups->cpu_power; + } + sg = sg->next; + if (sg != group_head) + goto next_sg; +} #endif /* @@ -5866,43 +5892,13 @@ void build_sched_domains(const cpumask_t *cpu_map) (cpus_weight(sd->groups->cpumask)-1) / 10; sd->groups->cpu_power = power; #endif - -#ifdef CONFIG_NUMA - sd = &per_cpu(allnodes_domains, i); - if (sd->groups) { - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - } -#endif } #ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) { - struct sched_group *sg = sched_group_nodes[i]; - int j; - - if (sg == NULL) - continue; -next_sg: - for_each_cpu_mask(j, sg->cpumask) { - struct sched_domain *sd; + for (i = 0; i < MAX_NUMNODES; i++) + init_numa_sched_groups_power(sched_group_nodes[i]); - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { - /* - * Only add "power" once for each - * physical package. - */ - continue; - } - - sg->cpu_power += sd->groups->cpu_power; - } - sg = sg->next; - if (sg != sched_group_nodes[i]) - goto next_sg; - } + init_numa_sched_groups_power(sched_group_allnodes); #endif /* Attach the domains */ -- cgit v0.10.2 From b06be912a3ad68c69dba0ed6e92723140020e392 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Mar 2006 01:15:24 -0800 Subject: [PATCH] x86: don't use cpuid.2 to determine cache info if cpuid.4 is supported Don't use cpuid.2 to determine cache info if cpuid.4 is supported. The exception is P4 trace cache. We always use cpuid.2 to get trace cache under P4. Signed-off-by: Shaohua Li Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 7e7fd4e..9df87b0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -225,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - if (c->cpuid_level > 1) { + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -251,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -276,43 +286,46 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } + } - if (new_l1d) - l1d = new_l1d; + if (new_l1d) + l1d = new_l1d; - if (new_l1i) - l1i = new_l1i; + if (new_l1i) + l1i = new_l1i; - if (new_l2) { - l2 = new_l2; + if (new_l2) { + l2 = new_l2; #ifdef CONFIG_SMP - cpu_llc_id[cpu] = l2_id; + cpu_llc_id[cpu] = l2_id; #endif - } + } - if (new_l3) { - l3 = new_l3; + if (new_l3) { + l3 = new_l3; #ifdef CONFIG_SMP - cpu_llc_id[cpu] = l3_id; + cpu_llc_id[cpu] = l3_id; #endif - } - - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); } + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); + return l2; } -- cgit v0.10.2 From a117e66ed45ac0569c039ea60bd7a9a61e031858 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:25 -0800 Subject: [PATCH] unify pfn_to_page: generic functions There are 3 memory models, FLATMEM, DISCONTIGMEM, SPARSEMEM. Each arch has its own page_to_pfn(), pfn_to_page() for each models. But most of them can use the same arithmetic. This patch adds asm-generic/memory_model.h, which includes generic page_to_pfn(), pfn_to_page() definitions for each memory model. When CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y, out-of-line functions are used instead of macro. This is enabled by some archs and reduces text size. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Andi Kleen Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: Hirokazu Takata Cc: Ralf Baechle Cc: Kyle McMartin Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h new file mode 100644 index 0000000..a7bb497 --- /dev/null +++ b/include/asm-generic/memory_model.h @@ -0,0 +1,77 @@ +#ifndef __ASM_MEMORY_MODEL_H +#define __ASM_MEMORY_MODEL_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#if defined(CONFIG_FLATMEM) + +#ifndef ARCH_PFN_OFFSET +#define ARCH_PFN_OFFSET (0UL) +#endif + +#elif defined(CONFIG_DISCONTIGMEM) + +#ifndef arch_pfn_to_nid +#define arch_pfn_to_nid(pfn) pfn_to_nid(pfn) +#endif + +#ifndef arch_local_page_offset +#define arch_local_page_offset(pfn, nid) \ + ((pfn) - NODE_DATA(nid)->node_start_pfn) +#endif + +#endif /* CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE +struct page; +/* this is useful when inlined pfn_to_page is too big */ +extern struct page *pfn_to_page(unsigned long pfn); +extern unsigned long page_to_pfn(struct page *page); +#else +/* + * supports 3 memory models. + */ +#if defined(CONFIG_FLATMEM) + +#define pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ + ARCH_PFN_OFFSET) +#elif defined(CONFIG_DISCONTIGMEM) + +#define pfn_to_page(pfn) \ +({ unsigned long __pfn = (pfn); \ + unsigned long __nid = arch_pfn_to_nid(pfn); \ + NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ +}) + +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + struct zone *__zone = page_zone(__pg); \ + (unsigned long)(__pg - __zone->zone_mem_map) + \ + __zone->zone_start_pfn; \ +}) + +#elif defined(CONFIG_SPARSEMEM) +/* + * Note: section's mem_map is encorded to reflect its start_pfn. + * section[i].section_mem_map == mem_map's address - start_pfn; + */ +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + int __sec = page_to_section(__pg); \ + __pg - __section_mem_map_addr(__nr_to_section(__sec)); \ +}) + +#define pfn_to_page(pfn) \ +({ unsigned long __pfn = (pfn); \ + struct mem_section *__sec = __pfn_to_section(__pfn); \ + __section_mem_map_addr(__sec) + __pfn; \ +}) +#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ +#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 66fe4ac..aabb219 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -111,6 +111,8 @@ typedef unsigned long pgprot_t; (_AC(0x0000000070000000,UL)) : \ (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) +#include + #endif /* !(__ASSEMBLY__) */ /* to align the pointer to the (next) page boundary */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ebfc238..0c1c0c0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -602,17 +602,6 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = (pfn); \ - __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ -}) -#define page_to_pfn(page) \ -({ \ - page - __section_mem_map_addr(__nr_to_section( \ - page_to_section(page))); \ -}) - static inline int pfn_valid(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 338a02b..349b328 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2745,3 +2745,45 @@ void *__init alloc_large_system_hash(const char *tablename, return table; } + +#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE +/* + * pfn <-> page translation. out-of-line version. + * (see asm-generic/memory_model.h) + */ +#if defined(CONFIG_FLATMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + return mem_map + (pfn - ARCH_PFN_OFFSET); +} +unsigned long page_to_pfn(struct page *page) +{ + return (page - mem_map) + ARCH_PFN_OFFSET; +} +#elif defined(CONFIG_DISCONTIGMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + int nid = arch_pfn_to_nid(pfn); + return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid); +} +unsigned long page_to_pfn(struct page *page) +{ + struct zone *zone = page_zone(page); + return (page - zone->zone_mem_map) + zone->zone_start_pfn; + +} +#elif defined(CONFIG_SPARSEMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn; +} + +unsigned long page_to_pfn(struct page *page) +{ + long section_id = page_to_section(page); + return page - __section_mem_map_addr(__nr_to_section(section_id)); +} +#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */ +EXPORT_SYMBOL(pfn_to_page); +EXPORT_SYMBOL(page_to_pfn); +#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ -- cgit v0.10.2 From ad658b385e6308066f9084a7ea01305b223cd3a0 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:33 -0800 Subject: [PATCH] unify pfn_to_page: i386 pfn_to_page i386 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 74f595d..e33e9f9 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -70,8 +70,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) - /* * Following are macros that each numa implmentation must define. */ @@ -86,21 +84,6 @@ static inline int pfn_to_nid(unsigned long pfn) /* XXX: FIXME -- wli */ #define kern_addr_valid(kaddr) (0) -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = pfn; \ - int __node = pfn_to_nid(__pfn); \ - &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ -}) - -#define page_to_pfn(pg) \ -({ \ - struct page *__page = pg; \ - struct zone *__zone = page_zone(__page); \ - (unsigned long)(__page - __zone->zone_mem_map) \ - + __zone->zone_start_pfn; \ -}) - #ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */ #define pfn_valid(pfn) ((pfn) < num_physpages) #else diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 997ca5d..30f52a2 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -126,8 +126,6 @@ extern int page_is_ram(unsigned long pagenr); #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #ifdef CONFIG_FLATMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_FLATMEM */ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) @@ -141,6 +139,7 @@ extern int page_is_ram(unsigned long pagenr); #endif /* __KERNEL__ */ +#include #include #endif /* _I386_PAGE_H */ -- cgit v0.10.2 From dc8ecb43701a78bd3c38e7fed1d1c76840579450 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:34 -0800 Subject: [PATCH] unify pfn_to_page: x86_64 pfn_to_page x86_64 can use generic funcs. For DISCONTIGMEM, CONFIG_OUT_OF_LINE_PFN_TO_PAGE is selected. Signed-off-by: KAMEZAWA Hiroyuki Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 1cb4aa2..4310b4a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -334,6 +334,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NUMA +config OUT_OF_LINE_PFN_TO_PAGE + def_bool y + depends on DISCONTIGMEM + config NR_CPUS int "Maximum number of CPUs (2-256)" range 2 255 diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 63c7264..4be82d6 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -377,21 +377,6 @@ EXPORT_SYMBOL(node_data); * Should do that. */ -/* Requires pfn_valid(pfn) to be true */ -struct page *pfn_to_page(unsigned long pfn) -{ - int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); - return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; -} -EXPORT_SYMBOL(pfn_to_page); - -unsigned long page_to_pfn(struct page *page) -{ - return (long)(((page) - page_zone(page)->zone_mem_map) + - page_zone(page)->zone_start_pfn); -} -EXPORT_SYMBOL(page_to_pfn); - int pfn_valid(unsigned long pfn) { unsigned nid; diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index 937f99b..6b18cd8 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -44,12 +44,8 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) #define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) -extern struct page *pfn_to_page(unsigned long pfn); -extern unsigned long page_to_pfn(struct page *page); extern int pfn_valid(unsigned long pfn); #endif -#define local_mapnr(kvaddr) \ - ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) #endif #endif diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 615e3e4..408185b 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -123,8 +123,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __boot_va(x) __va(x) #define __boot_pa(x) __pa(x) #ifdef CONFIG_FLATMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < end_pfn) #endif @@ -140,6 +138,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __KERNEL__ */ +#include #include #endif /* _X86_64_PAGE_H */ -- cgit v0.10.2 From 659e35051b165fb40f1160190c2d23f3d5c319d5 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:35 -0800 Subject: [PATCH] unify pfn_to_page: powerpc pfn_to_page PowerPC can use generic ones. Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h index 0b82df4..2fbeceb 100644 --- a/include/asm-powerpc/page.h +++ b/include/asm-powerpc/page.h @@ -69,8 +69,6 @@ #endif #ifdef CONFIG_FLATMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif @@ -200,6 +198,7 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); extern int page_is_ram(unsigned long pfn); +#include #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v0.10.2 From 1c05dda2b6f025267ab79a267e0a84628a3760e1 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:36 -0800 Subject: [PATCH] unify pfn_to_page: alpha pfn_to_page Alpha can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index a011ef4..c900439 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -59,9 +59,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define local_mapnr(kvaddr) \ - ((__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr))) - /* * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory * and returns the kaddr corresponding to first physical page in the @@ -104,19 +101,8 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) __xx; \ }) -#define pfn_to_page(pfn) \ -({ \ - unsigned long kaddr = (unsigned long)__va((pfn) << PAGE_SHIFT); \ - (NODE_DATA(kvaddr_to_nid(kaddr))->node_mem_map + local_mapnr(kaddr)); \ -}) - -#define page_to_pfn(page) \ - ((page) - page_zone(page)->zone_mem_map + \ - (page_zone(page)->zone_start_pfn)) - #define page_to_pa(page) \ - ((( (page) - page_zone(page)->zone_mem_map ) \ - + page_zone(page)->zone_start_pfn) << PAGE_SHIFT) + (page_to_pfn(page) << PAGE_SHIFT) #define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT)) #define pfn_valid(pfn) \ diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h index fa0b41b..61bcf70 100644 --- a/include/asm-alpha/page.h +++ b/include/asm-alpha/page.h @@ -85,8 +85,6 @@ typedef unsigned long pgprot_t; #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) #ifndef CONFIG_DISCONTIGMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pfn_valid(pfn) ((pfn) < max_mapnr) @@ -95,9 +93,9 @@ typedef unsigned long pgprot_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #endif /* __KERNEL__ */ +#include #include #endif /* _ALPHA_PAGE_H */ -- cgit v0.10.2 From 7eb98a2f3b605d8a11434d855b58d828393ea533 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:37 -0800 Subject: [PATCH] unify pfn_to_page: arm pfn_to_page ARM can use generic funcs. PFN_TO_NID, LOCAL_MAP_NR are defined by sub-archs. Signed-off-by: KAMEZAWA Hirotuki Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h index b4e1146..afa5c3e 100644 --- a/include/asm-arm/memory.h +++ b/include/asm-arm/memory.h @@ -172,9 +172,7 @@ static inline __deprecated void *bus_to_virt(unsigned long x) * virt_addr_valid(k) indicates whether a virtual address is valid */ #ifndef CONFIG_DISCONTIGMEM - -#define page_to_pfn(page) (((page) - mem_map) + PHYS_PFN_OFFSET) -#define pfn_to_page(pfn) ((mem_map + (pfn)) - PHYS_PFN_OFFSET) +#define ARCH_PFN_OFFSET (PHYS_PFN_OFFSET) #define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) #define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)) @@ -189,13 +187,8 @@ static inline __deprecated void *bus_to_virt(unsigned long x) * around in memory. */ #include - -#define page_to_pfn(page) \ - (( (page) - page_zone(page)->zone_mem_map) \ - + page_zone(page)->zone_start_pfn) - -#define pfn_to_page(pfn) \ - (PFN_TO_MAPBASE(pfn) + LOCAL_MAP_NR((pfn) << PAGE_SHIFT)) +#define arch_pfn_to_nid(pfn) (PFN_TO_NID(pfn)) +#define arch_local_page_offset(pfn, nid) (LOCAL_MAP_NR((pfn) << PAGE_OFFSET)) #define pfn_valid(pfn) \ ({ \ @@ -243,4 +236,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) #endif +#include + #endif -- cgit v0.10.2 From 89fccaf2c4a4a8318153e3460a92284dadeb8f71 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:38 -0800 Subject: [PATCH] unify pfn_to_page: arm26 pfn_to_page arm26 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Ian Molton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-arm26/memory.h b/include/asm-arm26/memory.h index 20d7861..a65f10b 100644 --- a/include/asm-arm26/memory.h +++ b/include/asm-arm26/memory.h @@ -81,8 +81,7 @@ static inline void *phys_to_virt(unsigned long x) * virt_to_page(k) convert a _valid_ virtual address to struct page * * virt_addr_valid(k) indicates whether a virtual address is valid */ -#define page_to_pfn(page) (((page) - mem_map) + PHYS_PFN_OFFSET) -#define pfn_to_page(pfn) ((mem_map + (pfn)) - PHYS_PFN_OFFSET) +#define ARCH_PFN_OFFSET (PHYS_PFN_OFFSET) #define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) #define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)) @@ -98,4 +97,5 @@ static inline void *phys_to_virt(unsigned long x) */ #define page_to_bus(page) (page_address(page)) +#include #endif -- cgit v0.10.2 From bb872f787a9b6a40a4aa206175e768137f595d9c Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:39 -0800 Subject: [PATCH] unify pfn_to_page: cris pfn_to_page cris can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h index c99c478..3787633 100644 --- a/include/asm-cris/page.h +++ b/include/asm-cris/page.h @@ -43,8 +43,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* On CRIS the PFN numbers doesn't start at 0 so we have to compensate */ /* for that before indexing into the page table starting at mem_map */ -#define pfn_to_page(pfn) (mem_map + ((pfn) - (PAGE_OFFSET >> PAGE_SHIFT))) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + (PAGE_OFFSET >> PAGE_SHIFT)) +#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - (PAGE_OFFSET >> PAGE_SHIFT)) < max_mapnr) /* to index into the page map. our pages all start at physical addr PAGE_OFFSET so @@ -77,6 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __KERNEL__ */ +#include #include #endif /* _CRIS_PAGE_H */ -- cgit v0.10.2 From 5cdac7ca1b727184b1af21effaffcb9e2cd535c2 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:40 -0800 Subject: [PATCH] unify pfn_to_page: FRV pfn_to_page FRV can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h index b8221b6..dc0f7e0 100644 --- a/include/asm-frv/page.h +++ b/include/asm-frv/page.h @@ -57,13 +57,9 @@ extern unsigned long min_low_pfn; extern unsigned long max_pfn; #ifdef CONFIG_MMU -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long) ((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) - #else -#define pfn_to_page(pfn) (&mem_map[(pfn) - (PAGE_OFFSET >> PAGE_SHIFT)]) -#define page_to_pfn(page) ((PAGE_OFFSET >> PAGE_SHIFT) + (unsigned long) ((page) - mem_map)) +#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) < max_low_pfn) #endif @@ -87,6 +83,7 @@ extern unsigned long max_pfn; #define WANT_PAGE_VIRTUAL 1 #endif +#include #include #endif /* _ASM_PAGE_H */ -- cgit v0.10.2 From dd6cc7631cf2a63b3d6c32619054e017479e72ca Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:41 -0800 Subject: [PATCH] unify pfn_to_page: h8300 pfn_to_page H8300 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-h8300/page.h b/include/asm-h8300/page.h index cd35b1c..6472c9f 100644 --- a/include/asm-h8300/page.h +++ b/include/asm-h8300/page.h @@ -71,8 +71,7 @@ extern unsigned long memory_end; #define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) #define pfn_valid(page) (page < max_mapnr) -#define pfn_to_page(pfn) virt_to_page(pfn_to_virt(pfn)) -#define page_to_pfn(page) virt_to_pfn(page_to_virt(page)) +#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ ((void *)(kaddr) < (void *)memory_end)) @@ -81,6 +80,7 @@ extern unsigned long memory_end; #endif /* __KERNEL__ */ +#include #include #endif /* _H8300_PAGE_H */ -- cgit v0.10.2 From 7126cffe74f0dbcd015aaabab01069f422526535 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:42 -0800 Subject: [PATCH] unify pfn_to_page: m32r pfn_to_page m32r can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h index adc7970..9f3b5ac 100644 --- a/include/asm-m32r/mmzone.h +++ b/include/asm-m32r/mmzone.h @@ -21,20 +21,6 @@ extern struct pglist_data *node_data[]; __pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \ }) -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = pfn; \ - int __node = pfn_to_nid(__pfn); \ - &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ -}) - -#define page_to_pfn(pg) \ -({ \ - struct page *__page = pg; \ - struct zone *__zone = page_zone(__page); \ - (unsigned long)(__page - __zone->zone_mem_map) \ - + __zone->zone_start_pfn; \ -}) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) /* * pfn_valid should be made as fast as possible, and the current definition diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h index 4ab5788..9ddbc08 100644 --- a/include/asm-m32r/page.h +++ b/include/asm-m32r/page.h @@ -76,9 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #ifndef CONFIG_DISCONTIGMEM #define PFN_BASE (CONFIG_MEMORY_START >> PAGE_SHIFT) -#define pfn_to_page(pfn) (mem_map + ((pfn) - PFN_BASE)) -#define page_to_pfn(page) \ - ((unsigned long)((page) - mem_map) + PFN_BASE) +#define ARCH_PFN_OFFSET PFN_BASE #define pfn_valid(pfn) (((pfn) - PFN_BASE) < max_mapnr) #endif /* !CONFIG_DISCONTIGMEM */ @@ -92,6 +90,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __KERNEL__ */ +#include #include #endif /* _ASM_M32R_PAGE_H */ -- cgit v0.10.2 From a02036e796e5046fe0463b9a092e9b617c525866 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:43 -0800 Subject: [PATCH] unify pfn_to_page: mips pfn_to_page MIPS can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-mips/mmzone.h b/include/asm-mips/mmzone.h index 011caeb..7bde443 100644 --- a/include/asm-mips/mmzone.h +++ b/include/asm-mips/mmzone.h @@ -22,20 +22,6 @@ NODE_DATA(__n)->node_spanned_pages) : 0);\ }) -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = (pfn); \ - pg_data_t *__pg = NODE_DATA(pfn_to_nid(__pfn)); \ - __pg->node_mem_map + (__pfn - __pg->node_start_pfn); \ -}) - -#define page_to_pfn(p) \ -({ \ - struct page *__p = (p); \ - struct zone *__z = page_zone(__p); \ - ((__p - __z->zone_mem_map) + __z->zone_start_pfn); \ -}) - /* XXX: FIXME -- wli */ #define kern_addr_valid(addr) (0) diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h index ee25a77..a1eab13 100644 --- a/include/asm-mips/page.h +++ b/include/asm-mips/page.h @@ -140,8 +140,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #ifndef CONFIG_NEED_MULTIPLE_NODES -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif @@ -160,6 +158,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define WANT_PAGE_VIRTUAL #endif +#include #include #endif /* _ASM_PAGE_H */ -- cgit v0.10.2 From 0d833b41092df2f4a65cbdb0a0a947c35cdd2f9d Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:43 -0800 Subject: [PATCH] unify pfn_to_page: parisc pfn_to_page PARISC can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h index ae039f4..ceb9b73 100644 --- a/include/asm-parisc/mmzone.h +++ b/include/asm-parisc/mmzone.h @@ -25,23 +25,6 @@ extern struct node_map_data node_data[]; pg_data_t *__pgdat = NODE_DATA(nid); \ __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) -#define node_localnr(pfn, nid) ((pfn) - node_start_pfn(nid)) - -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = (pfn); \ - int __node = pfn_to_nid(__pfn); \ - &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ -}) - -#define page_to_pfn(pg) \ -({ \ - struct page *__page = pg; \ - struct zone *__zone = page_zone(__page); \ - BUG_ON(__zone == NULL); \ - (unsigned long)(__page - __zone->zone_mem_map) \ - + __zone->zone_start_pfn; \ -}) /* We have these possible memory map layouts: * Astro: 0-3.75, 67.75-68, 4-64 diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index 4a6752b..9f303c0 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -130,8 +130,6 @@ extern int npmem_ranges; #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #ifndef CONFIG_DISCONTIGMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_DISCONTIGMEM */ @@ -152,6 +150,7 @@ extern int npmem_ranges; #endif /* __KERNEL__ */ +#include #include #endif /* _PARISC_PAGE_H */ -- cgit v0.10.2 From f68d4c996d3062c8fe64e3ed36d4c83d23288ad8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:44 -0800 Subject: [PATCH] unify pfn_to_page: ppc pfn_to_page PPC can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h index 538e0c8..a70ba2e 100644 --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -149,8 +149,7 @@ extern int page_is_ram(unsigned long pfn); #define __pa(x) ___pa((unsigned long)(x)) #define __va(x) ((void *)(___va((unsigned long)(x)))) -#define pfn_to_page(pfn) (mem_map + ((pfn) - PPC_PGSTART)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PPC_PGSTART) +#define ARCH_PFN_OFFSET (PPC_PGSTART) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) @@ -175,5 +174,6 @@ extern __inline__ int get_order(unsigned long size) /* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */ #define __HAVE_ARCH_GATE_AREA 1 +#include #endif /* __KERNEL__ */ #endif /* _PPC_PAGE_H */ -- cgit v0.10.2 From aed630434c4fc0bca8ed62f6730d44ba00bdf595 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:45 -0800 Subject: [PATCH] unify pfn_to_page: s390 pfn_to_page s390 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index 2430c56..3b1138a 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -181,8 +181,6 @@ page_get_storage_key(unsigned long addr) #define PAGE_OFFSET 0x0UL #define __pa(x) (unsigned long)(x) #define __va(x) (void *)(unsigned long)(x) -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pfn_valid(pfn) ((pfn) < max_mapnr) @@ -193,6 +191,7 @@ page_get_storage_key(unsigned long addr) #endif /* __KERNEL__ */ +#include #include #endif /* _S390_PAGE_H */ -- cgit v0.10.2 From 104b8deaa5c0144cccfc7d914413ff80c7176af1 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:46 -0800 Subject: [PATCH] unify pfn_to_page: sh pfn_to_page sh can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Mundt Cc: Kazumoto Kojima Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h index 972c3f6..9c89287 100644 --- a/include/asm-sh/page.h +++ b/include/asm-sh/page.h @@ -105,9 +105,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* PFN start number, because of __MEMORY_START */ #define PFN_START (__MEMORY_START >> PAGE_SHIFT) - -#define pfn_to_page(pfn) (mem_map + (pfn) - PFN_START) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PFN_START) +#define ARCH_PFN_OFFSET (FPN_START) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - PFN_START) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) @@ -117,6 +115,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __KERNEL__ */ +#include #include #endif /* __ASM_SH_PAGE_H */ -- cgit v0.10.2 From 309d34bbe521940de9098d306a58a1797a42d990 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:47 -0800 Subject: [PATCH] unify pfn_to_page: sh64 pfn_to_page sh64 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Mundt Cc: Richard Curnow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-sh64/page.h b/include/asm-sh64/page.h index c86df90..e4937cd 100644 --- a/include/asm-sh64/page.h +++ b/include/asm-sh64/page.h @@ -105,9 +105,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* PFN start number, because of __MEMORY_START */ #define PFN_START (__MEMORY_START >> PAGE_SHIFT) - -#define pfn_to_page(pfn) (mem_map + (pfn) - PFN_START) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PFN_START) +#define ARCH_PFN_OFFSET (PFN_START) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - PFN_START) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) @@ -117,6 +115,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __KERNEL__ */ +#include #include #endif /* __ASM_SH64_PAGE_H */ -- cgit v0.10.2 From 064b2a0762ef7dca5f7068a90b260a309ffe2002 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:48 -0800 Subject: [PATCH] unify pfn_to_page: sparc pfn_to_page sparc can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h index 9122684..ec3274b 100644 --- a/include/asm-sparc/page.h +++ b/include/asm-sparc/page.h @@ -152,8 +152,7 @@ extern unsigned long pfn_base; #define virt_to_phys __pa #define phys_to_virt __va -#define pfn_to_page(pfn) (mem_map + ((pfn)-(pfn_base))) -#define page_to_pfn(page) ((unsigned long)(((page) - mem_map) + pfn_base)) +#define ARCH_PFN_OFFSET (pfn_base) #define virt_to_page(kaddr) (mem_map + ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT))) #define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr)) @@ -164,6 +163,7 @@ extern unsigned long pfn_base; #endif /* __KERNEL__ */ +#include #include #endif /* _SPARC_PAGE_H */ -- cgit v0.10.2 From 9828c185db7ab41426c03c5539b1759ccf3c28ed Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:50 -0800 Subject: [PATCH] unify pfn_to_page: uml pfn_to_page UML can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-um/page.h b/include/asm-um/page.h index 0229814..4136433 100644 --- a/include/asm-um/page.h +++ b/include/asm-um/page.h @@ -106,9 +106,6 @@ extern unsigned long uml_physmem; #define __pa(virt) to_phys((void *) (unsigned long) (virt)) #define __va(phys) to_virt((unsigned long) (phys)) -#define page_to_pfn(page) ((page) - mem_map) -#define pfn_to_page(pfn) (mem_map + (pfn)) - #define phys_to_pfn(p) ((p) >> PAGE_SHIFT) #define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) @@ -121,6 +118,7 @@ extern struct page *arch_validate(struct page *page, gfp_t mask, int order); extern void arch_free_page(struct page *page, int order); #define HAVE_ARCH_FREE_PAGE +#include #include #endif -- cgit v0.10.2 From e6009f1ba5f6ff0fc8723254ce89ef9c979cc370 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:52 -0800 Subject: [PATCH] unify pfn_to_page: v850 pfn_to_page v850 can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Miles Bader Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h index b4bc85e..ad03c46 100644 --- a/include/asm-v850/page.h +++ b/include/asm-v850/page.h @@ -111,8 +111,7 @@ typedef unsigned long pgprot_t; #define page_to_virt(page) \ ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) -#define pfn_to_page(pfn) virt_to_page (pfn_to_virt (pfn)) -#define page_to_pfn(page) virt_to_pfn (page_to_virt (page)) +#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) \ @@ -125,6 +124,7 @@ typedef unsigned long pgprot_t; #endif /* KERNEL */ +#include #include #endif /* __V850_PAGE_H__ */ -- cgit v0.10.2 From 655a0443470a73d5dc36e974a241e8db59bb1ccb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:52 -0800 Subject: [PATCH] unify pfn_to_page: xtensa pfn_to_page xtensa can use generic funcs. Signed-off-by: KAMEZAWA Hiroyuki Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h index 8ded36f..992bac5 100644 --- a/include/asm-xtensa/page.h +++ b/include/asm-xtensa/page.h @@ -109,10 +109,7 @@ void copy_user_page(void *to,void* from,unsigned long vaddr,struct page* page); #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) #define pfn_valid(pfn) ((unsigned long)pfn < max_mapnr) -#ifndef CONFIG_DISCONTIGMEM -# define pfn_to_page(pfn) (mem_map + (pfn)) -# define page_to_pfn(page) ((unsigned long)((page) - mem_map)) -#else +#ifdef CONFIG_DISCONTIGMEM # error CONFIG_DISCONTIGMEM not supported #endif @@ -130,4 +127,5 @@ void copy_user_page(void *to,void* from,unsigned long vaddr,struct page* page); VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* __KERNEL__ */ +#include #endif /* _XTENSA_PAGE_H */ -- cgit v0.10.2 From 0ecd702bcb924d5fb7f687e09986f688336ac896 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:53 -0800 Subject: [PATCH] unify pfn_to_page: ia64 pfn_to_page ia64 has special config CONFIG_VIRTUAL_MEM_MAP. CONFIG_DISCONTIGMEM=y && CONFIG_VIRTUAL_MEM_MAP!=y is bug ? Signed-off-by: KAMEZAWA Hiroyuki Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 6e9aa23..2087825 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -106,17 +106,25 @@ extern int ia64_pfn_valid (unsigned long pfn); # define ia64_pfn_valid(pfn) 1 #endif +#ifdef CONFIG_VIRTUAL_MEM_MAP +extern struct page *vmem_map; +#ifdef CONFIG_DISCONTIGMEM +# define page_to_pfn(page) ((unsigned long) (page - vmem_map)) +# define pfn_to_page(pfn) (vmem_map + (pfn)) +#endif +#endif + +#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM) +/* FLATMEM always configures mem_map (mem_map = vmem_map if necessary) */ +#include +#endif + #ifdef CONFIG_FLATMEM # define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn)) -# define page_to_pfn(page) ((unsigned long) (page - mem_map)) -# define pfn_to_page(pfn) (mem_map + (pfn)) #elif defined(CONFIG_DISCONTIGMEM) -extern struct page *vmem_map; extern unsigned long min_low_pfn; extern unsigned long max_low_pfn; # define pfn_valid(pfn) (((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn)) -# define page_to_pfn(page) ((unsigned long) (page - vmem_map)) -# define pfn_to_page(pfn) (vmem_map + (pfn)) #endif #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -- cgit v0.10.2 From a0140c1d85637ee5f4ea7c78f066e3611a6a79dc Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:55 -0800 Subject: [PATCH] remove zone_mem_map This patch removes zone_mem_map. pfn_to_page uses pgdat, page_to_pfn uses zone. page_to_pfn can use pgdat instead of zone, which is only one user of zone_mem_map. By modifing it, we can remove zone_mem_map. Signed-off-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index c900439..192d80c 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -83,8 +83,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) pte_t pte; \ unsigned long pfn; \ \ - pfn = ((unsigned long)((page)-page_zone(page)->zone_mem_map)) << 32; \ - pfn += page_zone(page)->zone_start_pfn << 32; \ + pfn = page_to_pfn(page) << 32; \ pte_val(pte) = pfn | pgprot_val(pgprot); \ \ pte; \ diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index a7bb497..0cfb086 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -45,11 +45,11 @@ extern unsigned long page_to_pfn(struct page *page); NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ }) -#define page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ - struct zone *__zone = page_zone(__pg); \ - (unsigned long)(__pg - __zone->zone_mem_map) + \ - __zone->zone_start_pfn; \ +#define page_to_pfn(pg) \ +({ struct page *__pg = (pg); \ + struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ + (unsigned long)(__pg - __pgdat->node_mem_map) + \ + __pgdat->node_start_pfn; \ }) #elif defined(CONFIG_SPARSEMEM) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c1c0c0..ace31c5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -225,7 +225,6 @@ struct zone { * Discontig memory support fields. */ struct pglist_data *zone_pgdat; - struct page *zone_mem_map; /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 349b328..8dc8f27 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2042,7 +2042,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone, zone_wait_table_init(zone, size); pgdat->nr_zones = zone_idx(zone) + 1; - zone->zone_mem_map = pfn_to_page(zone_start_pfn); zone->zone_start_pfn = zone_start_pfn; memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); @@ -2768,9 +2767,8 @@ struct page *pfn_to_page(unsigned long pfn) } unsigned long page_to_pfn(struct page *page) { - struct zone *zone = page_zone(page); - return (page - zone->zone_mem_map) + zone->zone_start_pfn; - + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + return (page - pgdat->node_mem_map) + pgdat->node_start_pfn; } #elif defined(CONFIG_SPARSEMEM) struct page *pfn_to_page(unsigned long pfn) -- cgit v0.10.2 From 8357f8695d58b50fbf2bd507b4b0fc2cd1e43bd6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:57 -0800 Subject: [PATCH] define for_each_online_pgdat This patch defines for_each_online_pgdat() as a replacement of for_each_pgdat() Now, online nodes are managed by node_online_map. But for_each_pgdat() uses pgdat_link to iterate over all nodes(pgdat). This means management structure for online pgdat is duplicated. I think using node_online_map for for_each_pgdat() is simple and sane rather ather than pgdat_link. New macro is named as for_each_online_pgdat(). Following patch will fix callers of for_each_pgdat(). The bootmem allocater uses for_each_pgdat() before pgdat initialization. I don't think it's sane. Following patch will fix it. Signed-off-by: Yasunori Goto Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ace31c5..96eb080 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -13,6 +13,7 @@ #include #include #include +#include #include /* Free memory management - zoned buddy allocator. */ @@ -349,57 +350,6 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -/** - * for_each_pgdat - helper macro to iterate over all nodes - * @pgdat - pointer to a pg_data_t variable - * - * Meant to help with common loops of the form - * pgdat = pgdat_list; - * while(pgdat) { - * ... - * pgdat = pgdat->pgdat_next; - * } - */ -#define for_each_pgdat(pgdat) \ - for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else if (pgdat->pgdat_next) { - pgdat = pgdat->pgdat_next; - zone = pgdat->node_zones; - } else - zone = NULL; - - return zone; -} - -/** - * for_each_zone - helper macro to iterate over all memory zones - * @zone - pointer to struct zone variable - * - * The user only needs to declare the zone variable, for_each_zone - * fills it in. This basically means for_each_zone() is an - * easier to read version of this piece of code: - * - * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) - * for (i = 0; i < MAX_NR_ZONES; ++i) { - * struct zone * z = pgdat->node_zones + i; - * ... - * } - * } - */ -#define for_each_zone(zone) \ - for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) - static inline int populated_zone(struct zone *zone) { return (!!zone->present_pages); @@ -471,6 +421,62 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} + +static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + return NODE_DATA(nid); +} + + +/** + * for_each_pgdat - helper macro to iterate over all nodes + * @pgdat - pointer to a pg_data_t variable + */ +#define for_each_online_pgdat(pgdat) \ + for (pgdat = first_online_pgdat(); \ + pgdat; \ + pgdat = next_online_pgdat(pgdat)) + +/* + * next_zone - helper magic for for_each_zone() + * Thanks to William Lee Irwin III for this piece of ingenuity. + */ +static inline struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} + +/** + * for_each_zone - helper macro to iterate over all memory zones + * @zone - pointer to struct zone variable + * + * The user only needs to declare the zone variable, for_each_zone + * fills it in. + */ +#define for_each_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) + #ifdef CONFIG_SPARSEMEM #include #endif diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b959a45..1a9ef3e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -350,11 +350,15 @@ extern nodemask_t node_possible_map; #define num_possible_nodes() nodes_weight(node_possible_map) #define node_online(node) node_isset((node), node_online_map) #define node_possible(node) node_isset((node), node_possible_map) +#define first_online_node first_node(node_online_map) +#define next_online_node(nid) next_node((nid), node_online_map) #else #define num_online_nodes() 1 #define num_possible_nodes() 1 #define node_online(node) ((node) == 0) #define node_possible(node) ((node) == 0) +#define first_online_node 0 +#define next_online_node(nid) (MAX_NUMNODES) #endif #define any_online_node(mask) \ -- cgit v0.10.2 From 679bc9fbb508a0aac9539b2de747eb5849feb428 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:58 -0800 Subject: [PATCH] for_each_online_pgdat: for_each_bootmem Add a list_head to bootmem_data_t and make bootmems use it. bootmem list is sorted by node_boot_start. Only nodes against which init_bootmem() is called are linked to the list. (i386 allocates bootmem only from one node(0) not from all online nodes.) A summary: 1. for_each_online_pgdat() traverses all *online* nodes. 2. alloc_bootmem() allocates memory only from initialized-for-bootmem nodes. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7155452..de3eb8d 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -38,6 +38,7 @@ typedef struct bootmem_data { unsigned long last_pos; unsigned long last_success; /* Previous allocation point. To speed * up searching */ + struct list_head list; } bootmem_data_t; extern unsigned long __init bootmem_bootmap_pages (unsigned long); diff --git a/mm/bootmem.c b/mm/bootmem.c index b55bd39..d3e3bd2 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -33,6 +33,7 @@ EXPORT_SYMBOL(max_pfn); /* This is exported so * dma_get_required_mask(), which uses * it, can be an inline function */ +static LIST_HEAD(bdata_list); #ifdef CONFIG_CRASH_DUMP /* * If we have booted due to a crash, max_pfn will be a very low value. We need @@ -52,6 +53,27 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) return mapsize; } +/* + * link bdata in order + */ +static void link_bootmem(bootmem_data_t *bdata) +{ + bootmem_data_t *ent; + if (list_empty(&bdata_list)) { + list_add(&bdata->list, &bdata_list); + return; + } + /* insert in order */ + list_for_each_entry(ent, &bdata_list, list) { + if (bdata->node_boot_start < ent->node_boot_start) { + list_add_tail(&bdata->list, &ent->list); + return; + } + } + list_add_tail(&bdata->list, &bdata_list); + return; +} + /* * Called once to set up the allocator itself. @@ -62,13 +84,11 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize = ((end - start)+7)/8; - pgdat->pgdat_next = pgdat_list; - pgdat_list = pgdat; - mapsize = ALIGN(mapsize, sizeof(long)); bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); bdata->node_boot_start = (start << PAGE_SHIFT); bdata->node_low_pfn = end; + link_bootmem(bdata); /* * Initially all pages are reserved - setup_arch() has to @@ -383,12 +403,11 @@ unsigned long __init free_all_bootmem (void) void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { - pg_data_t *pgdat = pgdat_list; + bootmem_data_t *bdata; void *ptr; - for_each_pgdat(pgdat) - if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal, 0))) + list_for_each_entry(bdata, &bdata_list, list) + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) return(ptr); /* @@ -416,11 +435,11 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) { - pg_data_t *pgdat = pgdat_list; + bootmem_data_t *bdata; void *ptr; - for_each_pgdat(pgdat) - if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, + list_for_each_entry(bdata, &bdata_list, list) + if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, LOW32LIMIT))) return(ptr); -- cgit v0.10.2 From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:59 -0800 Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat Replace for_each_pgdat() with for_each_online_pgdat(). Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 9db3242..2889567 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -36,7 +36,7 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 2f5e448..384f1d7 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -386,7 +386,7 @@ static void __init pgdat_insert(pg_data_t *pgdat) { pg_data_t *prev = NULL, *next; - for_each_pgdat(next) + for_each_online_pgdat(next) if (pgdat->node_id < next->node_id) break; else @@ -560,7 +560,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff4f31f..2ef1151 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -600,7 +600,7 @@ mem_init (void) kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); kclist_add(&kcore_kernel, _stext, _end - _stext); - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) totalram_pages += free_all_bootmem_node(pgdat); diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index c9e7dad..2e0fe19 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -47,7 +47,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index badac10..5e435a9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,7 +195,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { @@ -351,7 +351,7 @@ void __init mem_init(void) max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { if (!pfn_valid(pgdat->node_start_pfn + i)) continue; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b044156..e5f7f1c 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -72,7 +72,7 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; diff --git a/fs/buffer.c b/fs/buffer.c index d597758..23f1f3a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -493,7 +493,7 @@ static void free_more_memory(void) wakeup_pdflush(1024); yield(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) try_to_free_pages(zones, GFP_NOFS); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8dc8f27..ccc3713 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1201,7 +1201,7 @@ unsigned int nr_free_highpages (void) pg_data_t *pgdat; unsigned int pages = 0; - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; return pages; @@ -1343,7 +1343,7 @@ void get_zone_counts(unsigned long *active, *active = 0; *inactive = 0; *free = 0; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long l, m, n; __get_zone_counts(&l, &m, &n, pgdat); *active += l; @@ -2482,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void) struct pglist_data *pgdat; int j, idx; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long present_pages = zone->present_pages; diff --git a/mm/vmscan.c b/mm/vmscan.c index 78865c8..acdf001 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1305,7 +1305,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) current->reclaim_state = &reclaim_state; repeat: - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long freed; freed = balance_pgdat(pgdat, nr_to_free, 0); @@ -1335,7 +1335,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, cpumask_t mask; if (action == CPU_ONLINE) { - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { mask = node_to_cpumask(pgdat->node_id); if (any_online_cpu(mask) != NR_CPUS) /* One of our CPUs online: restore mask */ @@ -1351,7 +1351,7 @@ static int __init kswapd_init(void) pg_data_t *pgdat; swap_setup(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pid_t pid; pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); -- cgit v0.10.2 From 3571761fe49d960bb720c2308ffb9401f0a5e161 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:00 -0800 Subject: [PATCH] for_each_online_pgdat: remove sorting pgdat Because pgdat_list was linked to pgdat_list in *reverse* order, (By default) some of arch has to sort it by themselves. for_each_pgdat has gone..for_each_online_pgdat() uses node_online_map, which doesn't need to be sorted. This patch removes codes for sorting pgdat. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index c4af963..c3f3ae9 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -352,17 +352,6 @@ void __init zone_sizes_init(void) { int nid; - /* - * Insert nodes into pgdat_list backward so they appear in order. - * Clobber node 0's links and NULL out pgdat_list before starting. - */ - pgdat_list = NULL; - for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) { - if (!node_online(nid)) - continue; - NODE_DATA(nid)->pgdat_next = pgdat_list; - pgdat_list = NODE_DATA(nid); - } for_each_online_node(nid) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 384f1d7..ec9eeb8 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -379,31 +379,6 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) } /** - * pgdat_insert - insert the pgdat into global pgdat_list - * @pgdat: the pgdat for a node. - */ -static void __init pgdat_insert(pg_data_t *pgdat) -{ - pg_data_t *prev = NULL, *next; - - for_each_online_pgdat(next) - if (pgdat->node_id < next->node_id) - break; - else - prev = next; - - if (prev) { - prev->pgdat_next = pgdat; - pgdat->pgdat_next = next; - } else { - pgdat->pgdat_next = pgdat_list; - pgdat_list = pgdat; - } - - return; -} - -/** * memory_less_nodes - allocate and initialize CPU only nodes pernode * information. */ @@ -745,11 +720,5 @@ void __init paging_init(void) pfn_offset, zholes_size); } - /* - * Make memory less nodes become a member of the known nodes. - */ - for_each_node_mask(node, memory_less_mask) - pgdat_insert(mem_data[node].pgdat); - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index 08e7279..70c8528 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -137,12 +137,6 @@ unsigned long __init zone_sizes_init(void) int nid, i; mem_prof_t *mp; - pgdat_list = NULL; - for (nid = num_online_nodes() - 1 ; nid >= 0 ; nid--) { - NODE_DATA(nid)->pgdat_next = pgdat_list; - pgdat_list = NODE_DATA(nid); - } - for_each_online_node(nid) { mp = &mem_prof[nid]; for (i = 0 ; i < MAX_NR_ZONES ; i++) { -- cgit v0.10.2 From ae0f15fb91274e67d78836d38c99ec363df33073 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:01 -0800 Subject: [PATCH] for_each_online_pgdat: remove pgdat_list By using for_each_online_pgdat(), pgdat_list is not necessary now. This patch removes it. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 96eb080..0d12c3c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -307,7 +307,6 @@ typedef struct pglist_data { unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; - struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; struct task_struct *kswapd; int kswapd_max_order; @@ -324,8 +323,6 @@ typedef struct pglist_data { #include -extern struct pglist_data *pgdat_list; - void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat); void get_zone_counts(unsigned long *active, unsigned long *inactive, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ccc3713..dc523a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -49,7 +49,6 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } }; EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); -struct pglist_data *pgdat_list __read_mostly; unsigned long totalram_pages __read_mostly; unsigned long totalhigh_pages __read_mostly; long nr_swap_pages; @@ -2169,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos) { pg_data_t *pgdat; loff_t node = *pos; - - for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next) + for (pgdat = first_online_pgdat(); + pgdat && node; + pgdat = next_online_pgdat(pgdat)) --node; return pgdat; @@ -2181,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) pg_data_t *pgdat = (pg_data_t *)arg; (*pos)++; - return pgdat->pgdat_next; + return next_online_pgdat(pgdat); } static void frag_stop(struct seq_file *m, void *arg) -- cgit v0.10.2 From 95144c788dc01b6a0ff2c9c2222e37ffdab358b8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:02 -0800 Subject: [PATCH] uninline zone helpers Helper functions for for_each_online_pgdat/for_each_zone look too big to be inlined. Speed of these helper macro itself is not very important. (inner loops are tend to do more work than this) This patch make helper function to be out-of-lined. inline out-of-line .text 005c0680 005bf6a0 005c0680 - 005bf6a0 = FE0 = 4Kbytes. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0d12c3c..b5c2112 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -418,20 +418,9 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -static inline struct pglist_data *first_online_pgdat(void) -{ - return NODE_DATA(first_online_node); -} - -static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) -{ - int nid = next_online_node(pgdat->node_id); - - if (nid == MAX_NUMNODES) - return NULL; - return NODE_DATA(nid); -} - +extern struct pglist_data *first_online_pgdat(void); +extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); +extern struct zone *next_zone(struct zone *zone); /** * for_each_pgdat - helper macro to iterate over all nodes @@ -441,27 +430,6 @@ static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else { - pgdat = next_online_pgdat(pgdat); - if (pgdat) - zone = pgdat->node_zones; - else - zone = NULL; - } - return zone; -} - /** * for_each_zone - helper macro to iterate over all memory zones * @zone - pointer to struct zone variable diff --git a/mm/Makefile b/mm/Makefile index f10c753..0b8f73f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ - prio_tree.o util.o $(mmu-y) + prio_tree.o util.o mmzone.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o diff --git a/mm/mmzone.c b/mm/mmzone.c new file mode 100644 index 0000000..b022370 --- /dev/null +++ b/mm/mmzone.c @@ -0,0 +1,50 @@ +/* + * linux/mm/mmzone.c + * + * management codes for pgdats and zones. + */ + + +#include +#include +#include +#include + +struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} + +EXPORT_SYMBOL(first_online_pgdat); + +struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + return NODE_DATA(nid); +} +EXPORT_SYMBOL(next_online_pgdat); + + +/* + * next_zone - helper magic for for_each_zone() + */ +struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} +EXPORT_SYMBOL(next_zone); + -- cgit v0.10.2 From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 27 Mar 2006 01:16:04 -0800 Subject: [PATCH] unify PFN_* macros Just about every architecture defines some macros to do operations on pfns. They're all virtually identical. This patch consolidates all of them. One minor glitch is that at least i386 uses them in a very skeletal header file. To keep away from #include dependency hell, I stuck the new definitions in a new, isolated header. Of all of the implementations, sh64 is the only one that varied by a bit. It used some masks to ensure that any sign-extension got ripped away before the arithmetic is done. This has been posted to that sh64 maintainers and the development list. Compiles on x86, x86_64, ia64 and ppc64. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index b4e5f8f..9402624 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -34,6 +34,7 @@ #include #include #include +#include #ifdef CONFIG_MAGIC_SYSRQ #include #include @@ -241,9 +242,6 @@ reserve_std_resources(void) request_resource(io, standard_io_resources+i); } -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) #define PFN_MAX PFN_DOWN(0x80000000) #define for_each_mem_cluster(memdesc, cluster, i) \ for ((cluster) = (memdesc)->cluster, (i) = 0; \ @@ -472,11 +470,6 @@ page_is_ram(unsigned long pfn) return 0; } -#undef PFN_UP -#undef PFN_DOWN -#undef PFN_PHYS -#undef PFN_MAX - void __init setup_arch(char **cmdline_p) { diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 6d52512..bf6b65c 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -27,9 +28,6 @@ bootmem_data_t node_bdata[MAX_NUMNODES]; #define DBGDCONT(args...) #endif -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) #define for_each_mem_cluster(memdesc, cluster, i) \ for ((cluster) = (memdesc)->cluster, (i) = 0; \ (i) < (memdesc)->numclusters; (i)++, (cluster)++) diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index e3ecaa4..7da8a52 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -101,12 +102,6 @@ struct node_info { int bootmap_pages; }; -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT) -#define PFN_SIZE(x) ((x) >> PAGE_SHIFT) -#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \ - (((unsigned long)(s)) & PAGE_MASK)) - /* * FIXME: We really want to avoid allocating the bootmap bitmap * over the top of the initrd. Hopefully, this is located towards diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c index 1ba57ef..619a6ee 100644 --- a/arch/cris/kernel/setup.c +++ b/arch/cris/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -88,10 +89,6 @@ setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - /* min_low_pfn points to the start of DRAM, start_pfn points * to the first DRAM pages after the kernel, and max_low_pfn * to the end of DRAM. diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6917daa..8c08660 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include