From a953e4597abd51b74c99e0e3b7074532a60fd031 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:12 +0200 Subject: sched: replace MAX_NUMNODES with nr_node_ids in kernel/sched.c * Replace usages of MAX_NUMNODES with nr_node_ids in kernel/sched.c, where appropriate. This saves some allocated space as well as many wasted cycles going through node entries that are non-existent. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a..1ed8011 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6879,9 +6879,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) min_val = INT_MAX; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Start at @node */ - n = (node + i) % MAX_NUMNODES; + n = (node + i) % nr_node_ids; if (!nr_cpus_node(n)) continue; @@ -7075,7 +7075,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) if (!sched_group_nodes) continue; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; *nodemask = node_to_cpumask(i); @@ -7263,7 +7263,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* * Allocate the per-node list of sched groups */ - sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), + sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); @@ -7407,7 +7407,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #endif /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { SCHED_CPUMASK_VAR(nodemask, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); @@ -7431,7 +7431,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, send_covered, tmpmask); } - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; SCHED_CPUMASK_VAR(nodemask, allmasks); @@ -7470,9 +7470,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, cpus_or(*covered, *covered, *nodemask); prev = sg; - for (j = 0; j < MAX_NUMNODES; j++) { + for (j = 0; j < nr_node_ids; j++) { SCHED_CPUMASK_VAR(notcovered, allmasks); - int n = (i + j) % MAX_NUMNODES; + int n = (i + j) % nr_node_ids; node_to_cpumask_ptr(pnodemask, n); cpus_complement(*notcovered, *covered); @@ -7525,7 +7525,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, } #ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) + for (i = 0; i < nr_node_ids; i++) init_numa_sched_groups_power(sched_group_nodes[i]); if (sd_allnodes) { -- cgit v0.10.2 From 143aa5c53bd3895d42d7c08753fe58293988a69d Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: cpu: change some globals to statics in drivers/base/cpu.c v2 This patch makes the following needlessly global code static: - attr_online_map - attr_possible_map - attr_present_map - cpu_state_attr [v2] Signed-off-by: Adrian Bunk Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index e38dfed..5000402 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -119,14 +119,14 @@ static ssize_t print_cpus_##type(struct sysdev_class *class, char *buf) \ { \ return print_cpus_map(buf, &cpu_##type##_map); \ } \ -struct sysdev_class_attribute attr_##type##_map = \ +static struct sysdev_class_attribute attr_##type##_map = \ _SYSDEV_CLASS_ATTR(type, 0444, print_cpus_##type, NULL) print_cpus_func(online); print_cpus_func(possible); print_cpus_func(present); -struct sysdev_class_attribute *cpu_state_attr[] = { +static struct sysdev_class_attribute *cpu_state_attr[] = { &attr_online_map, &attr_possible_map, &attr_present_map, -- cgit v0.10.2 From 41df0d61c266998b8049df7fec119cd518a43aa1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: Add performance variants of cpumask operators * Increase performance for systems with large count NR_CPUS by limiting the range of the cpumask operators that loop over the bits in a cpumask_t variable. This removes a large amount of wasted cpu cycles. * Add performance variants of the cpumask operators: int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * Modify following to use performance variants: #define num_online_cpus() cpus_weight_nr(cpu_online_map) #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) #define num_present_cpus() cpus_weight_nr(cpu_present_map) #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) * Comment added to include/linux/cpumask.h: Note: The alternate operations with the suffix "_nr" are used to limit the range of the loop to nr_cpu_ids instead of NR_CPUS when NR_CPUS > 64 for performance reasons. If NR_CPUS is <= 64 then most assembler bitmask operators execute faster with a constant range, so the operator will continue to use NR_CPUS. Another consideration is that nr_cpu_ids is initialized to NR_CPUS and isn't lowered until the possible cpus are discovered (including any disabled cpus). So early uses will span the entire range of NR_CPUS. (The net effect is that for systems with 64 or less CPU's there are no functional changes.) For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Cc: Paul Jackson Cc: Christoph Lameter Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5df3db5..b49472d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -17,6 +17,20 @@ * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. * + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + * Note: The alternate operations with the suffix "_nr" are used + * to limit the range of the loop to nr_cpu_ids instead of + * NR_CPUS when NR_CPUS > 64 for performance reasons. + * If NR_CPUS is <= 64 then most assembler bitmask + * operators execute faster with a constant range, so + * the operator will continue to use NR_CPUS. + * + * Another consideration is that nr_cpu_ids is initialized + * to NR_CPUS and isn't lowered until the possible cpus are + * discovered (including any disabled cpus). So early uses + * will span the entire range of NR_CPUS. + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + * * The available cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask @@ -38,12 +52,14 @@ * int cpus_empty(mask) Is mask empty (no bits sets)? * int cpus_full(mask) Is mask full (all bits sets)? * int cpus_weight(mask) Hamming weigh - number of set bits + * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS * * void cpus_shift_right(dst, src, n) Shift right * void cpus_shift_left(dst, src, n) Shift left * * int first_cpu(mask) Number lowest set bit, or NR_CPUS * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS + * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set * CPU_MASK_ALL Initializer - all bits set @@ -59,7 +75,8 @@ * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz * - * for_each_cpu_mask(cpu, mask) for-loop cpu over mask + * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS + * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * * int num_online_cpus() Number of online CPUs * int num_possible_cpus() Number of all possible CPUs @@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } -#ifdef CONFIG_SMP -int __first_cpu(const cpumask_t *srcp); -#define first_cpu(src) __first_cpu(&(src)) -int __next_cpu(int n, const cpumask_t *srcp); -#define next_cpu(n, src) __next_cpu((n), &(src)) -#else -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#endif #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP extern cpumask_t *cpumask_of_cpu_map; @@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, bitmap_fold(dstp->bits, origp->bits, sz, nbits); } -#if NR_CPUS > 1 +#if NR_CPUS == 1 + +#define nr_cpu_ids 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) +#define any_online_cpu(mask) 0 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) + +#else /* NR_CPUS > 1 */ + +extern int nr_cpu_ids; +int __first_cpu(const cpumask_t *srcp); +int __next_cpu(int n, const cpumask_t *srcp); +int __any_online_cpu(const cpumask_t *mask); + +#define first_cpu(src) __first_cpu(&(src)) +#define next_cpu(n, src) __next_cpu((n), &(src)) +#define any_online_cpu(mask) __any_online_cpu(&(mask)) #define for_each_cpu_mask(cpu, mask) \ for ((cpu) = first_cpu(mask); \ (cpu) < NR_CPUS; \ (cpu) = next_cpu((cpu), (mask))) -#else /* NR_CPUS == 1 */ -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#endif /* NR_CPUS */ +#endif + +#if NR_CPUS <= 64 + +#define next_cpu_nr(n, src) next_cpu(n, src) +#define cpus_weight_nr(cpumask) cpus_weight(cpumask) +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + +#else /* NR_CPUS > 64 */ + +int __next_cpu_nr(int n, const cpumask_t *srcp); +#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) +#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = first_cpu(mask); \ + (cpu) < nr_cpu_ids; \ + (cpu) = next_cpu_nr((cpu), (mask))) + +#endif /* NR_CPUS > 64 */ /* * The following particular system cpumasks and operations manage @@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map; extern cpumask_t cpu_present_map; #if NR_CPUS > 1 -#define num_online_cpus() cpus_weight(cpu_online_map) -#define num_possible_cpus() cpus_weight(cpu_possible_map) -#define num_present_cpus() cpus_weight(cpu_present_map) +#define num_online_cpus() cpus_weight_nr(cpu_online_map) +#define num_possible_cpus() cpus_weight_nr(cpu_possible_map) +#define num_present_cpus() cpus_weight_nr(cpu_present_map) #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) @@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map; #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#ifdef CONFIG_SMP -extern int nr_cpu_ids; -#define any_online_cpu(mask) __any_online_cpu(&(mask)) -int __any_online_cpu(const cpumask_t *mask); -#else -#define nr_cpu_ids 1 -#define any_online_cpu(mask) 0 -#endif - -#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) -#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) -#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) +#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) +#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) #endif /* __LINUX_CPUMASK_H */ diff --git a/lib/cpumask.c b/lib/cpumask.c index bb4f76d..5f97dc2 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp) } EXPORT_SYMBOL(__next_cpu); +#if NR_CPUS > 64 +int __next_cpu_nr(int n, const cpumask_t *srcp) +{ + return min_t(int, nr_cpu_ids, + find_next_bit(srcp->bits, nr_cpu_ids, n+1)); +} +EXPORT_SYMBOL(__next_cpu_nr); +#endif + int __any_online_cpu(const cpumask_t *mask) { int cpu; -- cgit v0.10.2 From 141ad0688adb53094d6f75b39b4b3b0625de0e07 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: acpi: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index bb06738..28509fb 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -1013,7 +1013,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * affected cpu in order to get one proper T-state. * The notifier event is THROTTLING_PRECHANGE. */ - for_each_cpu_mask(i, online_throttling_cpus) { + for_each_cpu_mask_nr(i, online_throttling_cpus) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_PRECHANGE, &t_state); @@ -1034,7 +1034,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it is necessary to set T-state for every affected * cpus. */ - for_each_cpu_mask(i, online_throttling_cpus) { + for_each_cpu_mask_nr(i, online_throttling_cpus) { match_pr = processors[i]; /* * If the pointer is invalid, we will report the @@ -1068,7 +1068,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * affected cpu to update the T-states. * The notifier event is THROTTLING_POSTCHANGE */ - for_each_cpu_mask(i, online_throttling_cpus) { + for_each_cpu_mask_nr(i, online_throttling_cpus) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE, &t_state); -- cgit v0.10.2 From 068b12772a64c2440ef2f64ac5d780688c06576f Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: cpufreq: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7fce038..4937f2f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -589,7 +589,7 @@ static ssize_t show_cpus(cpumask_t mask, char *buf) ssize_t i = 0; unsigned int cpu; - for_each_cpu_mask(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (i) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); @@ -835,7 +835,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } #endif - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { if (cpu == j) continue; @@ -898,14 +898,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) } spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { cpufreq_cpu_data[j] = policy; per_cpu(policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); /* symlink affected CPUs */ - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { if (j == cpu) continue; if (!cpu_online(j)) @@ -945,7 +945,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) err_out_unregister: spin_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu_mask(j, policy->cpus) + for_each_cpu_mask_nr(j, policy->cpus) cpufreq_cpu_data[j] = NULL; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -1028,7 +1028,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * links afterwards. */ if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask(j, data->cpus) { + for_each_cpu_mask_nr(j, data->cpus) { if (j == cpu) continue; cpufreq_cpu_data[j] = NULL; @@ -1038,7 +1038,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); if (unlikely(cpus_weight(data->cpus) > 1)) { - for_each_cpu_mask(j, data->cpus) { + for_each_cpu_mask_nr(j, data->cpus) { if (j == cpu) continue; dprintk("removing link for cpu %u\n", j); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 5d3a04b..fe565ee 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -497,7 +497,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d2af20d..33855cb 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -367,7 +367,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Get Idle Time */ idle_ticks = UINT_MAX; - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { cputime64_t total_idle_ticks; unsigned int tmp_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; @@ -521,7 +521,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } - for_each_cpu_mask(j, policy->cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; -- cgit v0.10.2 From 363ab6f1424cdea63e5d182312d60e19077b892a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: core: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/cpu.c b/kernel/cpu.c index c77bc3a..50ae922 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -390,7 +390,7 @@ void __ref enable_nonboot_cpus(void) goto out; printk("Enabling non-boot CPUs ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { + for_each_cpu_mask_nr(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { printk("CPU%d is up\n", cpu); diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0..251358d 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -92,7 +92,7 @@ static void force_quiescent_state(struct rcu_data *rdp, */ cpumask = rcp->cpumask; cpu_clear(rdp->cpu, cpumask); - for_each_cpu_mask(cpu, cpumask) + for_each_cpu_mask_nr(cpu, cpumask) smp_send_reschedule(cpu); } } diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf19..18af270 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -657,7 +657,7 @@ rcu_try_flip_idle(void) /* Now ask each CPU for acknowledgement of the flip. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) { + for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) { per_cpu(rcu_flip_flag, cpu) = rcu_flipped; dyntick_save_progress_counter(cpu); } @@ -675,7 +675,7 @@ rcu_try_flip_waitack(void) int cpu; RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) if (rcu_try_flip_waitack_needed(cpu) && per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); @@ -707,7 +707,7 @@ rcu_try_flip_waitzero(void) /* Check to see if the sum of the "last" counters is zero. */ RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; if (sum != 0) { RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); @@ -722,7 +722,7 @@ rcu_try_flip_waitzero(void) smp_mb(); /* ^^^^^^^^^^^^ */ /* Call for a memory barrier from each CPU. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) { + for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) { per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; dyntick_save_progress_counter(cpu); } @@ -742,7 +742,7 @@ rcu_try_flip_waitmb(void) int cpu; RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) if (rcu_try_flip_waitmb_needed(cpu) && per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); diff --git a/kernel/sched.c b/kernel/sched.c index 1ed8011..814d6e1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2271,7 +2271,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu_mask(i, group->cpumask) { + for_each_cpu_mask_nr(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2313,7 +2313,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, /* Traverse only the allowed CPUs */ cpus_and(*tmp, group->cpumask, p->cpus_allowed); - for_each_cpu_mask(i, *tmp) { + for_each_cpu_mask_nr(i, *tmp) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -3296,7 +3296,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_mask(i, group->cpumask) { + for_each_cpu_mask_nr(i, group->cpumask) { struct rq *rq; if (!cpu_isset(i, *cpus)) @@ -3560,7 +3560,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, unsigned long max_load = 0; int i; - for_each_cpu_mask(i, group->cpumask) { + for_each_cpu_mask_nr(i, group->cpumask) { unsigned long wl; if (!cpu_isset(i, *cpus)) @@ -4100,7 +4100,7 @@ static void run_rebalance_domains(struct softirq_action *h) int balance_cpu; cpu_clear(this_cpu, cpus); - for_each_cpu_mask(balance_cpu, cpus) { + for_each_cpu_mask_nr(balance_cpu, cpus) { /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -6832,7 +6832,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, cpus_clear(*covered); - for_each_cpu_mask(i, *span) { + for_each_cpu_mask_nr(i, *span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; @@ -6843,7 +6843,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, cpus_clear(sg->cpumask); sg->__cpu_power = 0; - for_each_cpu_mask(j, *span) { + for_each_cpu_mask_nr(j, *span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; @@ -7043,7 +7043,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu_mask(j, sg->cpumask) { + for_each_cpu_mask_nr(j, sg->cpumask) { struct sched_domain *sd; sd = &per_cpu(phys_domains, j); @@ -7068,7 +7068,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { int cpu, i; - for_each_cpu_mask(cpu, *cpu_map) { + for_each_cpu_mask_nr(cpu, *cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7302,7 +7302,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = NULL, *p; SCHED_CPUMASK_VAR(nodemask, allmasks); @@ -7374,7 +7374,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { SCHED_CPUMASK_VAR(this_sibling_map, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); @@ -7391,7 +7391,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { SCHED_CPUMASK_VAR(this_core_map, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); @@ -7458,7 +7458,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, goto error; } sched_group_nodes[i] = sg; - for_each_cpu_mask(j, *nodemask) { + for_each_cpu_mask_nr(j, *nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); @@ -7504,21 +7504,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = &per_cpu(cpu_domains, i); init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = &per_cpu(core_domains, i); init_sched_groups_power(i, sd); } #endif - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = &per_cpu(phys_domains, i); init_sched_groups_power(i, sd); @@ -7538,7 +7538,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #endif /* Attach the domains */ - for_each_cpu_mask(i, *cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); @@ -7621,7 +7621,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) unregister_sched_domain_sysctl(); - for_each_cpu_mask(i, *cpu_map) + for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); arch_destroy_sched_domains(cpu_map, &tmpmask); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd3..e398318 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1022,7 +1022,7 @@ static int wake_idle(int cpu, struct task_struct *p) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { cpus_and(tmp, sd->span, p->cpus_allowed); - for_each_cpu_mask(i, tmp) { + for_each_cpu_mask_nr(i, tmp) { if (idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 060e87b..d73386c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -231,7 +231,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) return 1; span = sched_rt_period_mask(); - for_each_cpu_mask(i, span) { + for_each_cpu_mask_nr(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -272,7 +272,7 @@ static int balance_runtime(struct rt_rq *rt_rq) spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu_mask(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -1000,7 +1000,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4a23517..06b1754 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -301,7 +301,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) return -EINVAL; if (isadd == REGISTER) { - for_each_cpu_mask(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, cpu_to_node(cpu)); if (!s) @@ -320,7 +320,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) /* Deregister or cleanup */ cleanup: - for_each_cpu_mask(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 29fc39f..28c2b2c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -397,7 +397,7 @@ void flush_workqueue(struct workqueue_struct *wq) might_sleep(); lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); lock_release(&wq->lockdep_map, 1, _THIS_IP_); - for_each_cpu_mask(cpu, *cpu_map) + for_each_cpu_mask_nr(cpu, *cpu_map) flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); } EXPORT_SYMBOL_GPL(flush_workqueue); @@ -477,7 +477,7 @@ static void wait_on_work(struct work_struct *work) wq = cwq->wq; cpu_map = wq_cpu_map(wq); - for_each_cpu_mask(cpu, *cpu_map) + for_each_cpu_mask_nr(cpu, *cpu_map) wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } @@ -813,7 +813,7 @@ void destroy_workqueue(struct workqueue_struct *wq) list_del(&wq->list); spin_unlock(&workqueue_lock); - for_each_cpu_mask(cpu, *cpu_map) + for_each_cpu_mask_nr(cpu, *cpu_map) cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); put_online_cpus(); -- cgit v0.10.2 From 6d6a4360876f1e758e215570ccb04518db7cec3a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: mm: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index f4026ba..b5411c2 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(percpu_depopulate); void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) { int cpu; - for_each_cpu_mask(cpu, *mask) + for_each_cpu_mask_nr(cpu, *mask) percpu_depopulate(__pdata, cpu); } EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); @@ -86,7 +86,7 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, int cpu; cpus_clear(populated); - for_each_cpu_mask(cpu, *mask) + for_each_cpu_mask_nr(cpu, *mask) if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { __percpu_depopulate_mask(__pdata, &populated); return -ENOMEM; diff --git a/mm/vmstat.c b/mm/vmstat.c index db9eabb..c3d4a78 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -26,7 +26,7 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask) memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long)); - for_each_cpu_mask(cpu, *cpumask) { + for_each_cpu_mask_nr(cpu, *cpumask) { struct vm_event_state *this = &per_cpu(vm_event_states, cpu); for (i = 0; i < NR_VM_EVENT_ITEMS; i++) -- cgit v0.10.2 From 0e12f848b337fc034ceb3c0d03d75f8de1b8cc96 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: net: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/net/core/dev.c b/net/core/dev.c index 5829630..ee61b98 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2239,7 +2239,7 @@ out: */ if (!cpus_empty(net_dma.channel_mask)) { int chan_idx; - for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { struct dma_chan *chan = net_dma.channels[chan_idx]; if (chan) dma_async_memcpy_issue_pending(chan); @@ -4300,7 +4300,7 @@ static void net_dma_rebalance(struct net_dma *net_dma) i = 0; cpu = first_cpu(cpu_online_map); - for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { chan = net_dma->channels[chan_idx]; n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 9189707..8de5110 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -497,7 +497,7 @@ static void iucv_setmask_up(void) /* Disable all cpu but the first in cpu_irq_cpumask. */ cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); - for_each_cpu_mask(cpu, cpumask) + for_each_cpu_mask_nr(cpu, cpumask) smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); } -- cgit v0.10.2 From 334ef7a7ab8f80b689a2be95d5e62d2167900865 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2d474871e2fb092eb46a0930aba5442e10eb96cc Author: Mike Travis Date: Mon May 12 21:21:13 2008 +0200 diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index b0c8208..dd097b8 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd) cpumask_t saved_mask = current->cpus_allowed; unsigned int i; - for_each_cpu_mask(i, cmd->mask) { + for_each_cpu_mask_nr(i, cmd->mask) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } @@ -451,7 +451,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; - for_each_cpu_mask(i, cmd.mask) { + for_each_cpu_mask_nr(i, cmd.mask) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -466,7 +466,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - for_each_cpu_mask(i, cmd.mask) { + for_each_cpu_mask_nr(i, cmd.mask) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 199e4e0..f1685fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, return 0; /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { + for_each_cpu_mask_nr(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software * Developer's Manual, Volume 3 */ - for_each_cpu_mask(i, policy->cpus) + for_each_cpu_mask_nr(i, policy->cpus) cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { + for_each_cpu_mask_nr(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 46d4034..06d6eea 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -966,7 +966,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - for_each_cpu_mask(i, *(data->available_cores)) { + for_each_cpu_mask_nr(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -974,7 +974,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i res = transition_fid_vid(data, fid, vid); freqs.new = find_khz_freq_from_fid(data->currfid); - for_each_cpu_mask(i, *(data->available_cores)) { + for_each_cpu_mask_nr(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -997,7 +997,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask(i, *(data->available_cores)) { + for_each_cpu_mask_nr(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -1005,7 +1005,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i res = transition_pstate(data, pstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask(i, *(data->available_cores)) { + for_each_cpu_mask_nr(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 908dd34..8b0dd6f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -476,7 +476,7 @@ static int centrino_target (struct cpufreq_policy *policy, saved_mask = current->cpus_allowed; first_cpu = 1; cpus_clear(covered_cpus); - for_each_cpu_mask(j, online_policy_cpus) { + for_each_cpu_mask_nr(j, online_policy_cpus) { /* * Support for SMP systems. * Make sure we are running on CPU that wants to change freq @@ -517,7 +517,7 @@ static int centrino_target (struct cpufreq_policy *policy, dprintk("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); - for_each_cpu_mask(k, online_policy_cpus) { + for_each_cpu_mask_nr(k, online_policy_cpus) { freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -540,7 +540,7 @@ static int centrino_target (struct cpufreq_policy *policy, preempt_enable(); } - for_each_cpu_mask(k, online_policy_cpus) { + for_each_cpu_mask_nr(k, online_policy_cpus) { freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -554,7 +554,7 @@ static int centrino_target (struct cpufreq_policy *policy, */ if (!cpus_empty(covered_cpus)) { - for_each_cpu_mask(j, covered_cpus) { + for_each_cpu_mask_nr(j, covered_cpus) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); @@ -564,7 +564,7 @@ static int centrino_target (struct cpufreq_policy *policy, tmp = freqs.new; freqs.new = freqs.old; freqs.old = tmp; - for_each_cpu_mask(j, online_policy_cpus) { + for_each_cpu_mask_nr(j, online_policy_cpus) { freqs.cpu = j; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 1b50244..191f726 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy, cpus_allowed = current->cpus_allowed; - for_each_cpu_mask(i, policy->cpus) { + for_each_cpu_mask_nr(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy, /* allow to be run on all CPUs */ set_cpus_allowed_ptr(current, &cpus_allowed); - for_each_cpu_mask(i, policy->cpus) { + for_each_cpu_mask_nr(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615d..bfade33 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,7 +488,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7c9a813..88736ca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask(i, b->cpus) { + for_each_cpu_mask_nr(i, b->cpus) { if (i == cpu) continue; @@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask(i, b->cpus) { + for_each_cpu_mask_nr(i, b->cpus) { if (i == cpu) continue; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8df..e2838cb 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -718,7 +718,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) return 0; } - for_each_cpu_mask(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { cpumask_t domain, new_mask; int new_cpu; int vector, offset; @@ -739,7 +739,7 @@ next: continue; if (vector == IA32_SYSCALL_VECTOR) goto next; - for_each_cpu_mask(new_cpu, new_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ @@ -749,7 +749,7 @@ next: cfg->move_in_progress = 1; cfg->old_domain = cfg->domain; } - for_each_cpu_mask(new_cpu, new_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; cfg->domain = domain; @@ -781,7 +781,7 @@ static void __clear_irq_vector(int irq) vector = cfg->vector; cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask(cpu, mask) + for_each_cpu_mask_nr(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3898849..fff8eba 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -487,7 +487,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for_each_cpu_mask(i, cpu_sibling_setup_map) { + for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { if (c->phys_proc_id == cpu_data(i).phys_proc_id && c->cpu_core_id == cpu_data(i).cpu_core_id) { cpu_set(i, per_cpu(cpu_sibling_map, cpu)); @@ -510,7 +510,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) return; } - for_each_cpu_mask(i, cpu_sibling_setup_map) { + for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { cpu_set(i, c->llc_shared_map); @@ -1298,7 +1298,7 @@ static void remove_siblinginfo(int cpu) int sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); - for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); /*/ * last thread sibling in this cpu core going down @@ -1307,7 +1307,7 @@ static void remove_siblinginfo(int cpu) cpu_data(sibling).booted_cores--; } - for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); cpus_clear(per_cpu(cpu_sibling_map, cpu)); cpus_clear(per_cpu(cpu_core_map, cpu)); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e6900..7a70638 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -345,7 +345,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) cpus_and(mask, mask, cpu_online_map); - for_each_cpu_mask(cpu, mask) + for_each_cpu_mask_nr(cpu, mask) xen_send_IPI_one(cpu, vector); } @@ -413,7 +413,7 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), /* Make sure other vcpus get a chance to run if they need to. */ yield = false; - for_each_cpu_mask(cpu, mask) + for_each_cpu_mask_nr(cpu, mask) if (xen_vcpu_stolen(cpu)) yield = true; diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index ecc80f3..5f7310a 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -121,7 +121,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } -- cgit v0.10.2 From 5d7bfd0c4d463d288422032c9903d0452dee141d Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: infiniband: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ce1ab05..43180b9 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -641,8 +641,8 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) + cpu = next_cpu_nr(pool->last_cpu, cpu_online_map); + if (cpu >= nr_cpu_ids) cpu = first_cpu(cpu_online_map); pool->last_cpu = cpu; spin_unlock_irqrestore(&pool->last_cpu_lock, flags); -- cgit v0.10.2 From cad0e458d17c643c20c1d38f45a1d26125e6a622 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: clocksource/events: use performance variant for_each_cpu_mask_nr Change references from for_each_cpu_mask to for_each_cpu_mask_nr where appropriate Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index dadde53..60ceabd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -145,9 +145,9 @@ static void clocksource_watchdog(unsigned long data) * Cycle through CPUs to check if the CPUs stay * synchronized to each other. */ - int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); + int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map); - if (next_cpu >= NR_CPUS) + if (next_cpu >= nr_cpu_ids) next_cpu = first_cpu(cpu_online_map); watchdog_timer.expires += WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, next_cpu); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 57a1f02..2d0a963 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -397,8 +397,7 @@ again: mask = CPU_MASK_NONE; now = ktime_get(); /* Find all expired events */ - for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; - cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { + for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) { td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event.tv64 <= now.tv64) cpu_set(cpu, mask); -- cgit v0.10.2 From 3f9b48a7584851997702cdc3f58e7811b5546397 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: net: Pass reference to cpumask variable in net/sunrpc/svc.c * Pass reference to cpumask variable instead of using stack. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 01c7e31..d43cf8d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -603,7 +603,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, error = kernel_thread((int (*)(void *)) func, rqstp, 0); if (have_oldmask) - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); if (error < 0) goto out_thread; -- cgit v0.10.2 From 7baac8b91f9871ba8cb09af84de4ae1d86d07812 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 13 May 2008 11:28:21 +0200 Subject: cpumask: make for_each_cpu_mask a bit smaller The for_each_cpu_mask loop is used quite often in the kernel. It makes use of two functions: first_cpu and next_cpu. This patch changes for_each_cpu_mask to use only the latter. Because next_cpu finds the next eligible cpu _after_ the given one, the iteration variable has to be initialized to -1 and next_cpu has to be called with this value before the first iteration. An x86_64 defconfig kernel (from sched/latest) is about 2500 bytes smaller with this patch applied: text data bss dec hex filename 6222517 917952 749932 7890401 7865e1 vmlinux.orig 6219922 917952 749932 7887806 785bbe vmlinux The same size reduction is seen for defconfig+MAXSMP text data bss dec hex filename 6241772 2563968 1492716 10298456 9d2458 vmlinux.orig 6239211 2563968 1492716 10295895 9d1a57 vmlinux Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b49472d..80226e7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -370,10 +370,10 @@ int __any_online_cpu(const cpumask_t *mask); #define first_cpu(src) __first_cpu(&(src)) #define next_cpu(n, src) __next_cpu((n), &(src)) #define any_online_cpu(mask) __any_online_cpu(&(mask)) -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = first_cpu(mask); \ - (cpu) < NR_CPUS; \ - (cpu) = next_cpu((cpu), (mask))) +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = next_cpu((cpu), (mask)), \ + (cpu) < NR_CPUS; ) #endif #if NR_CPUS <= 64 @@ -387,10 +387,10 @@ int __any_online_cpu(const cpumask_t *mask); int __next_cpu_nr(int n, const cpumask_t *srcp); #define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) #define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) -#define for_each_cpu_mask_nr(cpu, mask) \ - for ((cpu) = first_cpu(mask); \ - (cpu) < nr_cpu_ids; \ - (cpu) = next_cpu_nr((cpu), (mask))) +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = next_cpu_nr((cpu), (mask)), \ + (cpu) < nr_cpu_ids; ) #endif /* NR_CPUS > 64 */ -- cgit v0.10.2 From 9982fbface82893e77d211fbabfbd229da6bdde6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 6 Jul 2008 14:24:08 +0200 Subject: Revert "cpumask: introduce new APIs" This reverts commit acb7669c125676e63cf96582455509216c39745e. the wrappers are not needed anymore. Signed-off-by: Ingo Molnar diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 47418b1..80226e7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -394,10 +394,6 @@ int __next_cpu_nr(int n, const cpumask_t *srcp); #endif /* NR_CPUS > 64 */ -#define next_cpu_nr(n, src) next_cpu(n, src) -#define cpus_weight_nr(cpumask) cpus_weight(cpumask) -#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) - /* * The following particular system cpumasks and operations manage * possible, present and online cpus. Each of them is a fixed size -- cgit v0.10.2 From 65c011845316d3c1381f478ca0d8265c43b3b039 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:30 -0700 Subject: cpumask: Replace cpumask_of_cpu with cpumask_of_cpu_ptr * This patch replaces the dangerous lvalue version of cpumask_of_cpu with new cpumask_of_cpu_ptr macros. These are patterned after the node_to_cpumask_ptr macros. In general terms, if there is a cpumask_of_cpu_map[] then a pointer to the cpumask_of_cpu_map[cpu] entry is used. The cpumask_of_cpu_map is provided when there is a large NR_CPUS count, reducing greatly the amount of code generated and stack space used for cpumask_of_cpu(). The pointer to the cpumask_t value is needed for calling set_cpus_allowed_ptr() to reduce the amount of stack space needed to pass the cpumask_t value. If there isn't a cpumask_of_cpu_map[], then a temporary variable is declared and filled in with value from cpumask_of_cpu(cpu) as well as a pointer variable pointing to this temporary variable. Afterwards, the pointer is used to reference the cpumask value. The compiler will optimize out the extra dereference through the pointer as well as the stack space used for the pointer, resulting in identical code. A good example of the orthogonal usages is in net/sunrpc/svc.c: case SVC_POOL_PERCPU: { unsigned int cpu = m->pool_to[pidx]; cpumask_of_cpu_ptr(cpumask, cpu); *oldmask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask); return 1; } case SVC_POOL_PERNODE: { unsigned int node = m->pool_to[pidx]; node_to_cpumask_ptr(nodecpumask, node); *oldmask = current->cpus_allowed; set_cpus_allowed_ptr(current, nodecpumask); return 1; } Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index c2502eb..9220cf4 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,6 +73,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; + cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -91,7 +92,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, new_mask); if (retval) return -1; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b8..ff2fff5 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,10 +200,12 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t saved_mask = current->cpus_allowed; + cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + cpumask_of_cpu_ptr_next(cpu_mask, i); + set_cpus_allowed_ptr(current, cpu_mask); do_drv_write(cmd); } @@ -267,11 +269,12 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; + cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -337,6 +340,7 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { + cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -349,7 +353,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); + freq = extract_freq(get_cur_val(cpu_mask), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c45ca6d..53c7b69 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -479,11 +479,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; + cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1016,6 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; + cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1030,7 +1032,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, cpu_mask); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1105,6 +1107,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; + cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1156,7 +1159,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + cpumask_of_cpu_ptr_next(newmask, pol->cpu); + set_cpus_allowed_ptr(current, newmask); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1178,7 +1182,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); + pol->cpus = *newmask; else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1244,6 +1248,7 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; + cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1253,7 +1258,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8b0dd6f..fd561bb 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -313,9 +313,10 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; + cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, new_mask); if (smp_processor_id() != cpu) return 0; @@ -554,9 +555,11 @@ static int centrino_target (struct cpufreq_policy *policy, */ if (!cpus_empty(covered_cpus)) { + cpumask_of_cpu_ptr_declare(new_mask); + for_each_cpu_mask_nr(j, covered_cpus) { - set_cpus_allowed_ptr(current, - &cpumask_of_cpu(j)); + cpumask_of_cpu_ptr_next(new_mask, j); + set_cpus_allowed_ptr(current, new_mask); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 191f726..2f3728d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,7 +244,8 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(&cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr(newmask, cpu); + return _speedstep_get(newmask); } /** diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a7b0f8f..e4b8d18 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -516,6 +516,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; + cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -526,7 +527,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, newmask); if (retval) goto out; diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 56b9331..5852016 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,6 +388,7 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -404,7 +405,8 @@ static int do_microcode_update (void) if (!uci->valid) continue; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -574,6 +576,7 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -582,7 +585,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -620,11 +623,12 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); mutex_lock(µcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -656,11 +660,12 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) return -EINVAL; if (val == 1) { cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, newmask); mutex_lock(µcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a6216..214bbdf 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -403,24 +403,28 @@ void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP - int reboot_cpu_id; /* The boot cpu is always logical cpu 0 */ - reboot_cpu_id = 0; + int reboot_cpu_id = 0; + cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) + cpu_online(reboot_cpu)) { reboot_cpu_id = reboot_cpu; + cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); + } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) + if (!cpu_online(reboot_cpu_id)) { reboot_cpu_id = smp_processor_id(); + cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); + } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, newmask); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a56fc6c..a2c3f9c 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -827,6 +827,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) static int acpi_processor_get_throttling(struct acpi_processor *pr) { cpumask_t saved_mask; + cpumask_of_cpu_ptr_declare(new_mask); int ret; if (!pr) @@ -838,7 +839,8 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr) * Migrate task to the cpu pointed by pr. */ saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + cpumask_of_cpu_ptr_next(new_mask, pr->id); + set_cpus_allowed_ptr(current, new_mask); ret = pr->throttling.acpi_processor_get_throttling(pr); /* restore the previous state */ set_cpus_allowed_ptr(current, &saved_mask); @@ -987,6 +989,7 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { cpumask_t saved_mask; + cpumask_of_cpu_ptr_declare(new_mask); int ret = 0; unsigned int i; struct acpi_processor *match_pr; @@ -1025,7 +1028,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + cpumask_of_cpu_ptr_next(new_mask, pr->id); + set_cpus_allowed_ptr(current, new_mask); ret = p_throttling->acpi_processor_set_throttling(pr, t_state.target_state); } else { @@ -1056,7 +1060,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + cpumask_of_cpu_ptr_next(new_mask, i); + set_cpus_allowed_ptr(current, new_mask); ret = match_pr->throttling. acpi_processor_set_throttling( match_pr, t_state.target_state); diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 25918f7..0b624e9 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -254,6 +254,7 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, static int smi_request(struct smi_cmd *smi_cmd) { cpumask_t old_mask; + cpumask_of_cpu_ptr(new_mask, 0); int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -264,7 +265,7 @@ static int smi_request(struct smi_cmd *smi_cmd) /* SMI requires CPU 0 */ old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); + set_cpus_allowed_ptr(current, new_mask); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 80226e7..2dbd9a2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -62,6 +62,15 @@ * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set + *ifdef CONFIG_HAS_CPUMASK_OF_CPU + * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t *v + * cpumask_of_cpu_ptr_next(v, cpu) Sets v = &cpumask_of_cpu_map[cpu] + * cpumask_of_cpu_ptr(v, cpu) Combines above two operations + *else + * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t _v and *v = &_v + * cpumask_of_cpu_ptr_next(v, cpu) Sets _v = cpumask_of_cpu(cpu) + * cpumask_of_cpu_ptr(v, cpu) Combines above two operations + *endif * CPU_MASK_ALL Initializer - all bits set * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask @@ -236,11 +245,16 @@ static inline void __cpus_shift_left(cpumask_t *dstp, #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP extern cpumask_t *cpumask_of_cpu_map; -#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) - +#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) +#define cpumask_of_cpu_ptr(v, cpu) \ + const cpumask_t *v = &cpumask_of_cpu(cpu) +#define cpumask_of_cpu_ptr_declare(v) \ + const cpumask_t *v +#define cpumask_of_cpu_ptr_next(v, cpu) \ + v = &cpumask_of_cpu(cpu) #else #define cpumask_of_cpu(cpu) \ -(*({ \ +({ \ typeof(_unused_cpumask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL<<(cpu); \ @@ -248,8 +262,16 @@ extern cpumask_t *cpumask_of_cpu_map; cpus_clear(m); \ cpu_set((cpu), m); \ } \ - &m; \ -})) + m; \ +}) +#define cpumask_of_cpu_ptr(v, cpu) \ + cpumask_t _##v = cpumask_of_cpu(cpu); \ + const cpumask_t *v = &_##v +#define cpumask_of_cpu_ptr_declare(v) \ + cpumask_t _##v; \ + const cpumask_t *v = &_##v +#define cpumask_of_cpu_ptr_next(v, cpu) \ + _##v = cpumask_of_cpu(cpu) #endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ba9b205..738b411 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -33,8 +33,9 @@ static int stopmachine(void *cpu) { int irqs_disabled = 0; int prepared = 0; + cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); - set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu)); + set_cpus_allowed_ptr(current, cpumask); /* Ack: we are alive */ smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 2301e1e..6352808 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -213,7 +213,9 @@ static void start_stack_timers(void) int cpu; for_each_online_cpu(cpu) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + cpumask_of_cpu_ptr(new_mask, cpu); + + set_cpus_allowed_ptr(current, new_mask); start_stack_timer(cpu); } set_cpus_allowed_ptr(current, &saved_mask); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d43cf8d..083d126 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -314,9 +314,10 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) case SVC_POOL_PERCPU: { unsigned int cpu = m->pool_to[pidx]; + cpumask_of_cpu_ptr(cpumask, cpu); *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask); return 1; } case SVC_POOL_PERNODE: -- cgit v0.10.2 From cb6d2be60dc3ec9ac788f45d8e24b82a9faacdd9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:31 -0700 Subject: cpumask: Optimize cpumask_of_cpu in arch/x86/kernel/io_apic_64.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index bf27114..a85db79 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1372,12 +1372,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { struct irq_cfg *cfg = &irq_cfg[irq]; - cpumask_t mask; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - mask = cpumask_of_cpu(first_cpu(cfg->domain)); - send_IPI_mask(mask, cfg->vector); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; -- cgit v0.10.2 From c42f4f4c6dab3b2b7768c36173ee7c7ecf79eddb Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:32 -0700 Subject: cpumask: Optimize cpumask_of_cpu in arch/x86/kernel/ldt.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a844957..3fee2aa 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -62,12 +62,12 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP - cpumask_t mask; + cpumask_of_cpu_ptr_declare(mask); preempt_disable(); load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + cpumask_of_cpu_ptr_next(mask, smp_processor_id()); + if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else -- cgit v0.10.2 From 333cdd1f0e5e0aad6b7f8992291563bc7b14670b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:33 -0700 Subject: cpumask: Optimize cpumask_of_cpu in drivers/misc/sgi-xp/xpc_main.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 08256ed..579b01f 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -229,10 +229,11 @@ xpc_hb_checker(void *ignore) int last_IRQ_count = 0; int new_IRQ_count; int force_IRQ = 0; + cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU); /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); + set_cpus_allowed_ptr(current, cpumask); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); -- cgit v0.10.2 From c18a41fbbc500ac0307ffd2b0ae73c2af9d0b0ab Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:34 -0700 Subject: cpumask: Optimize cpumask_of_cpu in kernel/time/tick-common.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 4f38865..bf43284 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -135,7 +135,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) */ static void tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, - cpumask_t cpumask) + const cpumask_t *cpumask) { ktime_t next_event; void (*handler)(struct clock_event_device *) = NULL; @@ -169,8 +169,8 @@ static void tick_setup_device(struct tick_device *td, * When the device is not per cpu, pin the interrupt to the * current cpu: */ - if (!cpus_equal(newdev->cpumask, cpumask)) - irq_set_affinity(newdev->irq, cpumask); + if (!cpus_equal(newdev->cpumask, *cpumask)) + irq_set_affinity(newdev->irq, *cpumask); /* * When global broadcasting is active, check if the current @@ -196,20 +196,20 @@ static int tick_check_new_device(struct clock_event_device *newdev) struct tick_device *td; int cpu, ret = NOTIFY_OK; unsigned long flags; - cpumask_t cpumask; + cpumask_of_cpu_ptr_declare(cpumask); spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); + cpumask_of_cpu_ptr_next(cpumask, cpu); if (!cpu_isset(cpu, newdev->cpumask)) goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; - cpumask = cpumask_of_cpu(cpu); /* cpu local device ? */ - if (!cpus_equal(newdev->cpumask, cpumask)) { + if (!cpus_equal(newdev->cpumask, *cpumask)) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpus_equal(curdev->cpumask, cpumask)) + if (curdev && cpus_equal(curdev->cpumask, *cpumask)) goto out_bc; } -- cgit v0.10.2 From 4755b9291204982ac908e069674d6e5eb45815b3 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:35 -0700 Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 3b4dc09..9fb2df0 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -11,7 +11,7 @@ notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); - cpumask_t this_mask; + cpumask_of_cpu_ptr_declare(this_mask); if (likely(preempt_count)) goto out; @@ -23,9 +23,9 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - this_mask = cpumask_of_cpu(this_cpu); + cpumask_of_cpu_ptr_next(this_mask, cpu); - if (cpus_equal(current->cpus_allowed, this_mask)) + if (cpus_equal(current->cpus_allowed, *this_mask)) goto out; /* -- cgit v0.10.2 From 77586c2bdad0798cb24e35de5a878e7c6b200574 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:36 -0700 Subject: cpumask: Provide a generic set of CPUMASK_ALLOC macros * Provide a generic set of CPUMASK_ALLOC macros patterned after the SCHED_CPUMASK_ALLOC macros. This is used where multiple cpumask_t variables are declared on the stack to reduce the amount of stack space required. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2dbd9a2..72f9c32 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -75,6 +75,17 @@ * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * + *if NR_CPUS > BITS_PER_LONG + * CPUMASK_ALLOC(m) Declares and allocates struct m *m = + * (struct m *)kmalloc(sizeof(*m), ...) + * CPUMASK_FREE(m) Macro for kfree(v) + *else + * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m + * CPUMASK_FREE(m) Nop + *endif + * CPUMASK_VAR(v, m) Declares cpumask_t *v = + * m + offset(struct m, v) + * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing @@ -311,6 +322,16 @@ extern cpumask_t cpu_mask_all; #define cpus_addr(src) ((src).bits) +#if NR_CPUS > BITS_PER_LONG +#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL) +#define CPUMASK_FREE(m) kfree(m) +#else +#define CPUMASK_ALLOC(m) struct allmasks _m, *m = &_m +#define CPUMASK_FREE(m) +#endif +#define CPUMASK_VAR(v, m) cpumask_t *v = (cpumask_t *) \ + ((unsigned long)(m) + offsetof(struct m, v)) + #define cpumask_scnprintf(buf, len, src) \ __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) static inline int __cpumask_scnprintf(char *buf, int len, -- cgit v0.10.2 From eb53fac5cafc4b2f8443ff064938b4494a28c54e Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:37 -0700 Subject: cpumask: Use optimized CPUMASK_ALLOC macros in the centrino_target * Use the CPUMASK_ALLOC macros in the centrino_target() function. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index fd561bb..470c016 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -442,6 +442,13 @@ static int centrino_verify (struct cpufreq_policy *policy) * * Sets a new CPUFreq policy. */ +struct allmasks { + cpumask_t online_policy_cpus; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; +}; + static int centrino_target (struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -449,48 +456,55 @@ static int centrino_target (struct cpufreq_policy *policy, unsigned int newstate = 0; unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; - cpumask_t saved_mask; - cpumask_t set_mask; - cpumask_t covered_cpus; int retval = 0; unsigned int j, k, first_cpu, tmp; - - if (unlikely(centrino_model[cpu] == NULL)) - return -ENODEV; + CPUMASK_ALLOC(allmasks); + CPUMASK_VAR(online_policy_cpus, allmasks); + CPUMASK_VAR(saved_mask, allmasks); + CPUMASK_VAR(set_mask, allmasks); + CPUMASK_VAR(covered_cpus, allmasks); + + if (unlikely(allmasks == NULL)) + return -ENOMEM; + + if (unlikely(centrino_model[cpu] == NULL)) { + retval = -ENODEV; + goto out; + } if (unlikely(cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq, relation, &newstate))) { - return -EINVAL; + retval = -EINVAL; + goto out; } #ifdef CONFIG_HOTPLUG_CPU /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); + cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); #else - online_policy_cpus = policy->cpus; + *online_policy_cpus = policy->cpus; #endif - saved_mask = current->cpus_allowed; + *saved_mask = current->cpus_allowed; first_cpu = 1; - cpus_clear(covered_cpus); - for_each_cpu_mask_nr(j, online_policy_cpus) { + cpus_clear(*covered_cpus); + for_each_cpu_mask_nr(j, *online_policy_cpus) { /* * Support for SMP systems. * Make sure we are running on CPU that wants to change freq */ - cpus_clear(set_mask); + cpus_clear(*set_mask); if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - cpus_or(set_mask, set_mask, online_policy_cpus); + cpus_or(*set_mask, *set_mask, *online_policy_cpus); else - cpu_set(j, set_mask); + cpu_set(j, *set_mask); - set_cpus_allowed_ptr(current, &set_mask); + set_cpus_allowed_ptr(current, set_mask); preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { dprintk("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { @@ -518,7 +532,7 @@ static int centrino_target (struct cpufreq_policy *policy, dprintk("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); - for_each_cpu_mask_nr(k, online_policy_cpus) { + for_each_cpu_mask_nr(k, *online_policy_cpus) { freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -537,11 +551,11 @@ static int centrino_target (struct cpufreq_policy *policy, break; } - cpu_set(j, covered_cpus); + cpu_set(j, *covered_cpus); preempt_enable(); } - for_each_cpu_mask_nr(k, online_policy_cpus) { + for_each_cpu_mask_nr(k, *online_policy_cpus) { freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -554,10 +568,10 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - if (!cpus_empty(covered_cpus)) { + if (!cpus_empty(*covered_cpus)) { cpumask_of_cpu_ptr_declare(new_mask); - for_each_cpu_mask_nr(j, covered_cpus) { + for_each_cpu_mask_nr(j, *covered_cpus) { cpumask_of_cpu_ptr_next(new_mask, j); set_cpus_allowed_ptr(current, new_mask); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); @@ -567,19 +581,22 @@ static int centrino_target (struct cpufreq_policy *policy, tmp = freqs.new; freqs.new = freqs.old; freqs.old = tmp; - for_each_cpu_mask_nr(j, online_policy_cpus) { + for_each_cpu_mask_nr(j, *online_policy_cpus) { freqs.cpu = j; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed_ptr(current, &saved_mask); - return 0; + set_cpus_allowed_ptr(current, saved_mask); + retval = 0; + goto out; migrate_end: preempt_enable(); - set_cpus_allowed_ptr(current, &saved_mask); - return 0; + set_cpus_allowed_ptr(current, saved_mask); +out: + CPUMASK_FREE(allmasks); + return retval; } static struct freq_attr* centrino_attr[] = { -- cgit v0.10.2 From 06f8d00e9eecb738c99b737ac38a585ea7583ad5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 22:34:00 +0200 Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c, fix fix typo. Signed-off-by: Ingo Molnar diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 9fb2df0..c4381d9 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -23,7 +23,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - cpumask_of_cpu_ptr_next(this_mask, cpu); + cpumask_of_cpu_ptr_next(this_mask, this_cpu); if (cpus_equal(current->cpus_allowed, *this_mask)) goto out; -- cgit v0.10.2 From 6bca67f951f80b9e61078f8cdf5fb7b3d9e51aa9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:27 -0700 Subject: NR_CPUS: Replace NR_CPUS in arch/x86/kernel/cpu/mcheck/mce_64.c * nr_cpu_ids should be used to allocate arrays based on the number of cpu's present. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index c4a7ec3..2fe06ab 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, char __user *buf = ubuf; int i, err; - cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); + cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); if (!cpu_tsc) return -ENOMEM; -- cgit v0.10.2 From f2ad47ffeb1d292b7c7d1e2f6aedb37646c391db Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:28 -0700 Subject: NR_CPUS: Replace NR_CPUS in arch/x86/kernel/cpu/proc.c * Use nr_cpu_ids instead of NR_CPUS to limit traversal of cpu online map. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0d0d905..a26c480 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) { if (*pos == 0) /* just in case, cpu 0 is not the first */ *pos = first_cpu(cpu_online_map); - if ((*pos) < NR_CPUS && cpu_online(*pos)) + if ((*pos) < nr_cpu_ids && cpu_online(*pos)) return &cpu_data(*pos); return NULL; } -- cgit v0.10.2 From 247bc6ca0f691e4617e7bdb70cbaccc939f754ec Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:29 -0700 Subject: NR_CPUS: Replace NR_CPUS in arch/x86/kernel/genx2apic_uv_x.c * Replace NR_CPUS loop with for_each_possible_cpu(). * nr_cpu_ids should be used to determine if a percpu area is available for a given cpu. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c..3e5d7b8 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -94,7 +94,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector) { unsigned int cpu; - for (cpu = 0; cpu < NR_CPUS; ++cpu) + for_each_possible_cpu(cpu) if (cpu_isset(cpu, mask)) uv_send_IPI_one(cpu, vector); } @@ -128,7 +128,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) * May as well be the first. */ cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; -- cgit v0.10.2 From 1bd9d6b64e1474f1a03f8660e8721d746cffae57 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:30 -0700 Subject: NR_CPUS: Replace NR_CPUS in arch/x86/kernel/genapic_flat_64.c * nr_cpu_ids should be used to determine if a percpu area is available for a given cpu. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c688..786548a 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) * May as well be the first. */ cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; -- cgit v0.10.2 From 94a1e869c7b96a9d30e260084866383a145fd8ae Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:31 -0700 Subject: NR_CPUS: Replace per_cpu(..., smp_processor_id()) with __get_cpu_var * Slight optimization when getting one's own cpu_info percpu data. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 55402d2..f92315e 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -134,7 +134,7 @@ extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#define current_cpu_data cpu_data(smp_processor_id()) +#define current_cpu_data __get_cpu_var(cpu_info) #else #define cpu_data(cpu) boot_cpu_data #define current_cpu_data boot_cpu_data -- cgit v0.10.2 From b38868aabeeb9c0c76a41ac5fa98c24bf0096f2a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:32 -0700 Subject: NR_CPUS: Replace NR_CPUS in cpufreq userspace routines * Replace arrays sized by NR_CPUS with percpu variables. Prior reference: http://marc.info/?l=linux-kernel&m=120251421825989&w=4 Subject: [PATCH 1/4] cpufreq: change cpu freq tables to per_cpu variables From: Mike Travis Date: 2008-02-08 23:37:39 Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index cb2ac01..32244aa 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -30,16 +30,18 @@ /** * A few values needed by the userspace governor */ -static unsigned int cpu_max_freq[NR_CPUS]; -static unsigned int cpu_min_freq[NR_CPUS]; -static unsigned int cpu_cur_freq[NR_CPUS]; /* current CPU freq */ -static unsigned int cpu_set_freq[NR_CPUS]; /* CPU freq desired by userspace */ -static unsigned int cpu_is_managed[NR_CPUS]; +static DEFINE_PER_CPU(unsigned int, cpu_max_freq); +static DEFINE_PER_CPU(unsigned int, cpu_min_freq); +static DEFINE_PER_CPU(unsigned int, cpu_cur_freq); /* current CPU freq */ +static DEFINE_PER_CPU(unsigned int, cpu_set_freq); /* CPU freq desired by + userspace */ +static DEFINE_PER_CPU(unsigned int, cpu_is_managed); static DEFINE_MUTEX (userspace_mutex); static int cpus_using_userspace_governor; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) +#define dprintk(msg...) \ + cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) /* keep track of frequency transitions */ static int @@ -48,12 +50,12 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, { struct cpufreq_freqs *freq = data; - if (!cpu_is_managed[freq->cpu]) + if (!per_cpu(cpu_is_managed, freq->cpu)) return 0; dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n", freq->cpu, freq->new); - cpu_cur_freq[freq->cpu] = freq->new; + per_cpu(cpu_cur_freq, freq->cpu) = freq->new; return 0; } @@ -77,15 +79,15 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); mutex_lock(&userspace_mutex); - if (!cpu_is_managed[policy->cpu]) + if (!per_cpu(cpu_is_managed, policy->cpu)) goto err; - cpu_set_freq[policy->cpu] = freq; + per_cpu(cpu_set_freq, policy->cpu) = freq; - if (freq < cpu_min_freq[policy->cpu]) - freq = cpu_min_freq[policy->cpu]; - if (freq > cpu_max_freq[policy->cpu]) - freq = cpu_max_freq[policy->cpu]; + if (freq < per_cpu(cpu_min_freq, policy->cpu)) + freq = per_cpu(cpu_min_freq, policy->cpu); + if (freq > per_cpu(cpu_max_freq, policy->cpu)) + freq = per_cpu(cpu_max_freq, policy->cpu); /* * We're safe from concurrent calls to ->target() here @@ -104,7 +106,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) { - return sprintf(buf, "%u\n", cpu_cur_freq[policy->cpu]); + return sprintf(buf, "%u\n", per_cpu(cpu_cur_freq, policy->cpu)); } static int cpufreq_governor_userspace(struct cpufreq_policy *policy, @@ -127,12 +129,17 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, } cpus_using_userspace_governor++; - cpu_is_managed[cpu] = 1; - cpu_min_freq[cpu] = policy->min; - cpu_max_freq[cpu] = policy->max; - cpu_cur_freq[cpu] = policy->cur; - cpu_set_freq[cpu] = policy->cur; - dprintk("managing cpu %u started (%u - %u kHz, currently %u kHz)\n", cpu, cpu_min_freq[cpu], cpu_max_freq[cpu], cpu_cur_freq[cpu]); + per_cpu(cpu_is_managed, cpu) = 1; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + per_cpu(cpu_set_freq, cpu) = policy->cur; + dprintk("managing cpu %u started " + "(%u - %u kHz, currently %u kHz)\n", + cpu, + per_cpu(cpu_min_freq, cpu), + per_cpu(cpu_max_freq, cpu), + per_cpu(cpu_cur_freq, cpu)); mutex_unlock(&userspace_mutex); break; @@ -145,34 +152,34 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, CPUFREQ_TRANSITION_NOTIFIER); } - cpu_is_managed[cpu] = 0; - cpu_min_freq[cpu] = 0; - cpu_max_freq[cpu] = 0; - cpu_set_freq[cpu] = 0; + per_cpu(cpu_is_managed, cpu) = 0; + per_cpu(cpu_min_freq, cpu) = 0; + per_cpu(cpu_max_freq, cpu) = 0; + per_cpu(cpu_set_freq, cpu) = 0; dprintk("managing cpu %u stopped\n", cpu); mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_LIMITS: mutex_lock(&userspace_mutex); - dprintk("limit event for cpu %u: %u - %u kHz," + dprintk("limit event for cpu %u: %u - %u kHz, " "currently %u kHz, last set to %u kHz\n", cpu, policy->min, policy->max, - cpu_cur_freq[cpu], cpu_set_freq[cpu]); - if (policy->max < cpu_set_freq[cpu]) { + per_cpu(cpu_cur_freq, cpu), + per_cpu(cpu_set_freq, cpu)); + if (policy->max < per_cpu(cpu_set_freq, cpu)) { __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - } - else if (policy->min > cpu_set_freq[cpu]) { + } else if (policy->min > per_cpu(cpu_set_freq, cpu)) { __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - } - else { - __cpufreq_driver_target(policy, cpu_set_freq[cpu], + } else { + __cpufreq_driver_target(policy, + per_cpu(cpu_set_freq, cpu), CPUFREQ_RELATION_L); } - cpu_min_freq[cpu] = policy->min; - cpu_max_freq[cpu] = policy->max; - cpu_cur_freq[cpu] = policy->cur; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; mutex_unlock(&userspace_mutex); break; } -- cgit v0.10.2 From 80422d3431cc990b967da129f9eb8e3e9989f841 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:33 -0700 Subject: cpumask: Provide a generic set of CPUMASK_ALLOC macros, FIXUP * Rename CPUMASK_VAR --> CPUMASK_PTR (and simplify) * Fix a semantic error in CPUMASK_ALLOC * Add a bit of commentry to cpumask.h Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 72f9c32..30d59d1 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -75,16 +75,36 @@ * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * + * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t + * variables, and CPUMASK_PTR provides pointers to each field. + * + * The structure should be defined something like this: + * struct my_cpumasks { + * cpumask_t mask1; + * cpumask_t mask2; + * }; + * + * Usage is then: + * CPUMASK_ALLOC(my_cpumasks); + * CPUMASK_PTR(mask1, my_cpumasks); + * CPUMASK_PTR(mask2, my_cpumasks); + * + * --- DO NOT reference cpumask_t pointers until this check --- + * if (my_cpumasks == NULL) + * "kmalloc failed"... + * + * References are now pointers to the cpumask_t variables (*mask1, ...) + * *if NR_CPUS > BITS_PER_LONG * CPUMASK_ALLOC(m) Declares and allocates struct m *m = - * (struct m *)kmalloc(sizeof(*m), ...) - * CPUMASK_FREE(m) Macro for kfree(v) + * kmalloc(sizeof(*m), GFP_KERNEL) + * CPUMASK_FREE(m) Macro for kfree(m) *else * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m * CPUMASK_FREE(m) Nop *endif - * CPUMASK_VAR(v, m) Declares cpumask_t *v = - * m + offset(struct m, v) + * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v) + * ------------------------------------------------------------------------ * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask @@ -326,11 +346,10 @@ extern cpumask_t cpu_mask_all; #define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL) #define CPUMASK_FREE(m) kfree(m) #else -#define CPUMASK_ALLOC(m) struct allmasks _m, *m = &_m +#define CPUMASK_ALLOC(m) struct m _m, *m = &_m #define CPUMASK_FREE(m) #endif -#define CPUMASK_VAR(v, m) cpumask_t *v = (cpumask_t *) \ - ((unsigned long)(m) + offsetof(struct m, v)) +#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) #define cpumask_scnprintf(buf, len, src) \ __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) -- cgit v0.10.2 From c4762aba0b1f72659aae9ce37b772ca8bd8f06f4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 18 Jul 2008 18:11:34 -0700 Subject: NR_CPUS: Replace NR_CPUS in speedstep-centrino.c Some cleanups in speedstep-centrino.c for NR_CPUS=4096. * Use new CPUMASK_PTR (instead of old CPUMASK_VAR). * Replace arrays sized by NR_CPUS with percpu variables. * Cleanup some formatting problems (>80 chars per line) and other checkpatch complaints. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 470c016..ca2ac13 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -28,7 +28,8 @@ #define PFX "speedstep-centrino: " #define MAINTAINER "cpufreq@lists.linux.org.uk" -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) +#define dprintk(msg...) \ + cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) #define INTEL_MSR_RANGE (0xffff) @@ -66,11 +67,12 @@ struct cpu_model struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ }; -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, + const struct cpu_id *x); /* Operating points for current CPU */ -static struct cpu_model *centrino_model[NR_CPUS]; -static const struct cpu_id *centrino_cpu[NR_CPUS]; +static DEFINE_PER_CPU(struct cpu_model *, centrino_model); +static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); static struct cpufreq_driver centrino_driver; @@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) return -ENOENT; } - centrino_model[policy->cpu] = model; + per_cpu(centrino_model, policy->cpu) = model; dprintk("found \"%s\": max frequency: %dkHz\n", model->model_name, model->max_freq); @@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) } #else -static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } +static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) +{ + return -ENODEV; +} #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, + const struct cpu_id *x) { if ((c->x86 == x->x86) && (c->x86_model == x->x86_model) && @@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) * for centrino, as some DSDTs are buggy. * Ideally, this can be done using the acpi_data structure. */ - if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { + if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || + (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || + (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { msr = (msr >> 8) & 0xff; return msr * 100000; } - if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) + if ((!per_cpu(centrino_model, cpu)) || + (!per_cpu(centrino_model, cpu)->op_points)) return 0; msr &= 0xffff; - for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == centrino_model[cpu]->op_points[i].index) - return centrino_model[cpu]->op_points[i].frequency; + for (i = 0; + per_cpu(centrino_model, cpu)->op_points[i].frequency + != CPUFREQ_TABLE_END; + i++) { + if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) + return per_cpu(centrino_model, cpu)-> + op_points[i].frequency; } if (failsafe) - return centrino_model[cpu]->op_points[i-1].frequency; + return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; else return 0; } @@ -348,7 +359,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) int i; /* Only Intel makes Enhanced Speedstep-capable CPUs */ - if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) + if (cpu->x86_vendor != X86_VENDOR_INTEL || + !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) @@ -362,9 +374,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) break; if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; + per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; - if (!centrino_cpu[policy->cpu]) { + if (!per_cpu(centrino_cpu, policy->cpu)) { dprintk("found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " MAINTAINER "\n"); @@ -387,23 +399,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) /* check to see if it stuck */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); if (!(l & (1<<16))) { - printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); + printk(KERN_INFO PFX + "couldn't enable Enhanced SpeedStep\n"); return -ENODEV; } } freq = get_cur_freq(policy->cpu); - - policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ + policy->cpuinfo.transition_latency = 10000; + /* 10uS transition latency */ policy->cur = freq; dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); - ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); + ret = cpufreq_frequency_table_cpuinfo(policy, + per_cpu(centrino_model, policy->cpu)->op_points); if (ret) return (ret); - cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); + cpufreq_frequency_table_get_attr( + per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); return 0; } @@ -412,12 +427,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; - if (!centrino_model[cpu]) + if (!per_cpu(centrino_model, cpu)) return -ENODEV; cpufreq_frequency_table_put_attr(cpu); - centrino_model[cpu] = NULL; + per_cpu(centrino_model, cpu) = NULL; return 0; } @@ -431,14 +446,16 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) */ static int centrino_verify (struct cpufreq_policy *policy) { - return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); + return cpufreq_frequency_table_verify(policy, + per_cpu(centrino_model, policy->cpu)->op_points); } /** * centrino_setpolicy - set a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * Sets a new CPUFreq policy. */ @@ -459,21 +476,21 @@ static int centrino_target (struct cpufreq_policy *policy, int retval = 0; unsigned int j, k, first_cpu, tmp; CPUMASK_ALLOC(allmasks); - CPUMASK_VAR(online_policy_cpus, allmasks); - CPUMASK_VAR(saved_mask, allmasks); - CPUMASK_VAR(set_mask, allmasks); - CPUMASK_VAR(covered_cpus, allmasks); + CPUMASK_PTR(online_policy_cpus, allmasks); + CPUMASK_PTR(saved_mask, allmasks); + CPUMASK_PTR(set_mask, allmasks); + CPUMASK_PTR(covered_cpus, allmasks); if (unlikely(allmasks == NULL)) return -ENOMEM; - if (unlikely(centrino_model[cpu] == NULL)) { + if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { retval = -ENODEV; goto out; } if (unlikely(cpufreq_frequency_table_target(policy, - centrino_model[cpu]->op_points, + per_cpu(centrino_model, cpu)->op_points, target_freq, relation, &newstate))) { @@ -515,7 +532,7 @@ static int centrino_target (struct cpufreq_policy *policy, break; } - msr = centrino_model[cpu]->op_points[newstate].index; + msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; if (first_cpu) { rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); -- cgit v0.10.2