From 448c8b1d07d5342922567e138a4b0108a42c24fb Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 13 Mar 2012 19:18:39 -0400 Subject: provide disable_cpufreq() function to disable the API. useful for disabling cpufreq altogether. The cpu frequency scaling drivers and cpu frequency governors will fail to register. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Dave Jones diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8c2df34..a9a1d81c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -126,6 +126,15 @@ static int __init init_cpufreq_transition_notifier_list(void) } pure_initcall(init_cpufreq_transition_notifier_list); +static int off __read_mostly; +int cpufreq_disabled(void) +{ + return off; +} +void disable_cpufreq(void) +{ + off = 1; +} static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX(cpufreq_governor_mutex); @@ -1442,6 +1451,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, { int retval = -EINVAL; + if (cpufreq_disabled()) + return -ENODEV; + pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu, target_freq, relation); if (cpu_online(policy->cpu) && cpufreq_driver->target) @@ -1550,6 +1562,9 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) if (!governor) return -EINVAL; + if (cpufreq_disabled()) + return -ENODEV; + mutex_lock(&cpufreq_governor_mutex); err = -EBUSY; @@ -1573,6 +1588,9 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor) if (!governor) return; + if (cpufreq_disabled()) + return; + #ifdef CONFIG_HOTPLUG_CPU for_each_present_cpu(cpu) { if (cpu_online(cpu)) @@ -1815,6 +1833,9 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) unsigned long flags; int ret; + if (cpufreq_disabled()) + return -ENODEV; + if (!driver_data || !driver_data->verify || !driver_data->init || ((!driver_data->setpolicy) && (!driver_data->target))) return -EINVAL; @@ -1902,6 +1923,9 @@ static int __init 
cpufreq_core_init(void) { int cpu; + if (cpufreq_disabled()) + return -ENODEV; + for_each_possible_cpu(cpu) { per_cpu(cpufreq_policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6216115..8ff4427 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -35,6 +35,7 @@ #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); +extern void disable_cpufreq(void); #else /* CONFIG_CPU_FREQ */ static inline int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) @@ -46,6 +47,7 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, { return 0; } +static inline void disable_cpufreq(void) { } #endif /* CONFIG_CPU_FREQ */ /* if (cpufreq_driver->target) exists, the ->governor decides what frequency -- cgit v0.10.2 From 48cdd8287f47a3cdad5b9273a5ef81bf605f7826 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 13 Mar 2012 20:06:57 -0400 Subject: xen/cpufreq: Disable the cpu frequency scaling drivers from loading. By using the functionality provided by "[CPUFREQ]: provide disable_cpufreq() function to disable the API." Under the Xen hypervisor we do not want the initial domain to exercise the cpufreq scaling drivers. This is because the Xen hypervisor is in charge of doing this as well and we can end up with both the Linux kernel and the hypervisor trying to change the P-states leading to weird performance issues.
Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk [v2: Fix compile error spotted by Benjamin Schweikert ] diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1236623..1ba8dff 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -420,6 +421,7 @@ void __init xen_arch_setup(void) boot_cpu_data.hlt_works_ok = 1; #endif disable_cpuidle(); + disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); } -- cgit v0.10.2 From 102b208e6b3b16d3611b67a7af9a93d30b92c006 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 13 Mar 2012 13:28:12 -0400 Subject: xen/acpi-processor: Do not depend on CPU frequency scaling drivers. With patch "xen/cpufreq: Disable the cpu frequency scaling drivers from loading." we do not have to worry about said drivers loading themselves before the xen-acpi-processor driver. Hence we can remove the default selection (=y if CPU frequency drivers were built-in, or =m if CPU frequency drivers were built as modules), and just select =m for the default case. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 648bcd4..b1d524a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -181,8 +181,7 @@ config XEN_PRIVCMD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" depends on XEN && X86 && ACPI_PROCESSOR - default y if (X86_ACPI_CPUFREQ = y || X86_POWERNOW_K8 = y) - default m if (X86_ACPI_CPUFREQ = m || X86_POWERNOW_K8 = m) + default m help This ACPI processor uploads Power Management information to the Xen hypervisor. -- cgit v0.10.2 From 9846ff10af12f9e7caac696737db6c990592a74a Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 30 Jan 2012 16:21:48 +0000 Subject: xen: support pirq_eoi_map The pirq_eoi_map is a bitmap offered by Xen to check which pirqs need to be EOI'd without having to issue a hypercall every time.
We use PHYSDEVOP_pirq_eoi_gmfn_v2 to map the bitmap, then if we succeed we use pirq_eoi_map to check whether pirqs need eoi. Changes in v3: - explicitly use PHYSDEVOP_pirq_eoi_gmfn_v2 rather than PHYSDEVOP_pirq_eoi_gmfn; - introduce pirq_check_eoi_map, a function to check if a pirq needs an eoi using the map; -rename pirq_needs_eoi into pirq_needs_eoi_flag; - introduce a function pointer called pirq_needs_eoi that is going to be set to the right implementation depending on the availability of PHYSDEVOP_pirq_eoi_gmfn_v2. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e5e5812..4b33acd 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,8 @@ struct irq_info { #define PIRQ_SHAREABLE (1 << 1) static int *evtchn_to_irq; +static unsigned long *pirq_eoi_map; +static bool (*pirq_needs_eoi)(unsigned irq); static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], cpu_evtchn_mask); @@ -269,10 +272,14 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } -static bool pirq_needs_eoi(unsigned irq) +static bool pirq_check_eoi_map(unsigned irq) { - struct irq_info *info = info_for_irq(irq); + return test_bit(irq, pirq_eoi_map); +} +static bool pirq_needs_eoi_flag(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); BUG_ON(info->type != IRQT_PIRQ); return info->u.pirq.flags & PIRQ_NEEDS_EOI; @@ -1768,7 +1775,7 @@ void xen_callback_vector(void) {} void __init xen_init_IRQ(void) { - int i; + int i, rc; evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), GFP_KERNEL); @@ -1782,6 +1789,8 @@ void __init xen_init_IRQ(void) for (i = 0; i < NR_EVENT_CHANNELS; i++) mask_evtchn(i); + pirq_needs_eoi = pirq_needs_eoi_flag; + if (xen_hvm_domain()) { xen_callback_vector(); native_init_IRQ(); @@ -1789,8 +1798,19 @@ void __init xen_init_IRQ(void) * 
__acpi_register_gsi can point at the right function */ pci_xen_hvm_init(); } else { + struct physdev_pirq_eoi_gmfn eoi_gmfn; + irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) pci_xen_initial_domain(); + + pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); + eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); + rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); + if (rc != 0) { + free_page((unsigned long) pirq_eoi_map); + pirq_eoi_map = NULL; + } else + pirq_needs_eoi = pirq_check_eoi_map; } } diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 0c28989..9ce788d 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -39,6 +39,27 @@ struct physdev_eoi { }; /* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + unsigned long gmfn; +}; + +/* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. */ -- cgit v0.10.2 From 8e6f7c23c135b13f3adf90906fac7edd325bb9af Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 3 Feb 2012 15:09:04 +0000 Subject: xen/tmem: cleanup Use 'bool' for boolean variables. Do proper section placement. Eliminate an unnecessary export. 
Signed-off-by: Jan Beulich Acked-by: Dan Magenheimer Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index d369965..6a8f76d 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -9,7 +9,6 @@ #include #include #include -#include #include /* temporary ifdef until include/linux/frontswap.h is upstream */ @@ -128,15 +127,13 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -int tmem_enabled __read_mostly; -EXPORT_SYMBOL(tmem_enabled); +bool __read_mostly tmem_enabled = false; static int __init enable_tmem(char *s) { - tmem_enabled = 1; + tmem_enabled = true; return 1; } - __setup("tmem", enable_tmem); #ifdef CONFIG_CLEANCACHE @@ -229,17 +226,16 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); } -static int use_cleancache = 1; +static bool __initdata use_cleancache = true; static int __init no_cleancache(char *s) { - use_cleancache = 0; + use_cleancache = false; return 1; } - __setup("nocleancache", no_cleancache); -static struct cleancache_ops tmem_cleancache_ops = { +static struct cleancache_ops __initdata tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, .get_page = tmem_cleancache_get_page, .flush_page = tmem_cleancache_flush_page, @@ -356,17 +352,16 @@ static void tmem_frontswap_init(unsigned ignored) xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); } -static int __initdata use_frontswap = 1; +static bool __initdata use_frontswap = true; static int __init no_frontswap(char *s) { - use_frontswap = 0; + use_frontswap = false; return 1; } - __setup("nofrontswap", no_frontswap); -static struct frontswap_ops tmem_frontswap_ops = { +static struct frontswap_ops __initdata tmem_frontswap_ops = { .put_page = tmem_frontswap_put_page, .get_page = tmem_frontswap_get_page, .flush_page = tmem_frontswap_flush_page, diff --git 
a/include/xen/tmem.h b/include/xen/tmem.h index 82e2c83..591550a 100644 --- a/include/xen/tmem.h +++ b/include/xen/tmem.h @@ -1,5 +1,9 @@ #ifndef _XEN_TMEM_H #define _XEN_TMEM_H + +#include + /* defined in drivers/xen/tmem.c */ -extern int tmem_enabled; +extern bool tmem_enabled; + #endif /* _XEN_TMEM_H */ -- cgit v0.10.2 From 27257fc07c044af99d85400c4bab670342bbc8a5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 21 Mar 2012 11:43:32 -0400 Subject: xen/acpi: Remove the WARN's as they just create noise. When booting the kernel on machines that do not have P-states we would end up with: ------------[ cut here ]------------ WARNING: at drivers/xen/xen-acpi-processor.c:504 xen_acpi_processor_init+0x286/0x2e0() Hardware name: ProLiant BL460c G6 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.39-200.0.3.el5uek #1 Call Trace: [] ? xen_acpi_processor_init+0x286/0x2e0 [] warn_slowpath_common+0x90/0xc0 [] ? check_acpi_ids+0x1e0/0x1e0 [] warn_slowpath_null+0x1a/0x20 [] xen_acpi_processor_init+0x286/0x2e0 [] ? check_acpi_ids+0x1e0/0x1e0 [] do_one_initcall+0xe8/0x130 .. snip.. Which is OK - the machines do not have P-states, so we fail to register to process the _PXX states. But there is no need to WARN the user of it.
Oracle BZ# 13871288 Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 5c2be96..174b565 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -501,11 +501,11 @@ static int __init xen_acpi_processor_init(void) perf = per_cpu_ptr(acpi_perf_data, i); rc = acpi_processor_register_performance(perf, i); - if (WARN_ON(rc)) + if (rc) goto err_out; } rc = acpi_processor_notify_smm(THIS_MODULE); - if (WARN_ON(rc)) + if (rc) goto err_unregister; for_each_possible_cpu(i) { -- cgit v0.10.2 From 106b44388d8f76373149c4ea144f717b6d4d9a6d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 21 Mar 2012 13:03:45 -0400 Subject: xen/smp: Fix bringup bug in AP code. The CPU hotplug code now has a callback to help bring up the CPU. Without the call we end up getting: BUG: soft lockup - CPU#0 stuck for 29s! [migration/0:6] Modules linked in: CPU ] Pid: 6, comm: migration/0 Not tainted 3.3.0upstream-01180-ged378a5 #1 Dell Inc. PowerEdge T105 /0RR825 RIP: e030:[] [] stop_machine_cpu_stop+0x7b/0xf0 RSP: e02b:ffff8800ceaabdb0 EFLAGS: 00000293 .. snip.. Call Trace: [] ? stop_one_cpu_nowait+0x50/0x50 [] cpu_stopper_thread+0xf1/0x1c0 [] ? __schedule+0x3c6/0x760 [] ? _raw_spin_unlock_irqrestore+0x19/0x30 [] ? res_counter_charge+0x150/0x150 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_ar This fixes it. Acked-by: Peter Zijlstra Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 449f868..240def4 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -75,8 +75,14 @@ static void __cpuinit cpu_bringup(void) xen_setup_cpu_clockevents(); + notify_cpu_starting(cpu); + + ipi_call_lock(); set_cpu_online(cpu, true); + ipi_call_unlock(); + this_cpu_write(cpu_state, CPU_ONLINE); + wmb(); /* We can take interrupts now: we're officially "up".
*/ -- cgit v0.10.2 From b9136d207f0c05c96c6b9c980fa7f7fd541a65a8 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 21 Mar 2012 15:08:38 +0100 Subject: xen: initialize platform-pci even if xen_emul_unplug=never When xen_emul_unplug=never is specified on the kernel command line, reading files from /sys/hypervisor is broken (returns -EBUSY). It is caused by xen_bus dependency on platform-pci and platform-pci isn't initialized when xen_emul_unplug=never is specified. Fix it by allowing platform-pci to ignore xen_emul_unplug=never, and do not initialize xen_[blk|net]front instead. Signed-off-by: Igor Mammedov Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9fd3ee2..4276ab0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1452,6 +1452,9 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_platform_pci_unplug) + return -ENODEV; + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", XENVBD_MAJOR, DEV_NAME); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index fa67905..8cc0914 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1956,6 +1957,9 @@ static int __init netif_init(void) if (xen_initial_domain()) return 0; + if (!xen_platform_pci_unplug) + return -ENODEV; + printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); return xenbus_register_frontend(&netfront_driver); diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 319dd0a..2389e58 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -186,11 +186,6 @@ static struct pci_driver platform_driver = { static int __init platform_pci_module_init(void) { - /* no unplug has been done, IGNORE hasn't been specified: just - * return now */ - 
if (!xen_platform_pci_unplug) - return -ENODEV; - return pci_register_driver(&platform_driver); } -- cgit v0.10.2 From df7a3ee29b775edd1c2d75cf0b128b174bd4091e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 24 Mar 2012 09:18:57 -0400 Subject: xen/acpi: Fix Kconfig dependency on CPU_FREQ The functions: "acpi_processor_*" sound like they depend on CONFIG_ACPI_PROCESSOR but in reality they are exposed when CONFIG_CPU_FREQ=[y|m]. As such update the Kconfig to have this dependency and fix compile issues: ERROR: "acpi_processor_unregister_performance" [drivers/xen/xen-acpi-processor.ko] undefined! ERROR: "acpi_processor_notify_smm" [drivers/xen/xen-acpi-processor.ko] undefined! ERROR: "acpi_processor_register_performance" [drivers/xen/xen-acpi-processor.ko] undefined! ERROR: "acpi_processor_preregister_performance" [drivers/xen/xen-acpi-processor.ko] undefined! Note: We still need the CONFIG_ACPI Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index b1d524a..9424313 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -180,7 +180,7 @@ config XEN_PRIVCMD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN && X86 && ACPI_PROCESSOR + depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the Xen hypervisor. -- cgit v0.10.2