From 72523d8082df3d89bffe30cbc1356dbb45ab51ae Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Oct 2012 10:36:11 +1100 Subject: Revert "powerpc/perf: Use pmc_overflow() to detect rolled back events" This reverts commit 813312110bede27bffd082c25cd31730bd567beb. This revert was requested by the author of the patch as it seems to cause system hangs with some low frequency events diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 0db88f5..aa2465e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1463,7 +1463,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (!event->hw.idx || is_limited_pmc(event->hw.idx)) continue; val = read_pmc(event->hw.idx); - if (pmc_overflow(val)) { + if ((int)val < 0) { /* event has overflowed */ found = 1; record_and_restart(event, val, regs); -- cgit v0.10.2 From ce236ab576a2d76b1d6eb268221e74f05ed48d8b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Oct 2012 22:25:45 +0000 Subject: powerpc: Build fix for powerpc KVM Fix build failure for powerpc KVM by adding missing VPN_SHIFT definition and the ';' arch/powerpc/kvm/book3s_32_mmu_host.c: In function 'kvmppc_mmu_map_page': arch/powerpc/kvm/book3s_32_mmu_host.c:176: error: 'VPN_SHIFT' undeclared (first use in this function) arch/powerpc/kvm/book3s_32_mmu_host.c:176: error: (Each undeclared identifier is reported only once arch/powerpc/kvm/book3s_32_mmu_host.c:176: error: for each function it appears in.) arch/powerpc/kvm/book3s_32_mmu_host.c:178: error: expected ';' before 'next_pteg' arch/powerpc/kvm/book3s_32_mmu_host.c:190: error: label 'next_pteg' used but not defined make[1]: *** [arch/powerpc/kvm/book3s_32_mmu_host.o] Error 1 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h index 38040ff..ce0ef6c 100644 --- a/arch/powerpc/include/asm/kvm_book3s_32.h +++ b/arch/powerpc/include/asm/kvm_book3s_32.h @@ -42,5 +42,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define SID_SHIFT 28 #define ESID_MASK 0xf0000000 #define VSID_MASK 0x00fffffff0000000ULL +#define VPN_SHIFT 12 #endif /* __ASM_KVM_BOOK3S_32_H__ */ diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 00aa612..b0f625a 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -173,8 +173,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) BUG_ON(!map); vsid = map->host_vsid; - vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT) - + vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | + ((eaddr & ~ESID_MASK) >> VPN_SHIFT); next_pteg: if (rr == 16) { primary = !primary; -- cgit v0.10.2 From 817deb05df45577d4037230f2facee486c11d9df Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 3 Oct 2012 18:42:10 +0000 Subject: cpuidle/powerpc: Fix target residency initialisation in pseries cpuidle Remove the redundant target residency initialisation in pseries_cpuidle_driver_init(). This is currently over-writing the residency time updated as part of the static table, resulting in all the idle states having the same target residency of 100us which is incorrect. This may result in the menu governor making wrong state decisions. Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 455760b..02e425a 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -246,10 +246,6 @@ static int pseries_cpuidle_driver_init(void) drv->states[drv->state_count] = /* structure copy */ cpuidle_state_table[idle_state]; - if (cpuidle_state_table == dedicated_states) - drv->states[drv->state_count].target_residency = - __get_cpu_var(smt_snooze_delay); - drv->state_count += 1; } -- cgit v0.10.2 From 8ea959a17fe6e27f7954dddad5b17b0e33f0d7ee Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 3 Oct 2012 18:42:18 +0000 Subject: cpuidle/powerpc: Fix smt_snooze_delay functionality. smt_snooze_delay was designed to delay idle loop's nap entry in the native idle code before it got ported over to use as part of the cpuidle framework. A -ve value assigned to smt_snooze_delay should result in busy looping, in other words disabling the entry to nap state. - https://lists.ozlabs.org/pipermail/linuxppc-dev/2010-May/082450.html This particular functionality can be achieved currently by echo 1 > /sys/devices/system/cpu/cpu*/state1/disable but it is broken when one assigns -ve value to the smt_snooze_delay variable either via sysfs entry or ppc64_cpu util. This patch aims to fix this, by disabling nap state when smt_snooze_delay variable is set to -ve value. Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8734b38..8750204 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -388,9 +388,9 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_nap(void); #ifdef CONFIG_PSERIES_IDLE -extern void update_smt_snooze_delay(int snooze); +extern void update_smt_snooze_delay(int cpu, int residency); #else -static inline void update_smt_snooze_delay(int snooze) {} +static inline void update_smt_snooze_delay(int cpu, int residency) {} #endif extern void flush_instruction_cache(void); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 8302af6..cf357a0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -50,7 +50,7 @@ static ssize_t store_smt_snooze_delay(struct device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; - update_smt_snooze_delay(snooze); + update_smt_snooze_delay(cpu->dev.id, snooze); return count; } diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 02e425a..a32d56d 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -33,13 +33,6 @@ static int max_idle_state = MAX_IDLE_STATE_COUNT - 1; static struct cpuidle_device __percpu *pseries_cpuidle_devices; static struct cpuidle_state *cpuidle_state_table; -void update_smt_snooze_delay(int snooze) -{ - struct cpuidle_driver *drv = cpuidle_get_driver(); - if (drv) - drv->states[0].target_residency = snooze; -} - static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) { @@ -190,6 +183,23 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = { .enter = &shared_cede_loop }, }; +void update_smt_snooze_delay(int cpu, int residency) +{ + struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); + + if (cpuidle_state_table != dedicated_states) + return; + + if (residency < 0) { + /* Disable the Nap state on that cpu */ + if (dev) + dev->states_usage[1].disable = 1; + } else + if (drv) + drv->states[0].target_residency = residency; +} + static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, unsigned long action, void *hcpu) { -- cgit v0.10.2 From 83dac59409387789b88bed40b1be86a8abc572be Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 3 Oct 2012 18:42:26 +0000 Subject: cpuidle/powerpc: Fix snooze state problem in the cpuidle design on pseries. Earlier without cpuidle framework on pseries, the native arch idle routine comprised of both snooze and nap states. smt_snooze_delay variable was used to delay the idle process entry to deeper idle state like nap. With the coming of cpuidle, this arch specific idle was replaced by two different idle routines, one for supporting snooze and other for nap. This enabled addition of more low level idle states on pseries in the future. On adopting the generic cpuidle framework for POWER systems, the decision of which idle state to choose from, given a predicted idle time is taken by the menu governor based on target_residency and exit_latency of the idle states. target_residency is the minimum time to be resident in that idle state. Exit_latency is time taken to exit out of idle state. Deeper the idle state, both the target residency and exit latency would be higher. In the current design, smt_snooze_delay is used as target_residency for the snooze state which is incorrect, as it is not the minimum but the maximum duration to be in snooze state. This would result in the governor in taking bad decision, as presently target_residency of nap < target_residency of snooze inspite of nap being deeper idle state. This patch aims to fix this problem by replacing the smt_snooze_delay loop in snooze state, with the need_resched() as the governor is aware of entry and exit of various idle transitions based on which next idle time prediction. The governor is intelligent enough to determine the idle state the needs to be transitioned to and maintains a whole of heuristics including io load, previous idle states predictions etc for the same, based on which idle state entry decision is taken. With this fix, of setting target_residency of snooze to 0 nap to smt_snooze_delay if the predicted idle time is less than smt_snooze_delay (target_residency of nap) value governor would pick snooze state, else nap. This adhers to the previous native idle design. Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index a32d56d..45d00e5 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -59,32 +59,22 @@ static int snooze_loop(struct cpuidle_device *dev, { unsigned long in_purr; ktime_t kt_before; - unsigned long start_snooze; - long snooze = drv->states[0].target_residency; + int cpu = dev->cpu; idle_loop_prolog(&in_purr, &kt_before); + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); - if (snooze) { - start_snooze = get_tb() + snooze * tb_ticks_per_usec; - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - - while ((snooze < 0) || (get_tb() < start_snooze)) { - if (need_resched() || cpu_is_offline(dev->cpu)) - goto out; - ppc64_runlatch_off(); - HMT_low(); - HMT_very_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); - local_irq_disable(); + while ((!need_resched()) && cpu_online(cpu)) { + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); } -out: HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + dev->last_residency = (int)idle_loop_epilog(in_purr, kt_before); return index; @@ -165,8 +155,8 @@ static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = { .name = "CEDE", .desc = "CEDE", .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 10, + .exit_latency = 10, + .target_residency = 100, .enter = &dedicated_cede_loop }, }; @@ -197,7 +187,7 @@ void update_smt_snooze_delay(int cpu, int residency) dev->states_usage[1].disable = 1; } else if (drv) - drv->states[0].target_residency = residency; + drv->states[1].target_residency = residency; } static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, -- cgit v0.10.2