From 3eb8057bbafc64dbf09d5c18513aa80c1b7f2fcb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Jan 2009 21:30:23 -0800 Subject: sparc64: Move generic PCR support code to seperate file. It all lives in the oprofile support code currently and we will need to share this stuff with NMI watchdog and perf_counter support. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h new file mode 100644 index 0000000..4249bb5 --- /dev/null +++ b/arch/sparc/include/asm/pcr.h @@ -0,0 +1,30 @@ +#ifndef __PCR_H +#define __PCR_H + +struct pcr_ops { + u64 (*read)(void); + void (*write)(u64); +}; +extern const struct pcr_ops *pcr_ops; + +extern void deferred_pcr_work_irq(int irq, struct pt_regs *regs); +extern void schedule_deferred_pcr_work(void); + +#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ +#define PCR_STRACE 0x00000002 /* Trace supervisor events */ +#define PCR_UTRACE 0x00000004 /* Trace user events */ +#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ +#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ +#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ +#define PCR_N2_MASK0 0x00003fc0 +#define PCR_N2_MASK0_SHIFT 6 +#define PCR_N2_SL0 0x0003c000 +#define PCR_N2_SL0_SHIFT 14 +#define PCR_N2_OV0 0x00040000 +#define PCR_N2_MASK1 0x07f80000 +#define PCR_N2_MASK1_SHIFT 19 +#define PCR_N2_SL1 0x78000000 +#define PCR_N2_SL1_SHIFT 27 +#define PCR_N2_OV1 0x80000000 + +#endif /* __PCR_H */ diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index d573820..32a7efe 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -23,6 +23,7 @@ #define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 +#define PIL_DEFERRED_PCR_WORK 7 #define PIL_NORMAL_MAX 14 #define PIL_NMI 15 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 53adcaa..cb182d9 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_SPARC64) += visemul.o obj-$(CONFIG_SPARC64) += hvapi.o obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o +obj-$(CONFIG_SPARC64) += pcr.o # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c new file mode 100644 index 0000000..c4f2470 --- /dev/null +++ b/arch/sparc/kernel/pcr.c @@ -0,0 +1,140 @@ +/* pcr.c: Generic sparc64 performance counter infrastructure. + * + * Copyright (C) 2009 David S. Miller (davem@davemloft.net) + */ +#include +#include +#include +#include + +#include +#include + +/* This code is shared between various users of the performance + * counters. Users will be oprofile, pseudo-NMI watchdog, and the + * perf_counter support layer. + */ + +/* Performance counter interrupts run unmasked at PIL level 15. + * Therefore we can't do things like wakeups and other work + * that expects IRQ disabling to be adhered to in locking etc. + * + * Therefore in such situations we defer the work by signalling + * a lower level cpu IRQ. + */ +void deferred_pcr_work_irq(int irq, struct pt_regs *regs) +{ + clear_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +void schedule_deferred_pcr_work(void) +{ + set_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +const struct pcr_ops *pcr_ops; +EXPORT_SYMBOL_GPL(pcr_ops); + +static u64 direct_pcr_read(void) +{ + u64 val; + + read_pcr(val); + return val; +} + +static void direct_pcr_write(u64 val) +{ + write_pcr(val); +} + +static const struct pcr_ops direct_pcr_ops = { + .read = direct_pcr_read, + .write = direct_pcr_write, +}; + +static void n2_pcr_write(u64 val) +{ + unsigned long ret; + + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); + if (val != HV_EOK) + write_pcr(val); +} + +static const struct pcr_ops n2_pcr_ops = { + .read = direct_pcr_read, + .write = n2_pcr_write, +}; + +static unsigned long perf_hsvc_group; +static unsigned long perf_hsvc_major; +static unsigned long perf_hsvc_minor; + +static int __init register_perf_hsvc(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + perf_hsvc_group = HV_GRP_NIAG_PERF; + break; + + case SUN4V_CHIP_NIAGARA2: + perf_hsvc_group = HV_GRP_N2_CPU; + break; + + default: + return -ENODEV; + } + + + perf_hsvc_major = 1; + perf_hsvc_minor = 0; + if (sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor)) { + printk("perfmon: Could not register hvapi.\n"); + return -ENODEV; + } + } + return 0; +} + +static void __init unregister_perf_hsvc(void) +{ + if (tlb_type != hypervisor) + return; + sun4v_hvapi_unregister(perf_hsvc_group); +} + +int __init pcr_arch_init(void) +{ + int err = register_perf_hsvc(); + + if (err) + return err; + + switch (tlb_type) { + case hypervisor: + pcr_ops = &n2_pcr_ops; + break; + + case spitfire: + case cheetah: + case cheetah_plus: + pcr_ops = &direct_pcr_ops; + break; + + default: + err = -ENODEV; + goto out_unregister; + } + + return 0; + +out_unregister: + unregister_perf_hsvc(); + return err; +} + +arch_initcall(pcr_arch_init); diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S index ea92550..d9bdfb9 100644 --- a/arch/sparc/kernel/ttable.S +++ b/arch/sparc/kernel/ttable.S @@ -63,7 +63,8 @@ tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6) #else tl0_irq6: BTRAP(0x46) #endif -tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) +tl0_irq7: TRAP_IRQ(deferred_pcr_work_irq, 7) +tl0_irq8: BTRAP(0x48) BTRAP(0x49) tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) tl0_irq14: TRAP_IRQ(timer_interrupt, 14) tl0_irq15: TRAP_NMI_IRQ(perfctr_irq, 15) diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index d6e170c..c8877a5 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -17,47 +17,10 @@ #include #include #include +#include static int nmi_enabled; -struct pcr_ops { - u64 (*read)(void); - void (*write)(u64); -}; -static const struct pcr_ops *pcr_ops; - -static u64 direct_pcr_read(void) -{ - u64 val; - - read_pcr(val); - return val; -} - -static void direct_pcr_write(u64 val) -{ - write_pcr(val); -} - -static const struct pcr_ops direct_pcr_ops = { - .read = direct_pcr_read, - .write = direct_pcr_write, -}; - -static void n2_pcr_write(u64 val) -{ - unsigned long ret; - - ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); - if (val != HV_EOK) - write_pcr(val); -} - -static const struct pcr_ops n2_pcr_ops = { - .read = direct_pcr_read, - .write = n2_pcr_write, -}; - /* In order to commonize as much of the implementation as * possible, we use PICH as our counter. Mostly this is * to accomodate Niagara-1 which can only count insn cycles @@ -70,30 +33,13 @@ static u64 picl_value(void) return ((u64)((0 - delta) & 0xffffffff)) << 32; } -#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ -#define PCR_STRACE 0x00000002 /* Trace supervisor events */ -#define PCR_UTRACE 0x00000004 /* Trace user events */ -#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ -#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ -#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ -#define PCR_N2_MASK0 0x00003fc0 -#define PCR_N2_MASK0_SHIFT 6 -#define PCR_N2_SL0 0x0003c000 -#define PCR_N2_SL0_SHIFT 14 -#define PCR_N2_OV0 0x00040000 -#define PCR_N2_MASK1 0x07f80000 -#define PCR_N2_MASK1_SHIFT 19 -#define PCR_N2_SL1 0x78000000 -#define PCR_N2_SL1_SHIFT 27 -#define PCR_N2_OV1 0x80000000 - #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) #define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ PCR_N2_TOE_OV1 | \ (2 << PCR_N2_SL1_SHIFT) | \ (0xff << PCR_N2_MASK1_SHIFT)) -static u64 pcr_enable = PCR_SUN4U_ENABLE; +static u64 pcr_enable; static void nmi_handler(struct pt_regs *regs) { @@ -153,62 +99,16 @@ static void nmi_stop(void) synchronize_sched(); } -static unsigned long perf_hsvc_group; -static unsigned long perf_hsvc_major; -static unsigned long perf_hsvc_minor; - -static int __init register_perf_hsvc(void) -{ - if (tlb_type == hypervisor) { - switch (sun4v_chip_type) { - case SUN4V_CHIP_NIAGARA1: - perf_hsvc_group = HV_GRP_NIAG_PERF; - break; - - case SUN4V_CHIP_NIAGARA2: - perf_hsvc_group = HV_GRP_N2_CPU; - break; - - default: - return -ENODEV; - } - - - perf_hsvc_major = 1; - perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register N2 hvapi.\n"); - return -ENODEV; - } - } - return 0; -} - -static void unregister_perf_hsvc(void) -{ - if (tlb_type != hypervisor) - return; - sun4v_hvapi_unregister(perf_hsvc_group); -} - static int oprofile_nmi_init(struct oprofile_operations *ops) { - int err = register_perf_hsvc(); - - if (err) - return err; - switch (tlb_type) { case hypervisor: - pcr_ops = &n2_pcr_ops; pcr_enable = PCR_N2_ENABLE; break; case cheetah: case cheetah_plus: - pcr_ops = &direct_pcr_ops; + pcr_enable = PCR_SUN4U_ENABLE; break; default: @@ -241,10 +141,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) return ret; } - void oprofile_arch_exit(void) { -#ifdef CONFIG_SPARC64 - unregister_perf_hsvc(); -#endif } -- cgit v0.10.2 From c3cf5e8cc56d272f828a66610bb78bbb727b2ce1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Jan 2009 23:16:40 -0800 Subject: sparc: Probe PMU type and record in sparc_pmu_type. Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 32d32b4..d85c3dc 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -26,6 +26,7 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); struct cpu_info { int psr_vers; const char *name; + const char *pmu_name; }; struct fpu_info { @@ -45,6 +46,9 @@ struct manufacturer_info { #define CPU(ver, _name) \ { .psr_vers = ver, .name = _name } +#define CPU_PMU(ver, _name, _pmu_name) \ +{ .psr_vers = ver, .name = _name, .pmu_name = _pmu_name } + #define FPU(ver, _name) \ { .fp_vers = ver, .name = _name } @@ -183,10 +187,10 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x17, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), - CPU(0x11, "TI UltraSparc II (BlackBird)"), - CPU(0x12, "TI UltraSparc IIi (Sabre)"), - CPU(0x13, "TI UltraSparc IIe (Hummingbird)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), + CPU_PMU(0x11, "TI UltraSparc II (BlackBird)", "ultra12"), + CPU_PMU(0x12, "TI UltraSparc IIi (Sabre)", "ultra12"), + CPU_PMU(0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -199,7 +203,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x22, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -209,12 +213,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x3e, .cpu_info = { - CPU(0x14, "TI UltraSparc III (Cheetah)"), - CPU(0x15, "TI UltraSparc III+ (Cheetah+)"), - CPU(0x16, "TI UltraSparc IIIi (Jalapeno)"), - CPU(0x18, "TI UltraSparc IV (Jaguar)"), - CPU(0x19, "TI UltraSparc IV+ (Panther)"), - CPU(0x22, "TI UltraSparc IIIi+ (Serrano)"), + CPU_PMU(0x14, "TI UltraSparc III (Cheetah)", "ultra3"), + CPU_PMU(0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"), + CPU_PMU(0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"), + CPU_PMU(0x18, "TI UltraSparc IV (Jaguar)", "ultra3+"), + CPU_PMU(0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"), + CPU_PMU(0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3i"), CPU(-1, NULL) }, .fpu_info = { @@ -234,6 +238,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { const char *sparc_cpu_type; const char *sparc_fpu_type; +const char *sparc_pmu_type; unsigned int fsr_storage; @@ -244,6 +249,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) sparc_cpu_type = NULL; sparc_fpu_type = NULL; + sparc_pmu_type = NULL; manuf = NULL; for (i = 0; i < ARRAY_SIZE(manufacturer_info); i++) @@ -263,6 +269,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { if (cpu->psr_vers == psr_vers) { sparc_cpu_type = cpu->name; + sparc_pmu_type = cpu->pmu_name; sparc_fpu_type = "No FPU"; break; } @@ -290,6 +297,8 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) psr_impl, fpu_vers); sparc_fpu_type = "Unknown FPU"; } + if (sparc_pmu_type == NULL) + sparc_pmu_type = "Unknown PMU"; } #ifdef CONFIG_SPARC32 @@ -315,11 +324,13 @@ static void __init sun4v_cpu_probe(void) case SUN4V_CHIP_NIAGARA1: sparc_cpu_type = "UltraSparc T1 (Niagara)"; sparc_fpu_type = "UltraSparc T1 integrated FPU"; + sparc_pmu_type = "niagara"; break; case SUN4V_CHIP_NIAGARA2: sparc_cpu_type = "UltraSparc T2 (Niagara2)"; sparc_fpu_type = "UltraSparc T2 integrated FPU"; + sparc_pmu_type = "niagara2"; break; default: diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 81a972e..15d8a3f 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -5,6 +5,7 @@ /* cpu.c */ extern const char *sparc_cpu_type; +extern const char *sparc_pmu_type; extern const char *sparc_fpu_type; extern unsigned int fsr_storage; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 49d061f..f2bcfd2 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -354,6 +354,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) seq_printf(m, "cpu\t\t: %s\n" "fpu\t\t: %s\n" + "pmu\t\t: %s\n" "prom\t\t: %s\n" "type\t\t: %s\n" "ncpus probed\t: %d\n" @@ -366,6 +367,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , sparc_cpu_type, sparc_fpu_type, + sparc_pmu_type, prom_version, ((tlb_type == hypervisor) ? "sun4v" : -- cgit v0.10.2 From e5553a6d04421eec326a629571d696e8e745a0e4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 29 Jan 2009 21:22:47 -0800 Subject: sparc64: Implement NMI watchdog on capable cpus. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 7da7c13..a11b89e 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -17,7 +17,7 @@ typedef struct { /* Dcache line 1 */ unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ - unsigned int __pad0; + unsigned int __nmi_count; unsigned long clock_tick; /* %tick's per second */ unsigned long __pad; unsigned int __pad1; diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index d47d4a1..1934f2c 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -66,9 +66,6 @@ extern void virt_irq_free(unsigned int virt_irq); extern void __init init_IRQ(void); extern void fixup_irqs(void); -extern int register_perfctr_intr(void (*handler)(struct pt_regs *)); -extern void release_perfctr_intr(void (*handler)(struct pt_regs *)); - static inline void set_softint(unsigned long bits) { __asm__ __volatile__("wr %0, 0x0, %%set_softint" @@ -98,5 +95,6 @@ void __trigger_all_cpu_backtrace(void); extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; #define __ARCH_HAS_DO_SOFTIRQ +#define ARCH_HAS_NMI_WATCHDOG #endif diff --git a/arch/sparc/include/asm/kdebug_64.h b/arch/sparc/include/asm/kdebug_64.h index f905b77..feb3578 100644 --- a/arch/sparc/include/asm/kdebug_64.h +++ b/arch/sparc/include/asm/kdebug_64.h @@ -14,6 +14,8 @@ enum die_val { DIE_TRAP, DIE_TRAP_TL1, DIE_CALL, + DIE_NMI, + DIE_NMIWATCHDOG, }; #endif diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h new file mode 100644 index 0000000..fbd546d --- /dev/null +++ b/arch/sparc/include/asm/nmi.h @@ -0,0 +1,10 @@ +#ifndef __NMI_H +#define __NMI_H + +extern int __init nmi_init(void); +extern void perfctr_irq(int irq, struct pt_regs *regs); +extern void nmi_adjust_hz(unsigned int new_hz); + +extern int nmi_usable; + +#endif /* __NMI_H */ diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h index 4249bb5..a2f5c61 100644 --- a/arch/sparc/include/asm/pcr.h +++ b/arch/sparc/include/asm/pcr.h @@ -27,4 +27,20 @@ extern void schedule_deferred_pcr_work(void); #define PCR_N2_SL1_SHIFT 27 #define PCR_N2_OV1 0x80000000 +extern unsigned int picl_shift; + +/* In order to commonize as much of the implementation as + * possible, we use PICH as our counter. Mostly this is + * to accomodate Niagara-1 which can only count insn cycles + * in PICH. + */ +static inline u64 picl_value(unsigned int nmi_hz) +{ + u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift); + + return ((u64)((0 - delta) & 0xffffffff)) << 32; +} + +extern u64 pcr_enable; + #endif /* __PCR_H */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index cb182d9..54742e5 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_SPARC64) += hvapi.o obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o obj-$(CONFIG_SPARC64) += pcr.o +obj-$(CONFIG_SPARC64) += nmi.o # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index cab8e02..e289376 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -196,6 +196,11 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); } return 0; } @@ -778,69 +783,6 @@ void do_softirq(void) local_irq_restore(flags); } -static void unhandled_perf_irq(struct pt_regs *regs) -{ - unsigned long pcr, pic; - - read_pcr(pcr); - read_pic(pic); - - write_pcr(0); - - printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", - smp_processor_id()); - printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", - smp_processor_id(), pcr, pic); -} - -/* Almost a direct copy of the powerpc PMC code. */ -static DEFINE_SPINLOCK(perf_irq_lock); -static void *perf_irq_owner_caller; /* mostly for debugging */ -static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; - -/* Invoked from level 15 PIL handler in trap table. */ -void perfctr_irq(int irq, struct pt_regs *regs) -{ - clear_softint(1 << irq); - perf_irq(regs); -} - -int register_perfctr_intr(void (*handler)(struct pt_regs *)) -{ - int ret; - - if (!handler) - return -EINVAL; - - spin_lock(&perf_irq_lock); - if (perf_irq != unhandled_perf_irq) { - printk(KERN_WARNING "register_perfctr_intr: " - "perf IRQ busy (reserved by caller %p)\n", - perf_irq_owner_caller); - ret = -EBUSY; - goto out; - } - - perf_irq_owner_caller = __builtin_return_address(0); - perf_irq = handler; - - ret = 0; -out: - spin_unlock(&perf_irq_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(register_perfctr_intr); - -void release_perfctr_intr(void (*handler)(struct pt_regs *)) -{ - spin_lock(&perf_irq_lock); - perf_irq_owner_caller = NULL; - perf_irq = unhandled_perf_irq; - spin_unlock(&perf_irq_lock); -} -EXPORT_SYMBOL_GPL(release_perfctr_intr); - #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) { diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c new file mode 100644 index 0000000..ffc6900 --- /dev/null +++ b/arch/sparc/kernel/nmi.c @@ -0,0 +1,224 @@ +/* Pseudo NMI support on sparc64 systems. + * + * Copyright (C) 2009 David S. Miller + * + * The NMI watchdog support and infrastructure is based almost + * entirely upon the x86 NMI support code. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* We don't have a real NMI on sparc64, but we can fake one + * up using profiling counter overflow interrupts and interrupt + * levels. + * + * The profile overflow interrupts at level 15, so we use + * level 14 as our IRQ off level. + */ + +static int nmi_watchdog_active; +static int panic_on_timeout; + +int nmi_usable; +EXPORT_SYMBOL_GPL(nmi_usable); + +static unsigned int nmi_hz = HZ; + +static DEFINE_PER_CPU(unsigned int, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); + +void touch_nmi_watchdog(void) +{ + if (nmi_watchdog_active) { + int cpu; + + for_each_present_cpu(cpu) { + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; + } + } + + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, + pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) + return; + + console_verbose(); + bust_spinlocks(1); + + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->tpc); + show_regs(regs); + + bust_spinlocks(0); + + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + + local_irq_enable(); + do_exit(SIGBUS); +} + +notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) +{ + unsigned int sum, touched = 0; + int cpu = smp_processor_id(); + + clear_softint(1 << irq); + pcr_ops->write(PCR_PIC_PRIV); + + local_cpu_data().__nmi_count++; + + if (notify_die(DIE_NMI, "nmi", regs, 0, + pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) + touched = 1; + + sum = kstat_cpu(cpu).irqs[0]; + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + if (!touched && __get_cpu_var(last_irq_sum) == sum) { + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); + } else { + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); + } + if (nmi_usable) { + write_pic(picl_value(nmi_hz)); + pcr_ops->write(pcr_enable); + } +} + +static inline unsigned int get_nmi_count(int cpu) +{ + return cpu_data(cpu).__nmi_count; +} + +static int endflag __initdata; + +static __init void nmi_cpu_busy(void *data) +{ + local_irq_enable_in_hardirq(); + while (endflag == 0) + mb(); +} + +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + nmi_usable = 0; +} + +static void stop_watchdog(void *unused) +{ + pcr_ops->write(PCR_PIC_PRIV); +} + +static int __init check_nmi_watchdog(void) +{ + unsigned int *prev_nmi_count; + int cpu, err; + + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL); + if (!prev_nmi_count) { + err = -ENOMEM; + goto error; + } + + printk(KERN_INFO "Testing NMI watchdog ... "); + + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); + + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); + local_irq_enable(); + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ + + for_each_online_cpu(cpu) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); + } + endflag = 1; + if (!nmi_usable) { + kfree(prev_nmi_count); + err = -ENODEV; + goto error; + } + printk("OK.\n"); + + nmi_hz = 1; + + kfree(prev_nmi_count); + return 0; +error: + on_each_cpu(stop_watchdog, NULL, 1); + return err; +} + +static void start_watchdog(void *unused) +{ + pcr_ops->write(PCR_PIC_PRIV); + write_pic(picl_value(nmi_hz)); + + pcr_ops->write(pcr_enable); +} + +void nmi_adjust_hz(unsigned int new_hz) +{ + nmi_hz = new_hz; + on_each_cpu(start_watchdog, NULL, 1); +} +EXPORT_SYMBOL_GPL(nmi_adjust_hz); + +int __init nmi_init(void) +{ + nmi_usable = 1; + + on_each_cpu(start_watchdog, NULL, 1); + + return check_nmi_watchdog(); +} + +static int __init setup_nmi_watchdog(char *str) +{ + if (!strncmp(str, "panic", 5)) + panic_on_timeout = 1; + + return 0; +} +__setup("nmi_watchdog=", setup_nmi_watchdog); diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index c4f2470..92e0dda 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -9,12 +9,22 @@ #include #include +#include /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the * perf_counter support layer. */ +#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) +#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ + PCR_N2_TOE_OV1 | \ + (2 << PCR_N2_SL1_SHIFT) | \ + (0xff << PCR_N2_MASK1_SHIFT)) + +u64 pcr_enable; +unsigned int picl_shift; + /* Performance counter interrupts run unmasked at PIL level 15. * Therefore we can't do things like wakeups and other work * that expects IRQ disabling to be adhered to in locking etc. @@ -117,12 +127,15 @@ int __init pcr_arch_init(void) switch (tlb_type) { case hypervisor: pcr_ops = &n2_pcr_ops; + pcr_enable = PCR_N2_ENABLE; + picl_shift = 2; break; - case spitfire: case cheetah: case cheetah_plus: + case spitfire: pcr_ops = &direct_pcr_ops; + pcr_enable = PCR_SUN4U_ENABLE; break; default: @@ -130,7 +143,7 @@ int __init pcr_arch_init(void) goto out_unregister; } - return 0; + return nmi_init(); out_unregister: unregister_perf_hsvc(); diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index c8877a5..d172f86 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -13,117 +13,57 @@ #include #ifdef CONFIG_SPARC64 -#include -#include -#include -#include -#include +#include +#include +#include +#include -static int nmi_enabled; - -/* In order to commonize as much of the implementation as - * possible, we use PICH as our counter. Mostly this is - * to accomodate Niagara-1 which can only count insn cycles - * in PICH. - */ -static u64 picl_value(void) -{ - u32 delta = local_cpu_data().clock_tick / HZ; - - return ((u64)((0 - delta) & 0xffffffff)) << 32; -} - -#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) -#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ - PCR_N2_TOE_OV1 | \ - (2 << PCR_N2_SL1_SHIFT) | \ - (0xff << PCR_N2_MASK1_SHIFT)) - -static u64 pcr_enable; - -static void nmi_handler(struct pt_regs *regs) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - pcr_ops->write(PCR_PIC_PRIV); + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; - if (nmi_enabled) { - oprofile_add_sample(regs, 0); - - write_pic(picl_value()); - pcr_ops->write(pcr_enable); + switch (val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; } + return ret; } -/* We count "clock cycle" events in the lower 32-bit PIC. - * Then configure it such that it overflows every HZ, and thus - * generates a level 15 interrupt at that frequency. - */ -static void cpu_nmi_start(void *_unused) -{ - pcr_ops->write(PCR_PIC_PRIV); - write_pic(picl_value()); - - pcr_ops->write(pcr_enable); -} +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, +}; -static void cpu_nmi_stop(void *_unused) +static int timer_start(void) { - pcr_ops->write(PCR_PIC_PRIV); + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; + nmi_adjust_hz(HZ); + return 0; } -static int nmi_start(void) -{ - int err = register_perfctr_intr(nmi_handler); - - if (!err) { - nmi_enabled = 1; - wmb(); - err = on_each_cpu(cpu_nmi_start, NULL, 1); - if (err) { - nmi_enabled = 0; - wmb(); - on_each_cpu(cpu_nmi_stop, NULL, 1); - release_perfctr_intr(nmi_handler); - } - } - - return err; -} -static void nmi_stop(void) +static void timer_stop(void) { - nmi_enabled = 0; - wmb(); - - on_each_cpu(cpu_nmi_stop, NULL, 1); - release_perfctr_intr(nmi_handler); - synchronize_sched(); + nmi_adjust_hz(1); + unregister_die_notifier(&profile_timer_exceptions_nb); + synchronize_sched(); /* Allow already-started NMIs to complete. */ } -static int oprofile_nmi_init(struct oprofile_operations *ops) +static int op_nmi_timer_init(struct oprofile_operations *ops) { - switch (tlb_type) { - case hypervisor: - pcr_enable = PCR_N2_ENABLE; - break; - - case cheetah: - case cheetah_plus: - pcr_enable = PCR_SUN4U_ENABLE; - break; - - default: + if (!nmi_usable) return -ENODEV; - } - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = nmi_start; - ops->stop = nmi_stop; + ops->start = timer_start; + ops->stop = timer_stop; ops->cpu_type = "timer"; - - printk(KERN_INFO "oprofile: Using perfctr based NMI timer interrupt.\n"); - + printk(KERN_INFO "oprofile: Using perfctr NMI timer interrupt.\n"); return 0; } #endif @@ -133,7 +73,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) int ret = -ENODEV; #ifdef CONFIG_SPARC64 - ret = oprofile_nmi_init(ops); + ret = op_nmi_timer_init(ops); if (!ret) return ret; #endif -- cgit v0.10.2 From 802c64b310e5b9dfda6cb50b850b962ed96a9e81 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Feb 2009 21:57:48 -0800 Subject: sparc64: On non-Niagara, need to touch NMI watchdog in NOHZ mode. When we're idling in NOHZ mode, timer interrupts are not running. Evidence of processing timer interrupts is what the NMI watchdog uses to determine if the CPU is stuck. On Niagara, we'll yield the cpu. This will make the cpu, at worst, hang out in the hypervisor until an interrupt arrives. This will prevent the NMI watchdog timer from firing. However on non-Niagara we just loop executing instructions which will cause the NMI watchdog to keep firing. It won't see timer interrupts happening so it will think the cpu is stuck. Fix this by touching the NMI watchdog in the cpu idle loop on non-Niagara machines. Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index cc8b560..a73954b 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -52,8 +53,10 @@ static void sparc64_yield(int cpu) { - if (tlb_type != hypervisor) + if (tlb_type != hypervisor) { + touch_nmi_watchdog(); return; + } clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); -- cgit v0.10.2 From eeabac7386ca13bfe1a58afeb04326a9e1a3a20e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Feb 2009 22:08:15 -0800 Subject: sparc64: Validate kernel generated fault addresses on sparc64. In order to handle all of the cases of address calculation overflow properly, we run sparc 32-bit processes in "address masking" mode when running on a 64-bit kernel. Address masking mode zeros out the top 32-bits of the address calculated for every load and store instruction. However, when we're in privileged mode we have to run with that address masking mode disabled even when accessing userspace from the kernel. To "simulate" the address masking mode we clear the top-bits by hand for 32-bit processes in the fault handler. It is the responsibility of code in the compat layer to properly zero extend addresses used to access userspace. If this isn't followed properly we can get into a fault loop. Say that the user address is 0xf0000000 but for whatever reason the kernel code sign extends this to 64-bit, and then the kernel tries to access the result. In such a case we'll fault on address 0xfffffffff0000000 but the fault handler will process that fault as if it were to address 0xf0000000. We'll loop faulting forever because the fault never gets satisfied. So add a check specifically for this case, when the kernel is faulting on a user address access and the addresses don't match up. This code path is sufficiently slow path, and this bug is sufficiently painful to diagnose, that this kind of bug check is warranted. Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a9e474b..1a786ab 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -244,8 +245,14 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) (fault_code & FAULT_CODE_DTLB)) BUG(); + if (test_thread_flag(TIF_32BIT)) { + if (!(regs->tstate & TSTATE_PRIV)) + regs->tpc &= 0xffffffff; + address &= 0xffffffff; + } + if (regs->tstate & TSTATE_PRIV) { - unsigned long tpc = regs->tpc; + unsigned long eaddr, tpc = regs->tpc; /* Sanity check the PC. */ if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) || @@ -255,6 +262,16 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) bad_kernel_pc(regs, address); return; } + + insn = get_fault_insn(regs, insn); + eaddr = compute_effective_address(regs, insn, 0); + if (WARN_ON_ONCE((eaddr & PAGE_MASK) != (address & PAGE_MASK))){ + printk(KERN_ERR "FAULT: Mismatch kernel fault " + "address: addr[%lx] eaddr[%lx] TPC[%lx]\n", + address, eaddr, tpc); + show_regs(regs); + goto handle_kernel_fault; + } } /* @@ -264,12 +281,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (in_atomic() || !mm) goto intr_or_no_mm; - if (test_thread_flag(TIF_32BIT)) { - if (!(regs->tstate & TSTATE_PRIV)) - regs->tpc &= 0xffffffff; - address &= 0xffffffff; - } - if (!down_read_trylock(&mm->mmap_sem)) { if ((regs->tstate & TSTATE_PRIV) && !search_exception_tables(regs->tpc)) { -- cgit v0.10.2 From 47a4a0e766e3152dee667ea8fcca8465c8a0759f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Feb 2009 22:14:28 -0800 Subject: sparc: fixup for sparseirq changes Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index ffc6900..09f088e 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -94,7 +94,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; - sum = kstat_cpu(cpu).irqs[0]; + sum = kstat_irqs_cpu(0, cpu); if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; -- cgit v0.10.2 From 9b02605826903203d5301049c1f52e722584785c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 Feb 2009 16:28:23 -0800 Subject: sparc64: Kill bogus TPC/address truncation during 32-bit faults. This builds upon eeabac7386ca13bfe1a58afeb04326a9e1a3a20e ("sparc64: Validate kernel generated fault addresses on sparc64.") Upon further consideration, we actually should never see any fault addresses for 32-bit tasks with the upper 32-bits set. If it does every happen, by definition it's a bug. Whatever context created that fault would only have that fault satisfied if we used the full 64-bit address. If we truncate it, we'll always fault the wrong address and we'll always loop faulting forever. So catch such conditions and mark them as errors always. Log the error and fail the fault. Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 1a786ab..4ab8993 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -225,6 +225,30 @@ cannot_handle: unhandled_fault (address, current, regs); } +static void noinline bogus_32bit_fault_tpc(struct pt_regs *regs) +{ + static int times; + + if (times++ < 10) + printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports " + "64-bit TPC [%lx]\n", + current->comm, current->pid, + regs->tpc); + show_regs(regs); +} + +static void noinline bogus_32bit_fault_address(struct pt_regs *regs, + unsigned long addr) +{ + static int times; + + if (times++ < 10) + printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " + "reports 64-bit fault address [%lx]\n", + current->comm, current->pid, addr); + show_regs(regs); +} + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -246,13 +270,20 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) BUG(); if (test_thread_flag(TIF_32BIT)) { - if (!(regs->tstate & TSTATE_PRIV)) - regs->tpc &= 0xffffffff; - address &= 0xffffffff; + if (!(regs->tstate & TSTATE_PRIV)) { + if (unlikely((regs->tpc >> 32) != 0)) { + bogus_32bit_fault_tpc(regs); + goto intr_or_no_mm; + } + } + if (unlikely((address >> 32) != 0)) { + bogus_32bit_fault_address(regs, address); + goto intr_or_no_mm; + } } if (regs->tstate & TSTATE_PRIV) { - unsigned long eaddr, tpc = regs->tpc; + unsigned long tpc = regs->tpc; /* Sanity check the PC. */ if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) || @@ -262,16 +293,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) bad_kernel_pc(regs, address); return; } - - insn = get_fault_insn(regs, insn); - eaddr = compute_effective_address(regs, insn, 0); - if (WARN_ON_ONCE((eaddr & PAGE_MASK) != (address & PAGE_MASK))){ - printk(KERN_ERR "FAULT: Mismatch kernel fault " - "address: addr[%lx] eaddr[%lx] TPC[%lx]\n", - address, eaddr, tpc); - show_regs(regs); - goto handle_kernel_fault; - } } /* -- cgit v0.10.2