From 351af0725e5222e35741011d1ea62215c1ed06db Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Fri, 6 Aug 2010 13:39:08 +0800 Subject: perf, x86: Fix Intel-nhm PMU programming errata workaround Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented workaround we implemented in: 11164cd: perf, x86: Add Nehelem PMU programming errata workaround doesn't actually work fully and causes a stuck PMU state under load and non-functioning perf profiling. A functional workaround was found by trial & error. Affects all Nehalem-class Intel PMUs. Signed-off-by: Zhang Yanmin Signed-off-by: Peter Zijlstra LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com> Cc: Arjan van de Ven Cc: "H. Peter Anvin" Cc: # .35.x Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac86..d8d86d0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) * Intel Errata AAP53 (model 30) * Intel Errata BD53 (model 44) * - * These chips need to be 'reset' when adding counters by programming - * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 - * either in sequence on the same PMC or on different PMCs. + * The official story: + * These chips need to be 'reset' when adding counters by programming the + * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either + * in sequence on the same PMC or on different PMCs. + * + * In practise it appears some of these events do in fact count, and + * we need to programm all 4 events. */ -static void intel_pmu_nhm_enable_all(int added) +static void intel_pmu_nhm_workaround(void) { - if (added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int i; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + static const unsigned long nhm_magic[4] = { + 0x4300B5, + 0x4300D2, + 0x4300B1, + 0x4300B1 + }; + struct perf_event *event; + int i; + + /* + * The Errata requires below steps: + * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; + * 2) Configure 4 PERFEVTSELx with the magic events and clear + * the corresponding PMCx; + * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; + * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; + * 5) Clear 4 pairs of ERFEVTSELx and PMCx; + */ + + /* + * The real steps we choose are a little different from above. + * A) To reduce MSR operations, we don't run step 1) as they + * are already cleared before this function is called; + * B) Call x86_perf_event_update to save PMCx before configuring + * PERFEVTSELx with magic number; + * C) With step 5), we do clear only when the PERFEVTSELx is + * not used currently. + * D) Call x86_perf_event_set_period to restore PMCx; + */ - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); + /* We always operate 4 pairs of PERF Counters */ + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) + x86_perf_event_update(event); + } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + for (i = 0; i < 4; i++) { + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + } - for (i = 0; i < 3; i++) { - struct perf_event *event = cpuc->events[i]; + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); - if (!event) - continue; + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) { + x86_perf_event_set_period(event); __x86_pmu_enable_event(&event->hw, - ARCH_PERFMON_EVENTSEL_ENABLE); - } + ARCH_PERFMON_EVENTSEL_ENABLE); + } else + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); } +} + +static void intel_pmu_nhm_enable_all(int added) +{ + if (added) + intel_pmu_nhm_workaround(); intel_pmu_enable_all(added); } -- cgit v0.10.2 From ecafda60e88031bcc4271c446f984ee883d69ea8 Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Wed, 18 Aug 2010 13:32:37 -0300 Subject: perf tools: Fix build error on read only source. Parts of the build process were generating files outside the specified O= directory, causing the build to fail on systems where the sources are in a read only file system. Fix it by using $(OUTPUT) on these locations. Also check that $(OUTPUT) actually exists, just like the top level kernel Makefile does. Otherwise the failure message emitted is completely misleading. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20100817140841.0859362C03A@msa106.auone-net.jp> Signed-off-by: Kusanagi Kouichi Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcb9700..4f1fa77 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -5,6 +5,12 @@ endif # The default target of this Makefile is... all:: +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif + # Define V=1 to have a more verbose compile. # Define V=2 to have an even more verbose compile. # @@ -931,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh - $(QUIET_GEN)$(RM) $@ $@+ && \ + $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ - $@.sh >$@+ && \ - chmod +x $@+ && \ - mv $@+ $(OUTPUT)$@ + $@.sh > $(OUTPUT)$@+ && \ + chmod +x $(OUTPUT)$@+ && \ + mv $(OUTPUT)$@+ $(OUTPUT)$@ configure: configure.ac $(QUIET_GEN)$(RM) $@ $<+ && \ diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index ddb68e6..7a7b608 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak @@ -113,7 +113,7 @@ endef # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options) try-cc = $(shell sh -c \ - 'TMP="$(TMPOUT).$$$$"; \ + 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ echo "$(1)" | \ $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ rm -f "$$TMP"') -- cgit v0.10.2 From 737480a0d525dae13306296da08029dff545bc72 Mon Sep 17 00:00:00 2001 From: KUMANO Syuhei Date: Sun, 15 Aug 2010 15:18:04 +0900 Subject: kprobes/x86: Fix the return address of multiple kretprobes Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace ln-897 [000] 20404.133727: event1: (kretprobe_trampoline+0x0/0x4c <- sys_symlink) ln-897 [000] 20404.133747: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink) (with this patch) # cat trace ln-740 [000] 13799.491076: event1: (system_call_fastpath+0x16/0x1b <- sys_symlink) ln-740 [000] 13799.491096: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink) Signed-off-by: KUMANO Syuhei Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: YOSHIFUJI Hideaki LKML-Reference: <1281853084.3254.11.camel@camp10-laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1bfb6cf..770ebfb 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __get_cpu_var(current_kprobe) = &ri->rp->kp; get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __get_cpu_var(current_kprobe) = NULL; } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); - kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { -- cgit v0.10.2