From 2f993cf093643b98477c421fa2b9a98dcc940323 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 May 2015 22:04:25 +0200 Subject: perf: Fix ring_buffer_attach() RCU sync, again While looking for other users of get_state/cond_sync. I Found ring_buffer_attach() and it looks obviously buggy? Don't we need to ensure that we have "synchronize" _between_ list_del() and list_add() ? IOW. Suppose that ring_buffer_attach() preempts right_after get_state_synchronize_rcu() and gp completes before spin_lock(). In this case cond_synchronize_rcu() does nothing and we reuse ->rb_entry without waiting for gp in between? It also moves the ->rcu_pending check under "if (rb)", to make it more readable imo. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: josh@joshtriplett.org Cc: tj@kernel.org Fixes: b69cf53640da ("perf: Fix a race between ring_buffer_detach() and ring_buffer_attach()") Link: http://lkml.kernel.org/r/20150530200425.GA15748@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index eddf1ed..0ceb386 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4331,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event, WARN_ON_ONCE(event->rcu_pending); old_rb = event->rb; - event->rcu_batches = get_state_synchronize_rcu(); - event->rcu_pending = 1; - spin_lock_irqsave(&old_rb->event_lock, flags); list_del_rcu(&event->rb_entry); spin_unlock_irqrestore(&old_rb->event_lock, flags); - } - if (event->rcu_pending && rb) { - cond_synchronize_rcu(event->rcu_batches); - event->rcu_pending = 0; + event->rcu_batches = get_state_synchronize_rcu(); + event->rcu_pending = 1; } if (rb) { + if (event->rcu_pending) { + cond_synchronize_rcu(event->rcu_batches); + event->rcu_pending = 0; + } + spin_lock_irqsave(&rb->event_lock, flags); list_add_rcu(&event->rb_entry, &rb->event_list); spin_unlock_irqrestore(&rb->event_lock, flags); -- cgit v0.10.2 From 4b36f1a4139c9284df74c0f5d7655603d67807df Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jun 2015 13:52:22 -0700 Subject: perf/x86: Add more Broadwell model numbers This patch adds additional model numbers for Broadwell to perf. Support for Broadwell with Iris Pro (Intel Core i7-57xxC) and support for Broadwell Server Xeon. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434055942-28253-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a1e35c9..afdd7c0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3253,6 +3253,8 @@ __init int intel_pmu_init(void) case 61: /* 14nm Broadwell Core-M */ case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); -- cgit v0.10.2 From 6b099d9b040b0f3d0aec05b560d7caf879af5077 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 11 Jun 2015 15:13:56 +0300 Subject: perf/x86/intel/bts: Fix DS area sharing with x86_pmu events Currently, the intel_bts driver relies on the DS area allocated by the x86_pmu code in its event_init() path, which is a bug: creating a BTS event while no x86_pmu events are present results in a NULL pointer dereference. The same DS area is also used by PEBS sampling, which makes it quite a bit trickier to have a separate one for intel_bts' purposes. This patch makes intel_bts driver use the same DS allocation and reference counting code as x86_pmu to make sure it is always present when either intel_bts or x86_pmu need it. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Link: http://lkml.kernel.org/r/1434024837-9916-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4f7001f..aa4e3a7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -270,11 +270,7 @@ msr_fail: static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } + x86_release_hardware(); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -324,6 +320,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_ds_buffers(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Check if we can create event of a certain type (that no conflicting events * are present). @@ -336,9 +361,10 @@ int x86_add_exclusive(unsigned int what) return 0; mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) goto out; + } atomic_inc(&x86_pmu.lbr_exclusive[what]); ret = 0; @@ -527,19 +553,7 @@ static int __x86_pmu_event_init(struct perf_event *event) if (!x86_pmu_initialized()) return -ENODEV; - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } + err = x86_reserve_hardware(); if (err) return err; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ef78516..f068695 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -703,6 +703,10 @@ int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index ac1f0c5..7795f3f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode) static void bts_event_destroy(struct perf_event *event) { + x86_release_hardware(); x86_del_exclusive(x86_lbr_exclusive_bts); } static int bts_event_init(struct perf_event *event) { + int ret; + if (event->attr.type != bts_pmu.type) return -ENOENT; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + event->destroy = bts_event_destroy; return 0; -- cgit v0.10.2 From 1b7b938f181742f899a2f31c280f79dabef6ddb6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 9 Jun 2015 13:03:26 +0300 Subject: perf/x86/intel: Fix PMI handling for Intel PT Intel PT is a separate PMU and it is not using any of the x86_pmu code paths, which means in particular that the active_events counter remains intact when new PT events are created. However, PT uses the generic x86_pmu PMI handler for its PMI handling needs. The problem here is that the latter checks active_events and in case of it being zero, exits without calling the actual x86_pmu.handle_nmi(), which results in unknown NMI errors and massive data loss for PT. The effect is not visible if there are other perf events in the system at the same time that keep active_events counter non-zero, for instance if the NMI watchdog is running, so one needs to disable it to reproduce the problem. At the same time, the active_events counter besides doing what the name suggests also implicitly serves as a PMC hardware and DS area reference counter. This patch adds a separate reference counter for the PMC hardware, leaving active_events for actually counting the events and makes sure it also counts PT and BTS events. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Link: http://lkml.kernel.org/r/87k2v92t0s.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index aa4e3a7..6be186c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) } static atomic_t active_events; +static atomic_t pmc_refcount; static DEFINE_MUTEX(pmc_reserve_mutex); #ifdef CONFIG_X86_LOCAL_APIC @@ -271,6 +272,7 @@ msr_fail: static void hw_perf_event_destroy(struct perf_event *event) { x86_release_hardware(); + atomic_dec(&active_events); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -324,16 +326,16 @@ int x86_reserve_hardware(void) { int err = 0; - if (!atomic_inc_not_zero(&active_events)) { + if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { + if (atomic_read(&pmc_refcount) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; else reserve_ds_buffers(); } if (!err) - atomic_inc(&active_events); + atomic_inc(&pmc_refcount); mutex_unlock(&pmc_reserve_mutex); } @@ -342,7 +344,7 @@ int x86_reserve_hardware(void) void x86_release_hardware(void) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { release_pmc_hardware(); release_ds_buffers(); mutex_unlock(&pmc_reserve_mutex); @@ -371,12 +373,24 @@ int x86_add_exclusive(unsigned int what) out: mutex_unlock(&pmc_reserve_mutex); + + /* + * Assuming that all exclusive events will share the PMI handler + * (which checks active_events for whether there is work to do), + * we can bump active_events counter right here, except for + * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init() + * path, which already bumps active_events for them. + */ + if (!ret && what != x86_lbr_exclusive_lbr) + atomic_inc(&active_events); + return ret; } void x86_del_exclusive(unsigned int what) { atomic_dec(&x86_pmu.lbr_exclusive[what]); + atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -557,6 +571,7 @@ static int __x86_pmu_event_init(struct perf_event *event) if (err) return err; + atomic_inc(&active_events); event->destroy = hw_perf_event_destroy; event->hw.idx = -1; @@ -1429,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) u64 finish_clock; int ret; + /* + * All PMUs/events that share this PMI handler should make sure to + * increment active_events for their events. + */ if (!atomic_read(&active_events)) return NMI_DONE; -- cgit v0.10.2 From 2c33645d366d13b969d936b68b9f4875b1fdddea Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Mon, 8 Jun 2015 14:46:49 +0200 Subject: perf/x86: Honor the architectural performance monitoring version Architectural performance monitoring, version 1, doesn't support fixed counters. Currently, even if a hypervisor advertises support for architectural performance monitoring version 1, perf may still try to use the fixed counters, as the constraints are set up based on the CPU model. This patch ensures that perf honors the architectural performance monitoring version returned by CPUID, and it only uses the fixed counters for version 2 and above. (Some of the ideas in this patch came from Peter Zijlstra.) Signed-off-by: Imre Palik Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Anthony Liguori Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433767609-1039-1-git-send-email-imrep.amz@gmail.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index afdd7c0..2813ea0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3324,13 +3324,13 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != FIXED_EVENT_FLAGS - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; + if (c->cmask == FIXED_EVENT_FLAGS + && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); } } -- cgit v0.10.2 From 75e84ab906ef8935cff3df3d8929f1bafea81599 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 Jun 2015 13:00:32 +0200 Subject: perf tools: Fix build breakage if prefix= is specified Invoking Makefile.perf with prefix= breaks the build since Makefile.perf hands that variable down to Makefile.build where it overrides prefix := $(subst ./,,$(OUTPUT)$(dir)/) leading to errors like this: No rule to make target '/usrabspath.o', needed by '/usrlibperf-in.o' Signed-off-by: Lukas Wunner Acked-by: Jiri Olsa Cc: David Ahern Fixes: c819e2cf2eb6f65d3208d195d7a0edef6108d5 Link: http://lkml.kernel.org/r/5582c48a.84a22b0a.a918.5285SMTPIN_ADDED_MISSING@mx.google.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 10df572..98cfc38 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) # '$(OUTPUT)/dir' prefix to all objects -prefix := $(subst ./,,$(OUTPUT)$(dir)/) -obj-y := $(addprefix $(prefix),$(obj-y)) -subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) +objprefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(objprefix),$(obj-y)) +subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) # Final '$(obj)-in.o' object -in-target := $(prefix)$(obj)-in.o +in-target := $(objprefix)$(obj)-in.o PHONY += $(subdir-y) -- cgit v0.10.2