From 8b10c5e2b59ef2a80a07ab594a3b4987a4676211 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 1 May 2015 16:08:46 +0200
Subject: perf: Annotate inherited event ctx->mutex recursion

While fuzzing, Sasha tripped over another ctx->mutex recursion lockdep
splat. Annotate this.

Reported-by: Sasha Levin
Signed-off-by: Peter Zijlstra (Intel)
Cc: Borislav Petkov
Cc: H. Peter Anvin
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: Vince Weaver
Signed-off-by: Ingo Molnar

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4..1a3bf48 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
  * Those places that change perf_event::ctx will hold both
  * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
  *
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ *  - perf_event_exit_task_context()	[ child , 0 ]
+ *      __perf_event_exit_task()
+ *        sync_child_event()
+ *          put_event()			[ parent, 1 ]
+ *
+ *  - perf_event_init_context()		[ parent, 0 ]
+ *      inherit_task_group()
+ *        inherit_group()
+ *          inherit_event()
+ *            perf_event_alloc()
+ *              perf_init_event()
+ *                perf_try_init_event()	[ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two. This is in fact safe because
+ * life-time rules separate them. That is, an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
  *
  * The change in perf_event::ctx does not affect children (as claimed above)
  * because the sys_perf_event_open() case will install a new event and break
@@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event)
 	}
 }
 
-/*
- * Called when the last reference to the file is gone.
- */
 static void put_event(struct perf_event *event)
 {
 	struct perf_event_context *ctx;
@@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
+/*
+ * Called when the last reference to the file is gone.
+ */
 static int perf_release(struct inode *inode, struct file *file)
 {
 	put_event(file->private_data);
@@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 		return -ENODEV;
 
 	if (event->group_leader != event) {
-		ctx = perf_event_ctx_lock(event->group_leader);
+		/*
+		 * This ctx->mutex can nest when we're called through
+		 * inheritance. See the perf_event_ctx_lock_nested() comment.
+		 */
+		ctx = perf_event_ctx_lock_nested(event->group_leader,
+						 SINGLE_DEPTH_NESTING);
 		BUG_ON(!ctx);
 	}
--
cgit v0.10.2
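The SINGLE_DEPTH_NESTING annotation used in the hunk above is the generic
lockdep facility for telling the validator that two locks of the same lock
class are intentionally held at once. The sketch below shows that facility in
isolation; it is not the perf code, and the example_ctx structure is invented
for illustration.

#include <linux/mutex.h>
#include <linux/lockdep.h>

/* Invented structure: parent and child share a single lock class. */
struct example_ctx {
	struct mutex lock;
};

static void lock_parent_then_child(struct example_ctx *parent,
				   struct example_ctx *child)
{
	mutex_lock(&parent->lock);		/* nesting level 0 */
	/* Same lock class again: annotate it so lockdep does not complain. */
	mutex_lock_nested(&child->lock, SINGLE_DEPTH_NESTING);

	/* ... work on both contexts ... */

	mutex_unlock(&child->lock);
	mutex_unlock(&parent->lock);
}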
From 6d374056354a742eed4d0050498101e56e794c4b Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 21 Apr 2015 05:34:41 -0400
Subject: perf/x86/intel: Fix SLM cache event list

iTLB-load-misses and LLC-load-misses count incorrectly on SLM.

There is no ITLB.MISSES support on SLM. Event PAGE_WALKS.I_SIDE_WALK
should be used to count iTLB-load-misses. This event counts when an
instruction (I) page walk is completed or started. Since a page walk
implies a TLB miss, the number of TLB misses can be counted by counting
the number of page walks.

DMND_DATA_RD counts both demand and DCU prefetch data reads. However,
LLC-load-misses should only count demand reads. There is no way to
exclude prefetches with a single counter on SLM, so LLC-load-misses
support should be removed on SLM.

Signed-off-by: Kan Liang
Signed-off-by: Peter Zijlstra (Intel)
Cc: Borislav Petkov
Cc: H. Peter Anvin
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1429608881-5055-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 960e85d..3998131 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
 	[ C(OP_READ) ] = {
 		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS) ] = 0x01b7,
+		[ C(RESULT_MISS) ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
 [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-		[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+		[ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
--
cgit v0.10.2
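For context, the slm_hw_cache_event_ids table patched above is what the kernel
consults when userspace asks for a generic hardware cache event rather than a
raw one. A minimal sketch of such a request for iTLB-load-misses follows; it
uses only the documented perf_event_open() cache-event encoding
(cache id | op << 8 | result << 16) and omits error handling.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/* Minimal sketch: open the generic iTLB read-miss event for this task. */
static int open_itlb_load_misses(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	attr.config = PERF_COUNT_HW_CACHE_ITLB |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
	attr.disabled = 1;

	/* pid 0 = current task, cpu -1 = any CPU, no group, no flags. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}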
From 44b11fee51711ca85aa2b121a49bf029d18a3722 Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Thu, 23 Apr 2015 09:07:09 +0200
Subject: perf/x86/rapl: Enable Broadwell-U RAPL support

This patch enables RAPL counters (energy consumption counters) support
for Intel Broadwell-U processors (Model 61).

To use:

  $ perf stat -a -I 1000 -e power/energy-cores/,power/energy-pkg/,power/energy-ram/ sleep 10

Signed-off-by: Stephane Eranian
Cc:
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: jacob.jun.pan@linux.intel.com
Cc: kan.liang@intel.com
Cc: peterz@infradead.org
Cc: sonnyrao@chromium.org
Link: http://lkml.kernel.org/r/20150423070709.GA4970@thinkpad
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 999289b9..358c54a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
 		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
+	case 61: /* Broadwell */
 		rapl_cntr_mask = RAPL_IDX_HSW;
 		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
 		break;
--
cgit v0.10.2

From 85a9fb47c56aa5e43c54034002232c0585a4b781 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 12 Mar 2015 15:39:13 -0700
Subject: tools: Fix tools/vm build

libapikfs.a doesn't exist anymore, so we now need to link with libapi.a.

Signed-off-by: Andi Kleen
Cc: Andrew Morton
Link: http://lkml.kernel.org/r/1426199953-15324-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo

diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index ac884b6..93aadaf 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -3,7 +3,7 @@
 TARGETS=page-types slabinfo page_owner_sort
 
 LIB_DIR = ../lib/api
-LIBS = $(LIB_DIR)/libapikfs.a
+LIBS = $(LIB_DIR)/libapi.a
 
 CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -I../lib/
--
cgit v0.10.2

From 466c1eb07f42bd27825af24d86b46d05e5e350b9 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 23 Apr 2015 15:00:16 +0100
Subject: perf tools: Use getconf to determine number of online CPUs

Parsing /proc/cpuinfo is a fiddly, arch-dependent business and a recent
change to get it working for Sparc broke arm and arm64 platforms.

Use getconf to determine the number of online CPUs, and only parse
/proc/cpuinfo when getconf is not available.

Signed-off-by: Will Deacon
Acked-by: Jiri Olsa
Tested-by: Arnaldo Carvalho de Melo
Cc: David Ahern
Cc: Mark Rutland
Cc: Namhyung Kim
Link: http://lkml.kernel.org/r/20150423140454.GJ1652@arm.com
[ Made it fall back to parsing /proc when getconf not found ]
Signed-off-by: Arnaldo Carvalho de Melo

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index c699dc3..d31a7bb 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -24,7 +24,7 @@ unexport MAKEFLAGS
 # (To override it, run 'make JOBS=1' and similar.)
 #
 ifeq ($(JOBS),)
-  JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
+  JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
   ifeq ($(JOBS),0)
     JOBS := 1
   endif
--
cgit v0.10.2
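getconf _NPROCESSORS_ONLN is the shell front end for the same query a C
program would make through sysconf(), so the fallback to /proc/cpuinfo in the
hunk above only matters on systems where getconf is missing. A small
illustrative program (not part of the patch):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Same information 'getconf _NPROCESSORS_ONLN' reports. */
	long n = sysconf(_SC_NPROCESSORS_ONLN);

	if (n < 1)	/* sysconf() can fail; fall back to one job. */
		n = 1;

	printf("%ld\n", n);
	return 0;
}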
From d1e40e592678b8955a8ed36bdeaf42929822302e Mon Sep 17 00:00:00 2001
From: Eunbong Song
Date: Fri, 24 Apr 2015 04:36:27 +0000
Subject: tools/liblockdep: Fix linker error in case of cross compile

If we try to cross compile liblockdep, a linker error can occur even
when the CROSS_COMPILE variable is set, because LD is not prefixed with
CROSS_COMPILE. Make LD default to $(CROSS_COMPILE)ld, as CC and AR
already do, which fixes the linker error.

Signed-off-by: Eunbong Song
Signed-off-by: Sasha Levin

diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 0c356fb..18ffccf 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -14,9 +14,10 @@ define allow-override
   $(eval $(1) = $(2)))
 endef
 
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
 
 INSTALL = install
--
cgit v0.10.2

From 2480257fb17ba0fdaf62bc8c2406bd08d5e7f8f1 Mon Sep 17 00:00:00 2001
From: Eunbong Song
Date: Wed, 13 May 2015 06:02:32 -0400
Subject: tools/liblockdep: Fix compilation error

Recent changes to kernel/locking/lockdep.c broke the liblockdep build.
Fix that.

Signed-off-by: Eunbong Song
Signed-off-by: Sasha Levin

diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
index a11e3c3..cd2cc59 100644
--- a/tools/lib/lockdep/uinclude/linux/kernel.h
+++ b/tools/lib/lockdep/uinclude/linux/kernel.h
@@ -28,6 +28,9 @@
 #define __init
 #define noinline
 #define list_add_tail_rcu list_add_tail
+#define list_for_each_entry_rcu list_for_each_entry
+#define barrier()
+#define synchronize_sched()
 
 #ifndef CALLER_ADDR0
 #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
--
cgit v0.10.2
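The kernel.h hunk above follows liblockdep's usual pattern: kernel-only
primitives that lockdep.c now uses are mapped to userspace-safe equivalents
(RCU list walks become plain list walks, barrier() and synchronize_sched()
become no-ops, since there is no concurrent RCU machinery to order against).
A hypothetical stub in the same spirit is sketched below; the macros shown are
invented for illustration and are not part of this patch.

#ifndef EXAMPLE_USERSPACE_STUBS_H
#define EXAMPLE_USERSPACE_STUBS_H

/*
 * Hypothetical example of the same stubbing technique: give kernel-only
 * primitives a harmless userspace definition so the kernel source
 * compiles unmodified.
 */

/* cpu_relax() is only a busy-wait hint; a compiler barrier is enough. */
#define cpu_relax()	asm volatile("" ::: "memory")

/* No softirq context in userspace, so bottom-half disabling is a no-op. */
#define local_bh_disable()	do { } while (0)
#define local_bh_enable()	do { } while (0)

#endif /* EXAMPLE_USERSPACE_STUBS_H */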