From 2dbf0116aa8c7bfa900352d3f7b2609748fcc1c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:38 -0700 Subject: perf/x86/intel: Avoid checkpointed counters causing excessive TSX aborts With checkpointed counters there can be a situation where the counter is overflowing, aborts the transaction, is set back to a non overflowing checkpoint, causes interupt. The interrupt doesn't see the overflow because it has been checkpointed. This is then a spurious PMI, typically with a ugly NMI message. It can also lead to excessive aborts. Avoid this problem by: - Using the full counter width for counting counters (earlier patch) - Forbid sampling for checkpointed counters. It's not too useful anyways, checkpointing is mainly for counting. The check is approximate (to still handle KVM), but should catch the majority of cases. - On a PMI always set back checkpointed counters to zero. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9db76c3..57d64b7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1282,6 +1282,11 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } +static inline bool event_is_checkpointed(struct perf_event *event) +{ + return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} + /* * Save and restart an expired event. Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -1289,6 +1294,17 @@ static void intel_pmu_enable_event(struct perf_event *event) int intel_pmu_save_and_restart(struct perf_event *event) { x86_perf_event_update(event); + /* + * For a checkpointed counter always reset back to 0. This + * avoids a situation where the counter overflows, aborts the + * transaction and is then set back to shortly before the + * overflow, and overflows and aborts again. + */ + if (unlikely(event_is_checkpointed(event))) { + /* No race with NMIs because the counter should not be armed */ + wrmsrl(event->hw.event_base, 0); + local64_set(&event->hw.prev_count, 0); + } return x86_perf_event_set_period(event); } @@ -1372,6 +1388,13 @@ again: x86_pmu.drain_pebs(regs); } + /* + * To avoid spurious interrupts with perf stat always reset checkpointed + * counters. + */ + if (cpuc->events[2] && event_is_checkpointed(cpuc->events[2])) + status |= (1ULL << 2); + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; @@ -1837,6 +1860,20 @@ static int hsw_hw_config(struct perf_event *event) event->attr.precise_ip > 0)) return -EOPNOTSUPP; + if (event_is_checkpointed(event)) { + /* + * Sampling of checkpointed events can cause situations where + * the CPU constantly aborts because of a overflow, which is + * then checkpointed back and ignored. Forbid checkpointing + * for sampling. + * + * But still allow a long sampling period, so that perf stat + * from KVM works. + */ + if (event->attr.sample_period > 0 && + event->attr.sample_period < 0x7fffffff) + return -EOPNOTSUPP; + } return 0; } -- cgit v0.10.2 From 748e86aa90edfddfa6016f1cf383ff5bc6aada91 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:39 -0700 Subject: perf/x86: Report TSX transaction abort cost as weight Use the existing weight reporting facility to report the transaction abort cost, that is the number of cycles wasted in aborts. Haswell reports this in the PEBS record. This was in fact the original user for weight. This is a very useful sort key to concentrate on the most costly aborts and a good metric for TSX tuning. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 63438aa..104cbba 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -182,16 +182,29 @@ struct pebs_record_nhm { * Same as pebs_record_nhm, with two additional fields. */ struct pebs_record_hsw { - struct pebs_record_nhm nhm; - /* - * Real IP of the event. In the Intel documentation this - * is called eventingrip. - */ - u64 real_ip; - /* - * TSX tuning information field: abort cycles and abort flags. - */ - u64 tsx_tuning; + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; + u64 real_ip; /* the actual eventing ip */ + u64 tsx_tuning; /* TSX abort cycles and flags */ +}; + +union hsw_tsx_tuning { + struct { + u32 cycles_last_block : 32, + hle_abort : 1, + rtm_abort : 1, + instruction_abort : 1, + non_instruction_abort : 1, + retry : 1, + data_conflict : 1, + capacity_writes : 1, + capacity_reads : 1; + }; + u64 value; }; void init_debug_store_on_cpu(int cpu) @@ -785,16 +798,26 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } +static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) +{ + if (pebs->tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + return tsx.cycles_last_block; + } + return 0; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* * We cast to pebs_record_nhm to get the load latency data * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used + * We cast to the biggest PEBS record are careful not + * to access out-of-bounds members. */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct pebs_record_nhm *pebs = __pebs; - struct pebs_record_hsw *pebs_hsw = __pebs; + struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; @@ -853,7 +876,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.sp = pebs->sp; if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs.ip = pebs_hsw->real_ip; + regs.ip = pebs->real_ip; regs.flags |= PERF_EFLAGS_EXACT; } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) regs.flags |= PERF_EFLAGS_EXACT; @@ -864,6 +887,12 @@ static void __intel_pmu_pebs_event(struct perf_event *event, x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; + /* Only set the TSX weight when no memory weight was requested. */ + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && + !fll && + (x86_pmu.intel_cap.pebs_format >= 2)) + data.weight = intel_hsw_weight(pebs); + if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; -- cgit v0.10.2 From 4b2c4f1f1b71fe18381f089c501ac21cd2167dfa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:40 -0700 Subject: perf/x86/intel: Add Haswell TSX event aliases Add TSX event aliases, and export them from the kernel to perf. These are used by perf stat -T and to allow more user friendly access to events. The events are designed to be fairly generic and may also apply to other architectures implementing HTM. They all cover common situations that happens during tuning of transactional code. For Haswell we have to separate the HLE and RTM events, as they are separate in the PMU. This adds the following events: tx-start Count start transaction (used by perf stat -T) tx-commit Count commit of transaction tx-abort Count all aborts tx-conflict Count aborts due to conflict with another CPU. tx-capacity Count capacity aborts (transaction too large) Then matching el-* events for HLE cycles-t Transactional cycles (used by perf stat -T) * also exists on POWER8 cycles-ct Transactional cycles commited (used by perf stat -T) * according to Michael Ellerman POWER8 has a cycles-transactional-committed, * perf stat -T handles both cases Note for useful abort profiling often precise has to be set, as Haswell can only report the point inside the transaction with precise=2. For some classes of aborts, like conflicts, this is not needed, as it makes more sense to look at the complete critical section. This gives a clean set of generalized events to examine transaction success and aborts. Haswell has additional events for TSX, but those are more specialized for very specific situations. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 57d64b7..dd1d4f3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2222,7 +2222,34 @@ static __init void intel_nehalem_quirk(void) EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") +/* Haswell special events */ +EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); +EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); +EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); +EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); +EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); +EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); +EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); +EVENT_ATTR_STR(cycles-ct, cycles_ct, + "event=0x3c,in_tx=1,in_tx_cp=1"); + static struct attribute *hsw_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_capacity), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_commit), + EVENT_PTR(el_abort), + EVENT_PTR(el_capacity), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), NULL -- cgit v0.10.2 From 2b9e344df384e595db24ac61ae5f780e9b024878 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Sep 2013 12:53:44 +0200 Subject: perf/x86/intel: Clean up checkpoint-interrupt bits Clean up the weird CP interrupt exception code by keeping a CP mask. Andi suggested this implementation but weirdly didn't actually implement it himself, do so now because it removes the conditional in the interrupt handler and avoids the assumption its only on cnt2. Suggested-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dvb4q0rydkfp00kqat4p5bah@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index cc16faa..ce84ede 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -164,6 +164,11 @@ struct cpu_hw_events { struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; /* + * Intel checkpoint mask + */ + u64 intel_cp_status; + + /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dd1d4f3..ec70d0c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) wrmsrl(hwc->config_base, ctrl_val); } +static inline bool event_is_checkpointed(struct perf_event *event) +{ + return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} + static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + cpuc->intel_cp_status &= ~(1ull << hwc->idx); /* * must disable before any actual event @@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event) if (event->attr.exclude_guest) cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); + if (unlikely(event_is_checkpointed(event))) + cpuc->intel_cp_status |= (1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -1282,11 +1291,6 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -static inline bool event_is_checkpointed(struct perf_event *event) -{ - return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; -} - /* * Save and restart an expired event. Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -1389,11 +1393,11 @@ again: } /* - * To avoid spurious interrupts with perf stat always reset checkpointed - * counters. + * Checkpointed counters can lead to 'spurious' PMIs because the + * rollback caused by the PMI will have cleared the overflow status + * bit. Therefore always force probe these counters. */ - if (cpuc->events[2] && event_is_checkpointed(cpuc->events[2])) - status |= (1ULL << 2); + status |= cpuc->intel_cp_status; for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; -- cgit v0.10.2 From d2beea4a3419e63804094e9ac4b6d1518bc17a9b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Sep 2013 13:00:47 +0200 Subject: perf/x86/intel: Clean-up/reduce PEBS code Get rid of some pointless duplication introduced by the Haswell code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8q6y4davda9aawwv5yxe7klp@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 104cbba..f364c13 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -188,8 +188,7 @@ struct pebs_record_hsw { u64 r8, r9, r10, r11; u64 r12, r13, r14, r15; u64 status, dla, dse, lat; - u64 real_ip; /* the actual eventing ip */ - u64 tsx_tuning; /* TSX abort cycles and flags */ + u64 real_ip, tsx_tuning; }; union hsw_tsx_tuning { @@ -811,10 +810,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* - * We cast to pebs_record_nhm to get the load latency data - * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used - * We cast to the biggest PEBS record are careful not - * to access out-of-bounds members. + * We cast to the biggest pebs_record but are careful not to + * unconditionally access the 'extra' entries. */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; @@ -884,12 +881,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && - x86_pmu.intel_cap.pebs_format >= 1) + x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; /* Only set the TSX weight when no memory weight was requested. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && - !fll && + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll && (x86_pmu.intel_cap.pebs_format >= 2)) data.weight = intel_hsw_weight(pebs); @@ -941,17 +937,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at); } -static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, - void *top) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; + void *at, *top; u64 status = 0; - int bit; + int bit, n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; ds->pebs_index = ds->pebs_buffer_base; + n = (top - at) / x86_pmu.pebs_record_size; + if (n <= 0) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; @@ -979,61 +992,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; - - ds->pebs_index = ds->pebs_buffer_base; - - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - -static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_hsw *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; - - n = top - at; - if (n <= 0) - return; - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - /* * BTS, PEBS probe and setup */ @@ -1068,7 +1026,7 @@ void intel_ds_init(void) case 2: pr_cont("PEBS fmt2%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); - x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; default: -- cgit v0.10.2 From 7f2ee91f54fb56071f97bde1ef7ba7ba0d58dfe5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Sep 2013 19:17:00 +0200 Subject: perf/x86/intel: Clean up EVENT_ATTR_STR() muck Make the code a bit more readable by removing stray whitespaces et al. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-lzEnychz1ylqy8zjenxOmeht@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ec70d0c..353b7a3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); -EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); -EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); +EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); +EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), @@ -2223,23 +2223,22 @@ static __init void intel_nehalem_quirk(void) } } -EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); -EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") +EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") /* Haswell special events */ -EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); -EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); -EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); -EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); -EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); -EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); -EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); -EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); -EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); -EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); -EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); -EVENT_ATTR_STR(cycles-ct, cycles_ct, - "event=0x3c,in_tx=1,in_tx_cp=1"); +EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); +EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); +EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); +EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); +EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); +EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); +EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); +EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); static struct attribute *hsw_events_attrs[] = { EVENT_PTR(tx_start), -- cgit v0.10.2 From eb8417aa703eff5ff43d0275f19b0a8e591d818d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Sep 2013 09:23:02 +0200 Subject: perf/x86/intel: Remove division from the intel_pmu_drain_pebs_nhm() hot path Only do the division in case we have to print the result out in a warning. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-43nl31erfbajwpfj254f6zji@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index f364c13..655d591 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -944,7 +944,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct perf_event *event = NULL; void *at, *top; u64 status = 0; - int bit, n; + int bit; if (!x86_pmu.pebs_active) return; @@ -954,16 +954,16 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - n = (top - at) / x86_pmu.pebs_record_size; - if (n <= 0) + if (unlikely(at > top)) return; /* * Should not happen, we program the threshold at 1 and do not * set a reset value. */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); + WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, + "Unexpected number of pebs records %ld\n", + (top - at) / x86_pmu.pebs_record_size); for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; -- cgit v0.10.2 From 92519bbc8af612975410def52bd462ca9af85cdb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Sep 2013 13:08:50 +0200 Subject: perf/x86/intel: Fix build warning in intel_pmu_drain_pebs_nhm() Fengguang Wu reported this build warning: arch/x86/kernel/cpu/perf_event_intel_ds.c: In function 'intel_pmu_drain_pebs_nhm': arch/x86/kernel/cpu/perf_event_intel_ds.c:964:2: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'int' Because pointer arithmetics result type is bitness dependent there's no natural type to use here, cast it to long. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-jbpauwxJqtf24luewcsdFith@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 655d591..54ff6ce 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -963,7 +963,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) */ WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, "Unexpected number of pebs records %ld\n", - (top - at) / x86_pmu.pebs_record_size); + (long)(top - at) / x86_pmu.pebs_record_size); for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; -- cgit v0.10.2 From 354cc40e3b0981c38ba2ce2964954480e6c03c37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 1 Oct 2013 07:22:15 -0700 Subject: tools/perf: Fix sorting for 64bit entries Some of the node comparisons in hist.c dropped the upper 32bit by using an int variable to store the compare result. This broke various 64bit fields, causing incorrect collapsing (found for the TSX transaction field) Just use int64_t always. Acked-by: Namhyung Kim Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1380637335-30110-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9ff6cf3..97dc280 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -346,7 +346,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists, struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - int cmp; + int64_t cmp; p = &hists->entries_in->rb_node; @@ -884,7 +884,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - int cmp; + int64_t cmp; if (sort__need_collapse) root = &hists->entries_collapsed; -- cgit v0.10.2 From 723478c8a471403c53cf144999701f6e0c4bbd11 Mon Sep 17 00:00:00 2001 From: Knut Petersen Date: Wed, 25 Sep 2013 14:29:37 +0200 Subject: perf: Enforce 1 as lower limit for perf_event_max_sample_rate /proc/sys/kernel/perf_event_max_sample_rate will accept negative values as well as 0. Negative values are unreasonable, and 0 causes a divide by zero exception in perf_proc_update_handler. This patch enforces a lower limit of 1. Signed-off-by: Knut Petersen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/5242DB0C.4070005@t-online.de Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index d49a9d2..b25d65c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(table, write, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2f06f3..2a9db91 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1049,6 +1049,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, .proc_handler = perf_proc_update_handler, + .extra1 = &one, }, { .procname = "perf_cpu_time_max_percent", -- cgit v0.10.2 From 4cabc3d1cb6a46f581a2628d1d11c483d5f300e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 21 Aug 2013 16:47:26 -0700 Subject: tools/perf/stat: Add perf stat --transaction Add support to perf stat to print the basic transactional execution statistics: Total cycles, Cycles in Transaction, Cycles in aborted transsactions using the in_tx and in_tx_checkpoint qualifiers. Transaction Starts and Elision Starts, to compute the average transaction length. This is a reasonable overview over the success of the transactions. Also support architectures that have a transaction aborted cycles counter like POWER8. Since that is awkward to handle in the kernel abstract handle both cases here. Enable with a new --transaction / -T option. This requires measuring these events in a group, since they depend on each other. This is implemented by using TM sysfs events exported by the kernel Signed-off-by: Andi Kleen Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 73c9759..80c7da6 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process After starting the program, wait msecs before measuring. This is useful to filter out the startup phase of the program, which is often very different. +-T:: +--transaction:: + +Print statistics of transactional execution if supported. + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f686d5f..cc7efee 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,6 +46,7 @@ #include "util/util.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/pmu.h" #include "util/event.h" #include "util/evlist.h" #include "util/evsel.h" @@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix); static void print_counter(struct perf_evsel *counter, char *prefix); static void print_aggr(char *prefix); +/* Default events used for perf stat -T */ +static const char * const transaction_attrs[] = { + "task-clock", + "{" + "instructions," + "cycles," + "cpu/cycles-t/," + "cpu/tx-start/," + "cpu/el-start/," + "cpu/cycles-ct/" + "}" +}; + +/* More limited version when the CPU does not have all events. */ +static const char * const transaction_limited_attrs[] = { + "task-clock", + "{" + "instructions," + "cycles," + "cpu/cycles-t/," + "cpu/tx-start/" + "}" +}; + +/* must match transaction_attrs and the beginning limited_attrs */ +enum { + T_TASK_CLOCK, + T_INSTRUCTIONS, + T_CYCLES, + T_CYCLES_IN_TX, + T_TRANSACTION_START, + T_ELISION_START, + T_CYCLES_IN_TX_CP, +}; + static struct perf_evlist *evsel_list; static struct perf_target target = { @@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; +static bool transaction_run; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; +static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS]; static struct stats walltime_nsecs_stats; +static struct stats runtime_transaction_stats[MAX_NR_CPUS]; +static struct stats runtime_elision_stats[MAX_NR_CPUS]; static void perf_stat__reset_stats(struct perf_evlist *evlist) { @@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); + memset(runtime_cycles_in_tx_stats, 0, + sizeof(runtime_cycles_in_tx_stats)); + memset(runtime_transaction_stats, 0, + sizeof(runtime_transaction_stats)); + memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); } @@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } +static struct perf_evsel *nth_evsel(int n) +{ + static struct perf_evsel **array; + static int array_len; + struct perf_evsel *ev; + int j; + + /* Assumes this only called when evsel_list does not change anymore. */ + if (!array) { + list_for_each_entry(ev, &evsel_list->entries, node) + array_len++; + array = malloc(array_len * sizeof(void *)); + if (!array) + exit(ENOMEM); + j = 0; + list_for_each_entry(ev, &evsel_list->entries, node) + array[j++] = ev; + } + if (n < array_len) + return array[n]; + return NULL; +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, @@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_nsecs_stats[0], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[0], count[0]); + else if (transaction_run && + perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) + update_stats(&runtime_cycles_in_tx_stats[0], count[0]); + else if (transaction_run && + perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) + update_stats(&runtime_transaction_stats[0], count[0]); + else if (transaction_run && + perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) + update_stats(&runtime_elision_stats[0], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) @@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu, static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { - double total, ratio = 0.0; + double total, ratio = 0.0, total2; const char *fmt; if (csv_output) @@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) ratio = 1.0 * avg / total; fprintf(output, " # %8.3f GHz ", ratio); + } else if (transaction_run && + perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { + total = avg_stats(&runtime_cycles_stats[cpu]); + if (total) + fprintf(output, + " # %5.2f%% transactional cycles ", + 100.0 * (avg / total)); + } else if (transaction_run && + perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { + total = avg_stats(&runtime_cycles_stats[cpu]); + total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]); + if (total2 < avg) + total2 = avg; + if (total) + fprintf(output, + " # %5.2f%% aborted cycles ", + 100.0 * ((total2-avg) / total)); + } else if (transaction_run && + perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && + avg > 0 && + runtime_cycles_in_tx_stats[cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); + + if (total) + ratio = total / avg; + + fprintf(output, " # %8.0f cycles / transaction ", ratio); + } else if (transaction_run && + perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && + avg > 0 && + runtime_cycles_in_tx_stats[cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); + + if (total) + ratio = total / avg; + + fprintf(output, " # %8.0f cycles / elision ", ratio); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; @@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void) return 0; } +static int setup_events(const char * const *attrs, unsigned len) +{ + unsigned i; + + for (i = 0; i < len; i++) { + if (parse_events(evsel_list, attrs[i])) + return -1; + } + return 0; +} /* * Add default attributes, if there were no attributes specified or @@ -1354,6 +1478,22 @@ static int add_default_attributes(void) if (null_run) return 0; + if (transaction_run) { + int err; + if (pmu_have_event("cpu", "cycles-ct") && + pmu_have_event("cpu", "el-start")) + err = setup_events(transaction_attrs, + ARRAY_SIZE(transaction_attrs)); + else + err = setup_events(transaction_limited_attrs, + ARRAY_SIZE(transaction_limited_attrs)); + if (err < 0) { + fprintf(stderr, "Cannot set up transaction events\n"); + return -1; + } + return 0; + } + if (!evsel_list->nr_entries) { if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) return -1; @@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) int output_fd = 0; const char *output_name = NULL; const struct option options[] = { + OPT_BOOLEAN('T', "transaction", &transaction_run, + "hardware transaction statistics"), OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. use 'perf list' to list available events", parse_events_option), diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7bdc7..5aa68cd 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (e1->attr.config == e2->attr.config); } +#define perf_evsel__cmp(a, b) \ + ((a) && \ + (b) && \ + (a)->attr.type == (b)->attr.type && \ + (a)->attr.config == (b)->attr.config) + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bc9d806..64362fe 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only) printf("\n"); free(aliases); } + +bool pmu_have_event(const char *pname, const char *name) +{ + struct perf_pmu *pmu; + struct perf_pmu_alias *alias; + + pmu = NULL; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (strcmp(pname, pmu->name)) + continue; + list_for_each_entry(alias, &pmu->aliases, list) + if (!strcmp(alias->name, name)) + return true; + } + return false; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6b2cbe2..1179b26 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); void print_pmu_events(const char *event_glob, bool name_only); +bool pmu_have_event(const char *pname, const char *name); int perf_pmu__test(void); #endif /* __PMU_H */ -- cgit v0.10.2 From fdfbbd07e91f8fe387140776f3fd94605f0c89e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:39 -0700 Subject: perf: Add generic transaction flags Add a generic qualifier for transaction events, as a new sample type that returns a flag word. This is particularly useful for qualifying aborts: to distinguish aborts which happen due to asynchronous events (like conflicts caused by another CPU) versus instructions that lead to an abort. The tuning strategies are very different for those cases, so it's important to distinguish them easily and early. Since it's inconvenient and inflexible to filter for this in the kernel we report all the events out and allow some post processing in user space. The flags are based on the Intel TSX events, but should be fairly generic and mostly applicable to other HTM architectures too. In addition to various flag words there's also reserved space to report an program supplied abort code. For TSX this is used to distinguish specific classes of aborts, like a lock busy abort when doing lock elision. Flags: Elision and generic transactions (ELISION vs TRANSACTION) (HLE vs RTM on TSX; IBM etc. would likely only use TRANSACTION) Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC) Retryable transaction (RETRY) Conflicts with other threads (CONFLICT) Transaction write capacity overflow (CAPACITY WRITE) Transaction read capacity overflow (CAPACITY READ) Transactions implicitely aborted can also return an abort code. This can be used to signal specific events to the profiler. A common case is abort on lock busy in a RTM eliding library (code 0xff) To handle this case we include the TSX abort code Common example aborts in TSX would be: - Data conflict with another thread on memory read. Flags: TRANSACTION|ASYNC|CONFLICT - executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC - HLE transaction in user space is too large Flags: ELISION|SYNC|CAPACITY-WRITE The only flag that is somewhat TSX specific is ELISION. This adds the perf core glue needed for reporting the new flag word out. v2: Add MEM/MISC v3: Move transaction to the end v4: Separate capacity-read/write and remove misc v5: Remove _SAMPLE. Move abort flags to 32bit. Rename transaction to txn Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c8ba627..2e069d1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -584,6 +584,10 @@ struct perf_sample_data { struct perf_regs_user regs_user; u64 stack_user_size; u64 weight; + /* + * Transaction flags for abort events: + */ + u64 txn; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->stack_user_size = 0; data->weight = 0; data->data_src.val = 0; + data->txn = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655..da48837 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -136,8 +136,9 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT = 1U << 14, PERF_SAMPLE_DATA_SRC = 1U << 15, PERF_SAMPLE_IDENTIFIER = 1U << 16, + PERF_SAMPLE_TRANSACTION = 1U << 17, - PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */ }; /* @@ -181,6 +182,28 @@ enum perf_sample_regs_abi { }; /* + * Values for the memory transaction event qualifier, mostly for + * abort events. Multiple bits can be set. + */ +enum { + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + + PERF_TXN_MAX = (1 << 8), /* non-ABI */ + + /* bits 32..63 are reserved for the abort code */ + + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, +}; + +/* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: * diff --git a/kernel/events/core.c b/kernel/events/core.c index b25d65c..c716385 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1201,6 +1201,9 @@ static void perf_event__header_size(struct perf_event *event) if (sample_type & PERF_SAMPLE_DATA_SRC) size += sizeof(data->data_src.val); + if (sample_type & PERF_SAMPLE_TRANSACTION) + size += sizeof(data->txn); + event->header_size = size; } @@ -4572,6 +4575,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_DATA_SRC) perf_output_put(handle, data->data_src.val); + if (sample_type & PERF_SAMPLE_TRANSACTION) + perf_output_put(handle, data->txn); + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; -- cgit v0.10.2 From a405bad5ad2086766ce320b16a56952e013327f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:40 -0700 Subject: perf/x86: Add Haswell specific transaction flag reporting In the PEBS handler report the transaction flags using the new generic transaction flags facility. Most of them come from the "tsx_tuning" field in PEBSv2, but the abort code is derived from the RAX register reported in the PEBS record. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 07d9a05..32e9ed8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -206,6 +206,8 @@ union hsw_tsx_tuning { u64 value; }; +#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL + void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -807,6 +809,16 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) return 0; } +static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) +{ + u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + + /* For RTM XABORTs also log the abort code from AX */ + if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) + txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + return txn; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { @@ -885,10 +897,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; - /* Only set the TSX weight when no memory weight was requested. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll && - (x86_pmu.intel_cap.pebs_format >= 2)) - data.weight = intel_hsw_weight(pebs); + if (x86_pmu.intel_cap.pebs_format >= 2) { + /* Only set the TSX weight when no memory weight. */ + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) + data.weight = intel_hsw_weight(pebs); + + if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) + data.txn = intel_hsw_transaction(pebs); + } if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; -- cgit v0.10.2 From f5d05bcec409aec2c41727077ad818f7c4db005b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:41 -0700 Subject: tools/perf: Support sorting by in_tx or abort branch flags Extend the perf branch sorting code to support sorting by in_tx or abort_tx qualifiers. Also print out those qualifiers. This also fixes up some of the existing sort key documentation. We do not support no_tx here, because it's simply not showing the in_tx flag. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 2b8097e..ae337e3 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -71,7 +71,7 @@ OPTIONS entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The - DWARF debuggin info must be provided. + DWARF debugging info must be provided. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -85,6 +85,8 @@ OPTIONS - symbol_from: name of function branched from - symbol_to: name of function branched to - mispredict: "N" for predicted branch, "Y" for mispredicted branch + - in_tx: branch in TSX transaction + - abort: TSX transaction abort. And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 58d6598..f852eb5 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -112,7 +112,8 @@ Default is to monitor all CPUS. -s:: --sort:: - Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight. + Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, + local_weight, abort, in_tx -n:: --show-nr-samples:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72eae74..89b188d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -787,7 +787,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," " dso_to, dso_from, symbol_to, symbol_from, mispredict," " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, " - "snoop, locked"), + "snoop, locked, abort, in_tx"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2122141..6534a37 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1103,7 +1103,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"), + "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight," + " abort, in_tx"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cf20187..acf3d66 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -182,7 +182,9 @@ struct ip_callchain { struct branch_flags { u64 mispred:1; u64 predicted:1; - u64 reserved:62; + u64 in_tx:1; + u64 abort:1; + u64 reserved:60; }; struct branch_entry { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1329b6b..f743e96 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -45,6 +45,8 @@ enum hist_column { HISTC_CPU, HISTC_SRCLINE, HISTC_MISPREDICT, + HISTC_IN_TX, + HISTC_ABORT, HISTC_SYMBOL_FROM, HISTC_SYMBOL_TO, HISTC_DSO_FROM, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5f118a0..1771566 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -858,6 +858,55 @@ struct sort_entry sort_mem_snoop = { .se_width_idx = HISTC_MEM_SNOOP, }; +static int64_t +sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.abort != + right->branch_info->flags.abort; +} + +static int hist_entry__abort_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + static const char *out = "."; + + if (self->branch_info->flags.abort) + out = "A"; + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_abort = { + .se_header = "Transaction abort", + .se_cmp = sort__abort_cmp, + .se_snprintf = hist_entry__abort_snprintf, + .se_width_idx = HISTC_ABORT, +}; + +static int64_t +sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.in_tx != + right->branch_info->flags.in_tx; +} + +static int hist_entry__in_tx_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + static const char *out = "."; + + if (self->branch_info->flags.in_tx) + out = "T"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_in_tx = { + .se_header = "Branch in transaction", + .se_cmp = sort__in_tx_cmp, + .se_snprintf = hist_entry__in_tx_snprintf, + .se_width_idx = HISTC_IN_TX, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -888,6 +937,8 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), + DIM(SORT_IN_TX, "in_tx", sort_in_tx), + DIM(SORT_ABORT, "abort", sort_abort), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 4e80dbd..9dad3a0 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -153,6 +153,8 @@ enum sort_type { SORT_SYM_FROM, SORT_SYM_TO, SORT_MISPREDICT, + SORT_ABORT, + SORT_IN_TX, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, -- cgit v0.10.2 From 0126d493b62e1306db09e1019c05e0bfe84ae8e7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:42 -0700 Subject: tools/perf/record: Add abort_tx,no_tx,in_tx branch filter options to perf record -j Make perf record -j aware of the new in_tx,no_tx,abort_tx branch qualifiers. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e297b74..6bec1c9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -166,6 +166,9 @@ following filters are defined: - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the target is at the hypervisor level + - in_tx: only when the target is in a hardware transaction + - no_tx: only when the target is not in a hardware transaction + - abort_tx: only when the target is a hardware transaction abort + The option requires at least one branch type among any, any_call, any_ret, ind_call. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a41ac415..8384b54 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -618,6 +618,9 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), + BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), + BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), BRANCH_END }; -- cgit v0.10.2 From 475eeab9f3c1579c8da89667496084db4867bf7c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:43 -0700 Subject: tools/perf: Add support for record transaction flags Add support for recording and displaying the transaction flags. They are essentially a new sort key. Also display them in a nice way to the user. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-6-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6bec1c9..f732eaa 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -179,12 +179,14 @@ is enabled for all the sampling events. The sampled branch type is the same for The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k Note that this feature may not be available on all processors. --W:: --weight:: Enable weightened sampling. An additional weight is recorded per sample and can be displayed with the weight and local_weight sort keys. This currently works for TSX abort events and some memory events in precise mode on modern Intel CPUs. +--transaction:: +Record transaction flags for transaction related events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ae337e3..be5ad87 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -72,6 +72,10 @@ OPTIONS - cpu: cpu number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. + - weight: Event specific weight, e.g. memory latency or transaction + abort cost. This is the global weight. + - local_weight: Local weight version of the weight above. + - transaction: Transaction abort flags. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f852eb5..6d70fbf 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -113,7 +113,7 @@ Default is to monitor all CPUS. -s:: --sort:: Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, - local_weight, abort, in_tx + local_weight, abort, in_tx, transaction -n:: --show-nr-samples:: diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5ebd0c3..0393d98 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1, 0); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f28799e..2a78dc8 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -304,9 +304,10 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, static int hists__add_entry(struct hists *self, struct addr_location *al, u64 period, - u64 weight) + u64 weight, u64 transaction) { - if (__hists__add_entry(self, al, NULL, period, weight) != NULL) + if (__hists__add_entry(self, al, NULL, period, weight, transaction) + != NULL) return 0; return -ENOMEM; } @@ -328,7 +329,8 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, if (al.filtered) return 0; - if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) { + if (hists__add_entry(&evsel->hists, &al, sample->period, + sample->weight, sample->transaction)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8384b54..a78db3f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -894,6 +894,8 @@ const struct option record_options[] = { parse_branch_stack), OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, "sample by weight (on special events only)"), + OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, + "sample transaction flags (special events only)"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 89b188d..06e1abe 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -259,7 +259,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, } he = __hists__add_entry(&evsel->hists, al, parent, sample->period, - sample->weight); + sample->weight, sample->transaction); if (he == NULL) return -ENOMEM; @@ -787,7 +787,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," " dso_to, dso_from, symbol_to, symbol_from, mispredict," " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, " - "snoop, locked, abort, in_tx"), + "snoop, locked, abort, in_tx, transaction"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6534a37..b3e0229 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -247,9 +247,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, pthread_mutex_lock(&evsel->hists.lock); he = __hists__add_entry(&evsel->hists, al, NULL, sample->period, - sample->weight); + sample->weight, sample->transaction); pthread_mutex_unlock(&evsel->hists.lock); - if (he == NULL) return NULL; @@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show counter open errors, etc)"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight," - " abort, in_tx"), + " abort, in_tx, transaction"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index acf3d66..84502e8 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -233,6 +233,7 @@ struct perf_record_opts { u64 default_interval; u64 user_interval; u16 stack_dump_size; + bool sample_transaction; }; #endif diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 4228ffc..025503a 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -222,7 +222,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, &al, NULL, + 1, 1, 0); if (he == NULL) goto out; @@ -244,7 +245,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); + he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1, + 0); if (he == NULL) goto out; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c67ecc4..17d9e16 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -111,6 +111,7 @@ struct perf_sample { u64 stream_id; u64 period; u64 weight; + u64 transaction; u32 cpu; u32 raw_size; u64 data_src; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0ce9feb..abe69af 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -681,6 +681,9 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->sample_transaction) + attr->sample_type |= PERF_SAMPLE_TRANSACTION; + /* * XXX see the function comment above * @@ -1470,6 +1473,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } + data->transaction = 0; + if (type & PERF_SAMPLE_TRANSACTION) { + data->transaction = *array; + array++; + } + return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 97dc280..f3278a3 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -160,6 +160,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + + if (h->transaction) + hists__new_col_len(hists, HISTC_TRANSACTION, + hist_entry__transaction_len()); } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -466,7 +470,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self, struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *sym_parent, u64 period, - u64 weight) + u64 weight, u64 transaction) { struct hist_entry entry = { .thread = al->thread, @@ -487,6 +491,7 @@ struct hist_entry *__hists__add_entry(struct hists *self, .hists = self, .branch_info = NULL, .mem_info = NULL, + .transaction = transaction, }; return add_hist_entry(self, &entry, al, period, weight); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f743e96..6a048c0 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -59,6 +59,7 @@ enum hist_column { HISTC_MEM_TLB, HISTC_MEM_LVL, HISTC_MEM_SNOOP, + HISTC_TRANSACTION, HISTC_NR_COLS, /* Last entry */ }; @@ -84,9 +85,10 @@ struct hists { struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *parent, u64 period, - u64 weight); + u64 weight, u64 transaction); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); +int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 70ffa41..211b325 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -858,6 +858,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); + if (sample_type & PERF_SAMPLE_TRANSACTION) + printf("... transaction: %" PRIx64 "\n", sample->transaction); + if (sample_type & PERF_SAMPLE_READ) sample_read__printf(sample, evsel->attr.read_format); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1771566..b4ecc0e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -907,6 +907,78 @@ struct sort_entry sort_in_tx = { .se_width_idx = HISTC_IN_TX, }; +static int64_t +sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->transaction - right->transaction; +} + +static inline char *add_str(char *p, const char *str) +{ + strcpy(p, str); + return p + strlen(str); +} + +static struct txbit { + unsigned flag; + const char *name; + int skip_for_len; +} txbits[] = { + { PERF_TXN_ELISION, "EL ", 0 }, + { PERF_TXN_TRANSACTION, "TX ", 1 }, + { PERF_TXN_SYNC, "SYNC ", 1 }, + { PERF_TXN_ASYNC, "ASYNC ", 0 }, + { PERF_TXN_RETRY, "RETRY ", 0 }, + { PERF_TXN_CONFLICT, "CON ", 0 }, + { PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 }, + { PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 }, + { 0, NULL, 0 } +}; + +int hist_entry__transaction_len(void) +{ + int i; + int len = 0; + + for (i = 0; txbits[i].name; i++) { + if (!txbits[i].skip_for_len) + len += strlen(txbits[i].name); + } + len += 4; /* :XX */ + return len; +} + +static int hist_entry__transaction_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + u64 t = self->transaction; + char buf[128]; + char *p = buf; + int i; + + buf[0] = 0; + for (i = 0; txbits[i].name; i++) + if (txbits[i].flag & t) + p = add_str(p, txbits[i].name); + if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC))) + p = add_str(p, "NEITHER "); + if (t & PERF_TXN_ABORT_MASK) { + sprintf(p, ":%" PRIx64, + (t & PERF_TXN_ABORT_MASK) >> + PERF_TXN_ABORT_SHIFT); + p += strlen(p); + } + + return repsep_snprintf(bf, size, "%-*s", width, buf); +} + +struct sort_entry sort_transaction = { + .se_header = "Transaction ", + .se_cmp = sort__transaction_cmp, + .se_snprintf = hist_entry__transaction_snprintf, + .se_width_idx = HISTC_TRANSACTION, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -925,6 +997,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SRCLINE, "srcline", sort_srcline), DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), + DIM(SORT_TRANSACTION, "transaction", sort_transaction), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9dad3a0..bf43336 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -85,6 +85,7 @@ struct hist_entry { struct map_symbol ms; struct thread *thread; u64 ip; + u64 transaction; s32 cpu; struct hist_entry_diff diff; @@ -145,6 +146,7 @@ enum sort_type { SORT_SRCLINE, SORT_LOCAL_WEIGHT, SORT_GLOBAL_WEIGHT, + SORT_TRANSACTION, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit v0.10.2 From b7af41a1bc255c0098c37a4bcf5c7e5e168ce875 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:44 -0700 Subject: perf/x86: Suppress duplicated abort LBR records Haswell always give an extra LBR record after every TSX abort. Suppress the extra record. This only works when the abort is visible in the LBR If the original abort has already left the 16 LBR entries the extra entry will will stay. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-7-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ce84ede..fd00bb2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -445,6 +445,7 @@ struct x86_pmu { int lbr_nr; /* hardware stack size */ u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ + bool lbr_double_abort; /* duplicated lbr aborts */ /* * Extra registers for events diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 36b5ab8..0fa4f24 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2519,6 +2519,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.lbr_double_abort = true; pr_cont("Haswell events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d5be06a..90ee6c1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; + int out = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; @@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) } from = (u64)((((s64)from) << skip) >> skip); - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].mispred = mis; - cpuc->lbr_entries[i].predicted = pred; - cpuc->lbr_entries[i].in_tx = in_tx; - cpuc->lbr_entries[i].abort = abort; - cpuc->lbr_entries[i].reserved = 0; + /* + * Some CPUs report duplicated abort records, + * with the second entry not having an abort bit set. + * Skip them here. This loop runs backwards, + * so we need to undo the previous record. + * If the abort just happened outside the window + * the extra entry cannot be removed. + */ + if (abort && x86_pmu.lbr_double_abort && out > 0) + out--; + + cpuc->lbr_entries[out].from = from; + cpuc->lbr_entries[out].to = to; + cpuc->lbr_entries[out].mispred = mis; + cpuc->lbr_entries[out].predicted = pred; + cpuc->lbr_entries[out].in_tx = in_tx; + cpuc->lbr_entries[out].abort = abort; + cpuc->lbr_entries[out].reserved = 0; + out++; } - cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.nr = out; } void intel_pmu_lbr_read(void) -- cgit v0.10.2 From 89fe808ae777728da6e1d78b7d13562792310d17 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 12:07:11 +0200 Subject: tools/perf: Standardize feature support define names to: HAVE_{FEATURE}_SUPPORT Standardize all the feature flags based on the HAVE_{FEATURE}_SUPPORT naming convention: HAVE_ARCH_X86_64_SUPPORT HAVE_BACKTRACE_SUPPORT HAVE_CPLUS_DEMANGLE_SUPPORT HAVE_DWARF_SUPPORT HAVE_ELF_GETPHDRNUM_SUPPORT HAVE_GTK2_SUPPORT HAVE_GTK_INFO_BAR_SUPPORT HAVE_LIBAUDIT_SUPPORT HAVE_LIBELF_MMAP_SUPPORT HAVE_LIBELF_SUPPORT HAVE_LIBNUMA_SUPPORT HAVE_LIBUNWIND_SUPPORT HAVE_ON_EXIT_SUPPORT HAVE_PERF_REGS_SUPPORT HAVE_SLANG_SUPPORT HAVE_STRLCPY_SUPPORT Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-u3zvqejddfZhtrbYbfhi3spa@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 7fcdcdb..e84ca76 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -5,7 +5,7 @@ #include "../../util/types.h" #include -#ifndef ARCH_X86_64 +#ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) #else #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ @@ -52,7 +52,7 @@ static inline const char *perf_reg_name(int id) return "FS"; case PERF_REG_X86_GS: return "GS"; -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT case PERF_REG_X86_R8: return "R8"; case PERF_REG_X86_R9: @@ -69,7 +69,7 @@ static inline const char *perf_reg_name(int id) return "R14"; case PERF_REG_X86_R15: return "R15"; -#endif /* ARCH_X86_64 */ +#endif /* HAVE_ARCH_X86_64_SUPPORT */ default: return NULL; } diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c index 78d956e..456a88c 100644 --- a/tools/perf/arch/x86/util/unwind.c +++ b/tools/perf/arch/x86/util/unwind.c @@ -4,7 +4,7 @@ #include "perf_regs.h" #include "../../util/unwind.h" -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT int unwind__arch_reg_id(int regnum) { int id; @@ -108,4 +108,4 @@ int unwind__arch_reg_id(int regnum) return id; } -#endif /* ARCH_X86_64 */ +#endif /* HAVE_ARCH_X86_64_SUPPORT */ diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index a72e36c..57b4ed8 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -1,5 +1,5 @@ -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) \ extern void *fn(void *, const void *, size_t); diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 8cdca43..5ce71d3 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -58,7 +58,7 @@ struct routine routines[] = { { "default", "Default memcpy() provided by glibc", memcpy }, -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) { name, desc, fn }, #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index a040fa7..633800c 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -1,5 +1,5 @@ -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) \ extern void *fn(void *, int, size_t); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 4a2f120..9af79d2 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -58,7 +58,7 @@ static const struct routine routines[] = { { "default", "Default memset() provided by glibc", memset }, -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) { name, desc, fn }, #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 77298bf..33af80f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -35,7 +35,7 @@ struct bench_suite { /* sentinel: easy for help */ #define suite_all { "all", "Test all benchmark suites", NULL } -#ifdef LIBNUMA_SUPPORT +#ifdef HAVE_LIBNUMA_SUPPORT static struct bench_suite numa_suites[] = { { "mem", "Benchmark for NUMA workloads", @@ -80,7 +80,7 @@ struct bench_subsys { }; static struct bench_subsys subsystems[] = { -#ifdef LIBNUMA_SUPPORT +#ifdef HAVE_LIBNUMA_SUPPORT { "numa", "NUMA scheduling and MM behavior", numa_suites }, diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index afe377b..f51a963 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -231,7 +231,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, * account this as unresolved. */ } else { -#ifdef LIBELF_SUPPORT +#ifdef HAVE_LIBELF_SUPPORT pr_warning("no symbols found in %s, maybe " "install a debug package?\n", al.map->dso->long_name); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e8a66f9..89acc17 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -173,7 +173,7 @@ static int opt_set_target(const struct option *opt, const char *str, if (str && !params.target) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT else if (!strcmp(opt->long_name, "module")) params.uprobes = false; #endif @@ -187,7 +187,7 @@ static int opt_set_target(const struct option *opt, const char *str, return ret; } -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT static int opt_show_lines(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) { @@ -257,7 +257,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "perf probe [] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [] --del '[GROUP:]EVENT' ...", "perf probe --list", -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT "perf probe [] --line 'LINEDESC'", "perf probe [] --vars 'PROBEPOINT'", #endif @@ -271,7 +271,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", opt_del_probe_event), OPT_CALLBACK('a', "add", NULL, -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT" " [[NAME=]ARG ...]", #else @@ -283,7 +283,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "\t\tFUNC:\tFunction name\n" "\t\tOFF:\tOffset from function entry (in byte)\n" "\t\t%return:\tPut the probe at function return\n" -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT "\t\tSRC:\tSource code path\n" "\t\tRL:\tRelative line number from function entry.\n" "\t\tAL:\tAbsolute line number in file.\n" @@ -296,7 +296,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_add_probe_event), OPT_BOOLEAN('f', "force", ¶ms.force_add, "forcibly add events" " with existing name"), -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT OPT_CALLBACK('L', "line", NULL, "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]", "Show source code lines.", opt_show_lines), @@ -408,7 +408,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return ret; } -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT if (params.show_lines && !params.uprobes) { if (params.mod_events) { pr_err(" Error: Don't use --line with" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a78db3f..cf36ba2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -29,7 +29,7 @@ #include #include -#ifndef HAVE_ON_EXIT +#ifndef HAVE_ON_EXIT_SUPPORT #ifndef ATEXIT_MAX #define ATEXIT_MAX 32 #endif @@ -687,7 +687,7 @@ error: return ret; } -#ifdef LIBUNWIND_SUPPORT +#ifdef HAVE_LIBUNWIND_SUPPORT static int get_stack_size(char *str, unsigned long *_size) { char *endptr; @@ -713,7 +713,7 @@ static int get_stack_size(char *str, unsigned long *_size) max_size, str); return -1; } -#endif /* LIBUNWIND_SUPPORT */ +#endif /* HAVE_LIBUNWIND_SUPPORT */ int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset) @@ -751,7 +751,7 @@ int record_parse_callchain_opt(const struct option *opt, "needed for -g fp\n"); break; -#ifdef LIBUNWIND_SUPPORT +#ifdef HAVE_LIBUNWIND_SUPPORT /* Dwarf style */ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { const unsigned long default_stack_dump_size = 8192; @@ -771,7 +771,7 @@ int record_parse_callchain_opt(const struct option *opt, if (!ret) pr_debug("callchain: stack dump size %d\n", opts->stack_dump_size); -#endif /* LIBUNWIND_SUPPORT */ +#endif /* HAVE_LIBUNWIND_SUPPORT */ } else { pr_err("callchain: Unknown -g option " "value: %s\n", arg); @@ -818,7 +818,7 @@ static struct perf_record record = { #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " -#ifdef LIBUNWIND_SUPPORT +#ifdef HAVE_LIBUNWIND_SUPPORT const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; #else const char record_callchain_help[] = CALLCHAIN_HELP "[fp]"; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5f6f9b3..34be743 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -23,7 +23,7 @@ ifeq ($(ARCH),x86_64) endif ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 - CFLAGS += -DARCH_X86_64 + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif NO_PERF_REGS := 0 @@ -31,7 +31,7 @@ ifeq ($(ARCH),x86_64) endif ifeq ($(NO_PERF_REGS),0) - CFLAGS += -DHAVE_PERF_REGS + CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif ifeq ($(src-perf),) @@ -175,13 +175,13 @@ endif # SOURCE_LIBELF endif # NO_LIBELF ifndef NO_LIBELF -CFLAGS += -DLIBELF_SUPPORT +CFLAGS += -DHAVE_LIBELF_SUPPORT FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) - CFLAGS += -DLIBELF_MMAP +ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif -ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y) - CFLAGS += -DHAVE_ELF_GETPHDRNUM +ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM_SUPPORT),y) + CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif # include ARCH specific config @@ -192,7 +192,7 @@ ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); NO_DWARF := 1 else - CFLAGS += -DDWARF_SUPPORT $(LIBDW_CFLAGS) + CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -lelf -ldw endif # PERF_HAVE_DWARF_REGS @@ -201,10 +201,10 @@ endif # NO_DWARF endif # NO_LIBELF ifndef NO_LIBELF -CFLAGS += -DLIBELF_SUPPORT +CFLAGS += -DHAVE_LIBELF_SUPPORT FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) - CFLAGS += -DLIBELF_MMAP +ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif # try-cc endif # NO_LIBELF @@ -229,7 +229,7 @@ endif # Libunwind support endif # NO_LIBUNWIND ifndef NO_LIBUNWIND - CFLAGS += -DLIBUNWIND_SUPPORT + CFLAGS += -DHAVE_LIBUNWIND_SUPPORT EXTLIBS += $(LIBUNWIND_LIBS) CFLAGS += $(LIBUNWIND_CFLAGS) LDFLAGS += $(LIBUNWIND_LDFLAGS) @@ -241,7 +241,7 @@ ifndef NO_LIBAUDIT msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); NO_LIBAUDIT := 1 else - CFLAGS += -DLIBAUDIT_SUPPORT + CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit endif endif @@ -258,7 +258,7 @@ ifndef NO_SLANG else # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h CFLAGS += -I/usr/include/slang - CFLAGS += -DSLANG_SUPPORT + CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang endif endif @@ -269,10 +269,10 @@ ifndef NO_GTK2 msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); NO_GTK2 := 1 else - ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y) - CFLAGS += -DHAVE_GTK_INFO_BAR + ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR_SUPPORT),y) + CFLAGS += -DHAVE_GTK_INFO_BAR_SUPPORT endif - CFLAGS += -DGTK2_SUPPORT + CFLAGS += -DHAVE_GTK2_SUPPORT CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) endif @@ -365,9 +365,9 @@ endif ifdef NO_DEMANGLE CFLAGS += -DNO_DEMANGLE else - ifdef HAVE_CPLUS_DEMANGLE + ifdef HAVE_CPLUS_DEMANGLE_SUPPORT EXTLIBS += -liberty - CFLAGS += -DHAVE_CPLUS_DEMANGLE + CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd) @@ -388,7 +388,7 @@ else has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle) ifeq ($(has_cplus_demangle),y) EXTLIBS += -liberty - CFLAGS += -DHAVE_CPLUS_DEMANGLE + CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) CFLAGS += -DNO_DEMANGLE @@ -400,20 +400,20 @@ else endif ifndef NO_STRLCPY - ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y) - CFLAGS += -DHAVE_STRLCPY + ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY_SUPPORT),y) + CFLAGS += -DHAVE_STRLCPY_SUPPORT endif endif ifndef NO_ON_EXIT - ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y) - CFLAGS += -DHAVE_ON_EXIT + ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT_SUPPORT),y) + CFLAGS += -DHAVE_ON_EXIT_SUPPORT endif endif ifndef NO_BACKTRACE - ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y) - CFLAGS += -DBACKTRACE_SUPPORT + ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DHAVE_BACKTRACE_SUPPORT),y) + CFLAGS += -DHAVE_BACKTRACE_SUPPORT endif endif @@ -423,7 +423,7 @@ ifndef NO_LIBNUMA msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev); NO_LIBNUMA := 1 else - CFLAGS += -DLIBNUMA_SUPPORT + CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma endif endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 85e1aed..245020c 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -49,14 +49,14 @@ static struct cmd_struct commands[] = { { "version", cmd_version, 0 }, { "script", cmd_script, 0 }, { "sched", cmd_sched, 0 }, -#ifdef LIBELF_SUPPORT +#ifdef HAVE_LIBELF_SUPPORT { "probe", cmd_probe, 0 }, #endif { "kmem", cmd_kmem, 0 }, { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#ifdef LIBAUDIT_SUPPORT +#ifdef HAVE_LIBAUDIT_SUPPORT { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index c95012c..c24d912 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -43,7 +43,7 @@ const char *perf_gtk__get_percent_color(double percent) return NULL; } -#ifdef HAVE_GTK_INFO_BAR +#ifdef HAVE_GTK_INFO_BAR_SUPPORT GtkWidget *perf_gtk__setup_info_bar(void) { GtkWidget *info_bar; diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 3d96785..a72acbc 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -12,7 +12,7 @@ struct perf_gtk_context { GtkWidget *main_window; GtkWidget *notebook; -#ifdef HAVE_GTK_INFO_BAR +#ifdef HAVE_GTK_INFO_BAR_SUPPORT GtkWidget *info_bar; GtkWidget *message_label; #endif @@ -39,7 +39,7 @@ void perf_gtk__resize_window(GtkWidget *window); const char *perf_gtk__get_percent_color(double percent); GtkWidget *perf_gtk__setup_statusbar(void); -#ifdef HAVE_GTK_INFO_BAR +#ifdef HAVE_GTK_INFO_BAR_SUPPORT GtkWidget *perf_gtk__setup_info_bar(void); #else static inline GtkWidget *perf_gtk__setup_info_bar(void) diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index c06942a..696c1fb 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -53,7 +53,7 @@ static int perf_gtk__error(const char *format, va_list args) return 0; } -#ifdef HAVE_GTK_INFO_BAR +#ifdef HAVE_GTK_INFO_BAR_SUPPORT static int perf_gtk__warning_info_bar(const char *format, va_list args) { char *msg; @@ -105,7 +105,7 @@ static int perf_gtk__warning_statusbar(const char *format, va_list args) struct perf_error_ops perf_gtk_eops = { .error = perf_gtk__error, -#ifdef HAVE_GTK_INFO_BAR +#ifdef HAVE_GTK_INFO_BAR_SUPPORT .warning = perf_gtk__warning_info_bar, #else .warning = perf_gtk__warning_statusbar, diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h index 70cb0d4..1349d14 100644 --- a/tools/perf/ui/ui.h +++ b/tools/perf/ui/ui.h @@ -12,7 +12,7 @@ extern int use_browser; void setup_browser(bool fallback_to_pager); void exit_browser(bool wait_for_ok); -#ifdef SLANG_SUPPORT +#ifdef HAVE_SLANG_SUPPORT int ui__init(void); void ui__exit(bool wait_for_ok); #else @@ -23,7 +23,7 @@ static inline int ui__init(void) static inline void ui__exit(bool wait_for_ok __maybe_unused) {} #endif -#ifdef GTK2_SUPPORT +#ifdef HAVE_GTK2_SUPPORT int perf_gtk__init(void); void perf_gtk__exit(bool wait_for_ok); #else diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index af75515..f0699e9 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines); -#ifdef SLANG_SUPPORT +#ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -165,7 +165,7 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, } #endif -#ifdef GTK2_SUPPORT +#ifdef HAVE_GTK2_SUPPORT int symbol__gtk_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 26e3672..442953c 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -70,7 +70,7 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -#ifndef HAVE_STRLCPY +#ifndef HAVE_STRLCPY_SUPPORT extern size_t strlcpy(char *dest, const char *src, size_t size); #endif diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 3ac3803..36a885d 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -22,7 +22,7 @@ do }' "Documentation/perf-$cmd.txt" done -echo "#ifdef LIBELF_SUPPORT" +echo "#ifdef HAVE_LIBELF_SUPPORT" sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt | sort | while read cmd @@ -35,5 +35,5 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* LIBELF_SUPPORT */" +echo "#endif /* HAVE_LIBELF_SUPPORT */" echo "};" diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6a048c0..ed4f90e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -187,7 +187,7 @@ struct hist_browser_timer { int refresh; }; -#ifdef SLANG_SUPPORT +#ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -228,7 +228,7 @@ static inline int script_browse(const char *script_opt __maybe_unused) #define K_SWITCH_INPUT_DATA -3000 #endif -#ifdef GTK2_SUPPORT +#ifdef HAVE_GTK2_SUPPORT int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt); diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index cf6727e..8f14965 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -1,7 +1,7 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT const char *get_arch_regstr(unsigned int n); #endif diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4f6680d..17ee458 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -172,7 +172,7 @@ int map__load(struct map *map, symbol_filter_t filter) pr_warning(", continuing without symbols\n"); return -1; } else if (nr == 0) { -#ifdef LIBELF_SUPPORT +#ifdef HAVE_LIBELF_SUPPORT const size_t len = strlen(name); const size_t real_len = len - sizeof(DSO__DELETED); diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index a8c4954..f395874 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -22,7 +22,7 @@ static const char *get_perf_dir(void) return "."; } -#ifndef HAVE_STRLCPY +#ifndef HAVE_STRLCPY_SUPPORT size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 5a4f2b6f..a3d42cd 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -1,7 +1,7 @@ #ifndef __PERF_REGS_H #define __PERF_REGS_H -#ifdef HAVE_PERF_REGS +#ifdef HAVE_PERF_REGS_SUPPORT #include #else #define PERF_REGS_MASK 0 @@ -10,5 +10,5 @@ static inline const char *perf_reg_name(int id __maybe_unused) { return NULL; } -#endif /* HAVE_PERF_REGS */ +#endif /* HAVE_PERF_REGS_SUPPORT */ #endif /* __PERF_REGS_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index aa04bf9..779b2da 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -201,7 +201,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, return 0; } -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) { @@ -630,7 +630,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, return ret; } -#else /* !DWARF_SUPPORT */ +#else /* !HAVE_DWARF_SUPPORT */ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, struct perf_probe_point *pp) diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3b7d630..3f0c29d 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -14,7 +14,7 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef DWARF_SUPPORT +#ifdef HAVE_DWARF_SUPPORT #include "dwarf-aux.h" @@ -105,6 +105,6 @@ struct line_finder { int found; }; -#endif /* DWARF_SUPPORT */ +#endif /* HAVE_DWARF_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a9c829b..c376930 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -8,7 +8,7 @@ #include "symbol.h" #include "debug.h" -#ifndef HAVE_ELF_GETPHDRNUM +#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT static int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fd5b70e..2a97bb1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -13,7 +13,7 @@ #include #include "build-id.h" -#ifdef LIBELF_SUPPORT +#ifdef HAVE_LIBELF_SUPPORT #include #include #endif @@ -21,7 +21,7 @@ #include "dso.h" -#ifdef HAVE_CPLUS_DEMANGLE +#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT extern char *cplus_demangle(const char *, int); static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) @@ -46,7 +46,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: */ -#ifdef LIBELF_MMAP +#ifdef HAVE_LIBELF_MMAP_SUPPORT # define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP #else # define PERF_ELF_C_READ_MMAP ELF_C_READ @@ -178,7 +178,7 @@ struct symsrc { int fd; enum dso_binary_type type; -#ifdef LIBELF_SUPPORT +#ifdef HAVE_LIBELF_SUPPORT Elf *elf; GElf_Ehdr ehdr; diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index cb6bc50..ec0c71a 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -13,7 +13,7 @@ struct unwind_entry { typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); -#ifdef LIBUNWIND_SUPPORT +#ifdef HAVE_LIBUNWIND_SUPPORT int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, @@ -31,5 +31,5 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, { return 0; } -#endif /* LIBUNWIND_SUPPORT */ +#endif /* HAVE_LIBUNWIND_SUPPORT */ #endif /* __UNWIND_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 6d17b18..ccfdeb6 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,7 +1,7 @@ #include "../perf.h" #include "util.h" #include -#ifdef BACKTRACE_SUPPORT +#ifdef HAVE_BACKTRACE_SUPPORT #include #endif #include @@ -204,7 +204,7 @@ int hex2u64(const char *ptr, u64 *long_val) } /* Obtain a backtrace and print it to stdout. */ -#ifdef BACKTRACE_SUPPORT +#ifdef HAVE_BACKTRACE_SUPPORT void dump_stack(void) { void *array[16]; -- cgit v0.10.2 From b6aa9979416e2e98a800925d60ad00e83bc7cb7a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 10:08:24 +0200 Subject: tools/perf/build: Add feature check core code Start the split-out of the feature check code by adding a list of features to be tested, and rules to process that list by building its matching feature-check file in config/feature-checks/test-.c. Add 'hello' as the initial feature. This structure will allow us to build split-out feature checks in parallel and thus speed up feature detection dramatically. No change in functionality: no feature check is used by the build rules yet. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-pixkihgscFaohfFigq5yt9gs@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 34be743..daefe2d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -89,6 +89,22 @@ CFLAGS += -std=gnu99 EXTLIBS = -lelf -lpthread -lrt -lm -ldl +feature_check = $(eval $(feature_check_code)); $(info CHK: config/feature-checks/test-$(1)) +define feature_check_code + feature-$(2) := $(shell make -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) +endef + +# +# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: +# +$(info Testing features:) +$(shell make -i -j -C config/feature-checks >/dev/null 2>&1) +$(info done) + +FEATURE_TESTS = hello + +$(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) + ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) CFLAGS += -fstack-protector-all endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile new file mode 100644 index 0000000..b3f6372 --- /dev/null +++ b/tools/perf/config/feature-checks/Makefile @@ -0,0 +1,16 @@ + +FILES=test-hello + +all: $(FILES) + +BUILD = $(CC) -o $(OUTPUT)$@ $@.c + +############################### + +test-hello: test-hello.c + $(BUILD) + +############################### + +clean: + rm -f $(FILES) diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/perf/config/feature-checks/test-hello.c new file mode 100644 index 0000000..c9f398d --- /dev/null +++ b/tools/perf/config/feature-checks/test-hello.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return puts("hi"); +} -- cgit v0.10.2 From 8b6eb56a9570001634df1d2c7f38e7179a357362 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 13:51:28 +0200 Subject: tools/perf/build: Add 'autodep' functionality, generate feature test dependencies automatically Use GCC's -MD feature to generate a dependency file for each feature test .c file, and include that .d file in the config/feature-checks/Makefile. This allows us to do two things: - speed up feature tests - detect removal or changes in build dependencies - including system libraries/headers Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-Jfma8pmPnnqzpxjbs3hpgmsj@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index b3f6372..4708cca 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -1,16 +1,20 @@ FILES=test-hello +CC := $(CC) -MD + all: $(FILES) BUILD = $(CC) -o $(OUTPUT)$@ $@.c ############################### -test-hello: test-hello.c +test-hello: $(BUILD) +-include *.d */*.d + ############################### clean: - rm -f $(FILES) + rm -f $(FILES) *.d -- cgit v0.10.2 From 3ae069cfda88349fa61de9631f878d39ab60764f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 13:37:10 +0200 Subject: tools/perf/build: Split out feature check: 'libnuma' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-vixsrpggxFjhz7kppqgrGr6s@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index daefe2d..f39fc22 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -101,7 +101,9 @@ $(info Testing features:) $(shell make -i -j -C config/feature-checks >/dev/null 2>&1) $(info done) -FEATURE_TESTS = hello +FEATURE_TESTS = \ + hello \ + libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -434,8 +436,7 @@ ifndef NO_BACKTRACE endif ifndef NO_LIBNUMA - FLAGS_LIBNUMA = $(CFLAGS) $(LDFLAGS) -lnuma - ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y) + ifeq ($(feature-libnuma), 0) msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev); NO_LIBNUMA := 1 else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 4708cca..6a42ad2 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -1,5 +1,7 @@ -FILES=test-hello +FILES= \ + test-hello \ + test-libnuma CC := $(CC) -MD @@ -12,6 +14,9 @@ BUILD = $(CC) -o $(OUTPUT)$@ $@.c test-hello: $(BUILD) +test-libnuma: + $(BUILD) -lnuma + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/perf/config/feature-checks/test-libnuma.c new file mode 100644 index 0000000..70510a9 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libnuma.c @@ -0,0 +1,8 @@ +#include +#include + +int main(void) +{ + numa_available(); + return 0; +} -- cgit v0.10.2 From 90ac5422b617fcab0c6f02249502ffc17dfbf6e3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 13:48:44 +0200 Subject: tools/perf/build: Split out feature check: 'stackprotector-all' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-gupddm9clctVYws3lyexfdhg@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f39fc22..1a67371 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -103,11 +103,12 @@ $(info done) FEATURE_TESTS = \ hello \ + stackprotector-all \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) -ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) +ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 6a42ad2..7538c14 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -1,6 +1,7 @@ FILES= \ test-hello \ + test-stackprotector-all \ test-libnuma CC := $(CC) -MD @@ -14,6 +15,9 @@ BUILD = $(CC) -o $(OUTPUT)$@ $@.c test-hello: $(BUILD) +test-stackprotector-all: + $(BUILD) -Werror -fstack-protector-all + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/perf/config/feature-checks/test-stackprotector-all.c new file mode 100644 index 0000000..c9f398d --- /dev/null +++ b/tools/perf/config/feature-checks/test-stackprotector-all.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return puts("hi"); +} -- cgit v0.10.2 From 430be5ab0a50a9116c689b2b2ea7acd7a635aabe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 09:47:00 +0200 Subject: tools/perf/build: Split out feature check: 'stackprotector' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-eyLYjhskzn6qxkoyjtjic4ap@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 1a67371..9f94912 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -104,6 +104,7 @@ $(info done) FEATURE_TESTS = \ hello \ stackprotector-all \ + stackprotector \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -112,7 +113,7 @@ ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all endif -ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y) +ifeq ($(feature-stackprotector), 1) CFLAGS += -Wstack-protector endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 7538c14..46b7650 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -2,6 +2,7 @@ FILES= \ test-hello \ test-stackprotector-all \ + test-stackprotector \ test-libnuma CC := $(CC) -MD @@ -18,6 +19,9 @@ test-hello: test-stackprotector-all: $(BUILD) -Werror -fstack-protector-all +test-stackprotector: + $(BUILD) -Werror -fstack-protector + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-stackprotector.c b/tools/perf/config/feature-checks/test-stackprotector.c new file mode 100644 index 0000000..c9f398d --- /dev/null +++ b/tools/perf/config/feature-checks/test-stackprotector.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return puts("hi"); +} -- cgit v0.10.2 From c25104452de94102810e86323eabb7d9a4cad083 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 13:58:12 +0200 Subject: tools/perf/build: Split out feature check: 'volatile-register-var' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-5dOevlybbwvbk3zTbcxrrqet@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 9f94912..a3de2f8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -105,6 +105,7 @@ FEATURE_TESTS = \ hello \ stackprotector-all \ stackprotector \ + volatile-register-var \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -117,7 +118,7 @@ ifeq ($(feature-stackprotector), 1) CFLAGS += -Wstack-protector endif -ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y) +ifeq ($(feature-volatile-register-var), 1) CFLAGS += -Wvolatile-register-var endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 46b7650..5693299 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -3,6 +3,7 @@ FILES= \ test-hello \ test-stackprotector-all \ test-stackprotector \ + test-volatile-register-var \ test-libnuma CC := $(CC) -MD @@ -22,6 +23,9 @@ test-stackprotector-all: test-stackprotector: $(BUILD) -Werror -fstack-protector +test-volatile-register-var: + $(BUILD) -Werror -Wvolatile-register-var + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-volatile-register-var.c b/tools/perf/config/feature-checks/test-volatile-register-var.c new file mode 100644 index 0000000..c9f398d --- /dev/null +++ b/tools/perf/config/feature-checks/test-volatile-register-var.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return puts("hi"); +} -- cgit v0.10.2 From 1ea6f99efd8ae61fce68c97a9cf9f722cfbca3ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 09:38:28 +0200 Subject: tools/perf/build: Split out feature check: 'fortify-source' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-wicrcLCy2wkalka7iwsuzgpb@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index a3de2f8..7a614f9 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -106,6 +106,7 @@ FEATURE_TESTS = \ stackprotector-all \ stackprotector \ volatile-register-var \ + fortify-source \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -123,7 +124,7 @@ ifeq ($(feature-volatile-register-var), 1) endif ifndef PERF_DEBUG - ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y) + ifeq ($(feature-fortify-source), 1) CFLAGS += -D_FORTIFY_SOURCE=2 endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 5693299..529317e 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -4,6 +4,7 @@ FILES= \ test-stackprotector-all \ test-stackprotector \ test-volatile-register-var \ + test-fortify-source \ test-libnuma CC := $(CC) -MD @@ -26,6 +27,9 @@ test-stackprotector: test-volatile-register-var: $(BUILD) -Werror -Wvolatile-register-var +test-fortify-source: + $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/perf/config/feature-checks/test-fortify-source.c new file mode 100644 index 0000000..c9f398d --- /dev/null +++ b/tools/perf/config/feature-checks/test-fortify-source.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return puts("hi"); +} -- cgit v0.10.2 From 78e9d6550807aedfc1f81c199bd4681c09d80ac5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:11:46 +0200 Subject: tools/perf/build: Split out feature check: 'bionic' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-xjfVewprrfhlo2wuzbnpVb1k@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 7a614f9..09e2ecc 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -107,6 +107,7 @@ FEATURE_TESTS = \ stackprotector \ volatile-register-var \ fortify-source \ + bionic \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -150,12 +151,13 @@ CFLAGS += -I$(LIB_INCLUDE) CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ifndef NO_BIONIC -ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y) - BIONIC := 1 - EXTLIBS := $(filter-out -lrt,$(EXTLIBS)) - EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) + $(feature_check,bionic) + ifeq ($(feature-bionic), 1) + BIONIC := 1 + EXTLIBS := $(filter-out -lrt,$(EXTLIBS)) + EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) + endif endif -endif # NO_BIONIC ifdef NO_LIBELF NO_DWARF := 1 diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 529317e..191df97 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -5,6 +5,7 @@ FILES= \ test-stackprotector \ test-volatile-register-var \ test-fortify-source \ + test-bionic \ test-libnuma CC := $(CC) -MD @@ -30,6 +31,9 @@ test-volatile-register-var: test-fortify-source: $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 +test-bionic: + $(BUILD) + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/perf/config/feature-checks/test-bionic.c new file mode 100644 index 0000000..eac24e9 --- /dev/null +++ b/tools/perf/config/feature-checks/test-bionic.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return __ANDROID_API__; +} -- cgit v0.10.2 From 50eed7a71ebaf6676be8c497192b978f07581326 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:11:16 +0200 Subject: tools/perf/build: Clean up the libelf logic in config/Makefile Nest the rules properly. No change in functionality. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-dDgivr9xtjrof2vmoyOfwxkj@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 09e2ecc..a2e0e1b 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -164,38 +164,38 @@ ifdef NO_LIBELF NO_DEMANGLE := 1 NO_LIBUNWIND := 1 else -FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y) - FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS) - ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y) - LIBC_SUPPORT := 1 - endif - ifeq ($(BIONIC),1) - LIBC_SUPPORT := 1 - endif - ifeq ($(LIBC_SUPPORT),1) - msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev); + FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) + ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y) + FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS) + ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y) + LIBC_SUPPORT := 1 + endif + ifeq ($(BIONIC),1) + LIBC_SUPPORT := 1 + endif + ifeq ($(LIBC_SUPPORT),1) + msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev); - NO_LIBELF := 1 - NO_DWARF := 1 - NO_DEMANGLE := 1 + NO_LIBELF := 1 + NO_DWARF := 1 + NO_DEMANGLE := 1 + else + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); + endif else - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); - endif -else - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif + # for linking with debug library, run like: + # make DEBUG=1 LIBDW_DIR=/opt/libdw/ + ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib + endif - FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS) - ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y) - msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); - NO_DWARF := 1 - endif # Dwarf support -endif # SOURCE_LIBELF + FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS) + ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y) + msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); + NO_DWARF := 1 + endif # Dwarf support + endif # SOURCE_LIBELF endif # NO_LIBELF ifndef NO_LIBELF -- cgit v0.10.2 From 8f7f8005f526c23d3e5537f5ab68c1c3fb422453 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:20:25 +0200 Subject: tools/perf/build: Split out feature check: 'libelf' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-qznhihaasbysfqO7ffvRsf9q@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index a2e0e1b..6865428 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -108,6 +108,7 @@ FEATURE_TESTS = \ volatile-register-var \ fortify-source \ bionic \ + libelf \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -164,8 +165,7 @@ ifdef NO_LIBELF NO_DEMANGLE := 1 NO_LIBUNWIND := 1 else - FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) - ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y) + ifeq ($(feature-libelf), 0) FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS) ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y) LIBC_SUPPORT := 1 diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 191df97..789a38d2 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -6,6 +6,7 @@ FILES= \ test-volatile-register-var \ test-fortify-source \ test-bionic \ + test-libelf \ test-libnuma CC := $(CC) -MD @@ -34,6 +35,9 @@ test-fortify-source: test-bionic: $(BUILD) +test-libelf: + $(BUILD) -lelf + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/perf/config/feature-checks/test-libelf.c new file mode 100644 index 0000000..1a08f97 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libelf.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + Elf *elf = elf_begin(0, ELF_C_READ, 0); + return (long)elf; +} -- cgit v0.10.2 From e12762cfd9b4ec8f9bb5863eea403253175df9f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 10:34:20 +0200 Subject: tools/perf/build: Split out feature check: 'glibc' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-nqnnsptw7ivOzhzbNjiun7ds@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 6865428..8cd0fd8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -109,6 +109,7 @@ FEATURE_TESTS = \ fortify-source \ bionic \ libelf \ + glibc \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -166,8 +167,7 @@ ifdef NO_LIBELF NO_LIBUNWIND := 1 else ifeq ($(feature-libelf), 0) - FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS) - ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y) + ifeq ($(feature-glibc), 1) LIBC_SUPPORT := 1 endif ifeq ($(BIONIC),1) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 789a38d2..c0569c7 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -7,6 +7,7 @@ FILES= \ test-fortify-source \ test-bionic \ test-libelf \ + test-glibc \ test-libnuma CC := $(CC) -MD @@ -38,6 +39,9 @@ test-bionic: test-libelf: $(BUILD) -lelf +test-glibc: + $(BUILD) + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/perf/config/feature-checks/test-glibc.c new file mode 100644 index 0000000..13c66a5 --- /dev/null +++ b/tools/perf/config/feature-checks/test-glibc.c @@ -0,0 +1,8 @@ +#include + +int main(void) +{ + const char *version = gnu_get_libc_version(); + return (long)version; +} + -- cgit v0.10.2 From 8295d4e27262dc49177e70dcc8ba34fa26343cf2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 10:35:39 +0200 Subject: tools/perf/build: Split out feature check: 'dwarf' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-dsx0fn9mjwfprizboANvtuup@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 8cd0fd8..89630b8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -110,6 +110,7 @@ FEATURE_TESTS = \ bionic \ libelf \ glibc \ + dwarf \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -190,8 +191,7 @@ else LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif - FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS) - ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y) + ifneq ($(feature-dwarf), 1) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 endif # Dwarf support diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index c0569c7..566a71d 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -8,6 +8,7 @@ FILES= \ test-bionic \ test-libelf \ test-glibc \ + test-dwarf \ test-libnuma CC := $(CC) -MD @@ -42,6 +43,9 @@ test-libelf: test-glibc: $(BUILD) +test-dwarf: + $(BUILD) -ldw + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/perf/config/feature-checks/test-dwarf.c new file mode 100644 index 0000000..783dfcd --- /dev/null +++ b/tools/perf/config/feature-checks/test-dwarf.c @@ -0,0 +1,9 @@ +#include +#include +#include + +int main(void) +{ + Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + return (long)dbg; +} -- cgit v0.10.2 From fb3d333b3faa6bf975f00973c6a6271b2ab93c0c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 10:05:51 +0200 Subject: tools/perf/build: Clean up the mmap logic in config/Makefile Nest the rules properly. No change in functionality. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-wwktuHl4Ra5lyrrretkxmxqf@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 89630b8..8a27de2 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -199,37 +199,38 @@ else endif # NO_LIBELF ifndef NO_LIBELF -CFLAGS += -DHAVE_LIBELF_SUPPORT -FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) - CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT -endif -ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM_SUPPORT),y) - CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT -endif + CFLAGS += -DHAVE_LIBELF_SUPPORT + FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -# include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile + ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT + endif -ifndef NO_DWARF -ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); - NO_DWARF := 1 -else - CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) - LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += -lelf -ldw -endif # PERF_HAVE_DWARF_REGS -endif # NO_DWARF + ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM_SUPPORT),y) + CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT + endif + + # include ARCH specific config + -include $(src-perf)/arch/$(ARCH)/Makefile + ifndef NO_DWARF + ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) + msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + NO_DWARF := 1 + else + CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) + LDFLAGS += $(LIBDW_LDFLAGS) + EXTLIBS += -lelf -ldw + endif # PERF_HAVE_DWARF_REGS + endif # NO_DWARF endif # NO_LIBELF ifndef NO_LIBELF -CFLAGS += -DHAVE_LIBELF_SUPPORT -FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) - CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT -endif # try-cc + CFLAGS += -DHAVE_LIBELF_SUPPORT + FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) + ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT + endif # try-cc endif # NO_LIBELF # There's only x86 (both 32 and 64) support for CFI unwind so far -- cgit v0.10.2 From 8869b17ee0bdd0da3d1f7d7ced284ab444c2c6d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:02:28 +0200 Subject: tools/perf/build: Split out feature check: 'libelf-mmap' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-9fxnxjcmrgbSvipxlwsdQ8fg@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 8a27de2..bf1f021 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -111,6 +111,7 @@ FEATURE_TESTS = \ libelf \ glibc \ dwarf \ + libelf-mmap \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -202,7 +203,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) - ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif @@ -227,8 +228,7 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT - FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) - ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DHAVE_LIBELF_MMAP_SUPPORT),y) + ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif # try-cc endif # NO_LIBELF diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 566a71d..bf96e34 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -9,6 +9,7 @@ FILES= \ test-libelf \ test-glibc \ test-dwarf \ + test-libelf-mmap \ test-libnuma CC := $(CC) -MD @@ -46,6 +47,9 @@ test-glibc: test-dwarf: $(BUILD) -ldw +test-libelf-mmap: + $(BUILD) -lelf + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/perf/config/feature-checks/test-libelf-mmap.c new file mode 100644 index 0000000..1c64815 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libelf-mmap.c @@ -0,0 +1,7 @@ +#include +# +int main(void) +{ + Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); + return (long)elf; +} -- cgit v0.10.2 From b7bcef6f8e61580e883672b6c97148d4f89e9874 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:35:27 +0200 Subject: tools/perf/build: Split out feature check: 'libelf-getphdrnum' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-wa9qstb8erbjreLxiHepzjfw@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index bf1f021..718b476 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -112,6 +112,7 @@ FEATURE_TESTS = \ glibc \ dwarf \ libelf-mmap \ + libelf-getphdrnum \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -207,7 +208,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT endif - ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM_SUPPORT),y) + ifeq ($(feature-libelf-getphdrnum), 1) CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif @@ -230,7 +231,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT - endif # try-cc + endif endif # NO_LIBELF # There's only x86 (both 32 and 64) support for CFI unwind so far diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index bf96e34..83b3a02 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -10,6 +10,7 @@ FILES= \ test-glibc \ test-dwarf \ test-libelf-mmap \ + test-libelf-getphdrnum \ test-libnuma CC := $(CC) -MD @@ -50,6 +51,9 @@ test-dwarf: test-libelf-mmap: $(BUILD) -lelf +test-libelf-getphdrnum: + $(BUILD) -lelf + test-libnuma: $(BUILD) -lnuma diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c new file mode 100644 index 0000000..58eca53 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c @@ -0,0 +1,7 @@ +#include +# +int main(void) +{ + size_t dst; + return elf_getphdrnum(0, &dst); +} -- cgit v0.10.2 From 308e1e700a1337b89c7530a4f4cdde5ccb52fb4e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 10:30:47 +0200 Subject: tools/perf/build: Clean up the libunwind logic in config/Makefile Nest the rules properly. No change in functionality. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-jjlmizjmhockUs04wqnScnkl@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 718b476..0d75587 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -240,19 +240,19 @@ ifneq ($(ARCH),x86) endif ifndef NO_LIBUNWIND -# for linking with debug library, run like: -# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ -ifdef LIBUNWIND_DIR - LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib -endif + # for linking with debug library, run like: + # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ + ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib + endif -FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) -ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y) - msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); - NO_LIBUNWIND := 1 -endif # Libunwind support -endif # NO_LIBUNWIND + FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) + ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y) + msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); + NO_LIBUNWIND := 1 + endif +endif ifndef NO_LIBUNWIND CFLAGS += -DHAVE_LIBUNWIND_SUPPORT -- cgit v0.10.2 From 058f952de9b3075cd888dc3cea60691db0ec4d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:45:44 +0200 Subject: tools/perf/build: Split out feature check: 'libunwind' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-vTiatsVyva3tfgh3vhxaidxl@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 0d75587..d684a29 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -113,6 +113,7 @@ FEATURE_TESTS = \ dwarf \ libelf-mmap \ libelf-getphdrnum \ + libunwind \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -240,15 +241,17 @@ ifneq ($(ARCH),x86) endif ifndef NO_LIBUNWIND - # for linking with debug library, run like: - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ + # + # For linking with debug library, run like: + # + # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ + # ifdef LIBUNWIND_DIR LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib endif - FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) - ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y) + ifneq ($(feature-libunwind), 1) msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); NO_LIBUNWIND := 1 endif @@ -259,7 +262,7 @@ ifndef NO_LIBUNWIND EXTLIBS += $(LIBUNWIND_LIBS) CFLAGS += $(LIBUNWIND_CFLAGS) LDFLAGS += $(LIBUNWIND_LDFLAGS) -endif # NO_LIBUNWIND +endif ifndef NO_LIBAUDIT FLAGS_LIBAUDIT = $(CFLAGS) $(LDFLAGS) -laudit diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 83b3a02..d6d9570 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -11,6 +11,7 @@ FILES= \ test-dwarf \ test-libelf-mmap \ test-libelf-getphdrnum \ + test-libunwind \ test-libnuma CC := $(CC) -MD @@ -57,6 +58,9 @@ test-libelf-getphdrnum: test-libnuma: $(BUILD) -lnuma +test-libunwind: + $(BUILD) -lunwind -lunwind-x86_64 -lelf + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/perf/config/feature-checks/test-libunwind.c new file mode 100644 index 0000000..5622746 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libunwind.c @@ -0,0 +1,20 @@ +#include +#include + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +int main(void) +{ + unw_addr_space_t addr_space; + addr_space = unw_create_addr_space(NULL, 0); + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + return 0; +} -- cgit v0.10.2 From d795a658eb0ab5ab0b86cda5abe28954b0a08471 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:55:31 +0200 Subject: tools/perf/build: Split out feature check: 'libaudit' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-orhejqtjao3vf4wxwBUdzhaz@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d684a29..d4f18f4 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -114,6 +114,7 @@ FEATURE_TESTS = \ libelf-mmap \ libelf-getphdrnum \ libunwind \ + libaudit \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -265,8 +266,7 @@ ifndef NO_LIBUNWIND endif ifndef NO_LIBAUDIT - FLAGS_LIBAUDIT = $(CFLAGS) $(LDFLAGS) -laudit - ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y) + ifneq ($(feature-libaudit), 1) msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); NO_LIBAUDIT := 1 else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index d6d9570..8e49fa0 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -12,6 +12,7 @@ FILES= \ test-libelf-mmap \ test-libelf-getphdrnum \ test-libunwind \ + test-libaudit \ test-libnuma CC := $(CC) -MD @@ -61,6 +62,9 @@ test-libnuma: test-libunwind: $(BUILD) -lunwind -lunwind-x86_64 -lelf +test-libaudit: + $(BUILD) -laudit + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/perf/config/feature-checks/test-libaudit.c new file mode 100644 index 0000000..854a65d --- /dev/null +++ b/tools/perf/config/feature-checks/test-libaudit.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + printf("error message: %s\n", audit_errno_to_name(0)); + return audit_open(); +} -- cgit v0.10.2 From b9498b508a0d601029f1040a51e9a5a4aecbb926 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 14:57:54 +0200 Subject: tools/perf/build: Split out feature check: 'libslang' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-FGmpkydfwqlkaw7yy8ewjpza@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d4f18f4..43713c6 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -115,6 +115,7 @@ FEATURE_TESTS = \ libelf-getphdrnum \ libunwind \ libaudit \ + libslang \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -280,8 +281,7 @@ ifdef NO_NEWT endif ifndef NO_SLANG - FLAGS_SLANG=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang - ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y) + ifneq ($(feature-libslang), 1) msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev); NO_SLANG := 1 else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 8e49fa0..c9b15b0 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -13,6 +13,7 @@ FILES= \ test-libelf-getphdrnum \ test-libunwind \ test-libaudit \ + test-libslang \ test-libnuma CC := $(CC) -MD @@ -65,6 +66,9 @@ test-libunwind: test-libaudit: $(BUILD) -laudit +test-libslang: + $(BUILD) -I/usr/include/slang -lslang + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/perf/config/feature-checks/test-libslang.c new file mode 100644 index 0000000..22ff22e --- /dev/null +++ b/tools/perf/config/feature-checks/test-libslang.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return SLsmg_init_smg(); +} -- cgit v0.10.2 From 7ef9e055ce1d8ad93f636bde1bf050eef26c798b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:01:56 +0200 Subject: tools/perf/build: Split out feature check: 'gtk2' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-gfwzurn7wywiviLp7Swyyqsy@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 43713c6..b3bf931 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -116,6 +116,7 @@ FEATURE_TESTS = \ libunwind \ libaudit \ libslang \ + gtk2 \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -294,7 +295,7 @@ endif ifndef NO_GTK2 FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) - ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y) + ifneq ($(feature-gtk2), 1) msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); NO_GTK2 := 1 else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index c9b15b0..920958c 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -14,6 +14,7 @@ FILES= \ test-libunwind \ test-libaudit \ test-libslang \ + test-gtk2 \ test-libnuma CC := $(CC) -MD @@ -69,6 +70,9 @@ test-libaudit: test-libslang: $(BUILD) -I/usr/include/slang -lslang +test-gtk2: + $(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/perf/config/feature-checks/test-gtk2.c new file mode 100644 index 0000000..1ac6d8a --- /dev/null +++ b/tools/perf/config/feature-checks/test-gtk2.c @@ -0,0 +1,10 @@ +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include +#pragma GCC diagnostic error "-Wstrict-prototypes" + +int main(int argc, char *argv[]) +{ + gtk_init(&argc, &argv); + + return 0; +} -- cgit v0.10.2 From c7a79e96dc166a1bdadca1367a39c84887e73de3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:08:30 +0200 Subject: tools/perf/build: Split out feature check: 'gtk2-infobar' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-oumjyVjonjvgH8ts4mftagel@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index b3bf931..3d3d435 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -117,6 +117,7 @@ FEATURE_TESTS = \ libaudit \ libslang \ gtk2 \ + gtk2-infobar \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -299,7 +300,7 @@ ifndef NO_GTK2 msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); NO_GTK2 := 1 else - ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR_SUPPORT),y) + ifeq ($(feature-gtk2-infobar), 1) CFLAGS += -DHAVE_GTK_INFO_BAR_SUPPORT endif CFLAGS += -DHAVE_GTK2_SUPPORT diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 920958c..017918f 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -15,6 +15,7 @@ FILES= \ test-libaudit \ test-libslang \ test-gtk2 \ + test-gtk2-infobar \ test-libnuma CC := $(CC) -MD @@ -73,6 +74,9 @@ test-libslang: test-gtk2: $(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) +test-gtk2-infobar: + $(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/perf/config/feature-checks/test-gtk2-infobar.c new file mode 100644 index 0000000..eebcfbc --- /dev/null +++ b/tools/perf/config/feature-checks/test-gtk2-infobar.c @@ -0,0 +1,10 @@ +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include +#pragma GCC diagnostic error "-Wstrict-prototypes" + +int main(void) +{ + gtk_info_bar_new(); + + return 0; +} -- cgit v0.10.2 From 7181a6714efc5039ffb50db7462d5cefe15b5630 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:15:36 +0200 Subject: tools/perf/build: Split out feature check: 'libperl' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-ggucqbwFwpxyuxde6dm7itHq@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 3d3d435..8124dd5 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -118,6 +118,7 @@ FEATURE_TESTS = \ libslang \ gtk2 \ gtk2-infobar \ + libperl \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -321,7 +322,7 @@ else PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) - ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y) + ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 017918f..3033c25 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -16,6 +16,7 @@ FILES= \ test-libslang \ test-gtk2 \ test-gtk2-infobar \ + test-libperl \ test-libnuma CC := $(CC) -MD @@ -77,6 +78,18 @@ test-gtk2: test-gtk2-infobar: $(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) + +PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) +PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) +PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) +PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) + +test-libperl: + $(BUILD) $(FLAGS_PERL_EMBED) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/perf/config/feature-checks/test-libperl.c new file mode 100644 index 0000000..8871f6a --- /dev/null +++ b/tools/perf/config/feature-checks/test-libperl.c @@ -0,0 +1,9 @@ +#include +#include + +int main(void) +{ + perl_alloc(); + + return 0; +} -- cgit v0.10.2 From 9734163b6ee1425c6fa4b65d7e6ce34c9079420d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:18:37 +0200 Subject: tools/perf/build: Split out feature check: 'libpython' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-9wfutfb8ufFHrddrwlejqrai@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 8124dd5..595c0e0 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -119,6 +119,7 @@ FEATURE_TESTS = \ gtk2 \ gtk2-infobar \ libperl \ + libpython \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -367,7 +368,7 @@ else PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) - ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y) + ifneq ($(feature-libpython), 1) $(call disable-python,Python.h (for Python 2.x)) else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 3033c25..e7ed05a 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -17,6 +17,7 @@ FILES= \ test-gtk2 \ test-gtk2-infobar \ test-libperl \ + test-libpython \ test-libnuma CC := $(CC) -MD @@ -90,6 +91,23 @@ FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) test-libperl: $(BUILD) $(FLAGS_PERL_EMBED) +override PYTHON := python +override PYTHON_CONFIG := python-config + +escape-for-shell-sq = $(subst ','\'',$(1)) +shell-sq = '$(escape-for-shell-sq)' + +PYTHON_CONFIG_SQ = $(call shell-sq,$(PYTHON_CONFIG)) + +PYTHON_EMBED_LDOPTS = $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) +PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) +PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) +PYTHON_EMBED_CCOPTS = $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +FLAGS_PYTHON_EMBED = $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + +test-libpython: + $(BUILD) $(FLAGS_PYTHON_EMBED) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/perf/config/feature-checks/test-libpython.c new file mode 100644 index 0000000..7226797 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libpython.c @@ -0,0 +1,7 @@ +#include +# +int main(void) +{ + Py_Initialize(); + return 0; +} -- cgit v0.10.2 From 95d061c8a9f36e8c2cc458f97bc67716571b3fee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:40:04 +0200 Subject: tools/perf/build: Split out feature check: 'libpython-version' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-raHmlqlnv0zexsrPau8hhane@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 595c0e0..63ba0698 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -120,6 +120,7 @@ FEATURE_TESTS = \ gtk2-infobar \ libperl \ libpython \ + libpython-version \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -372,7 +373,7 @@ else $(call disable-python,Python.h (for Python 2.x)) else - ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y) + ifneq ($(feature-libpython-version), 1) $(warning Python 3 is not yet supported; please set) $(warning PYTHON and/or PYTHON_CONFIG appropriately.) $(warning If you also have Python 2 installed, then) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index e7ed05a..d15074d 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -18,6 +18,7 @@ FILES= \ test-gtk2-infobar \ test-libperl \ test-libpython \ + test-libpython-version \ test-libnuma CC := $(CC) -MD @@ -108,6 +109,9 @@ FLAGS_PYTHON_EMBED = $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) test-libpython: $(BUILD) $(FLAGS_PYTHON_EMBED) +test-libpython-version: + $(BUILD) $(FLAGS_PYTHON_EMBED) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/perf/config/feature-checks/test-libpython-version.c new file mode 100644 index 0000000..facea12 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libpython-version.c @@ -0,0 +1,10 @@ +#include + +#if PY_VERSION_HEX >= 0x03000000 + #error +#endif + +int main(void) +{ + return 0; +} -- cgit v0.10.2 From 3b7646e45d110f53d4c0fa0a63158c3d2a763e60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 15:53:31 +0200 Subject: tools/perf/build: Split out feature check: 'libbfd' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-cdxdfv7Corpfvjg9Skezhvjn@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 63ba0698..7e13969 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -121,6 +121,7 @@ FEATURE_TESTS = \ libperl \ libpython \ libpython-version \ + libbfd \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -404,8 +405,7 @@ else CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd - has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd) - ifeq ($(has_bfd),y) + ifeq ($(feature-libbfd), 1) EXTLIBS += -lbfd else FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index d15074d..af65aaa 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -19,6 +19,7 @@ FILES= \ test-libperl \ test-libpython \ test-libpython-version \ + test-libbfd \ test-libnuma CC := $(CC) -MD @@ -112,6 +113,9 @@ test-libpython: test-libpython-version: $(BUILD) $(FLAGS_PYTHON_EMBED) +test-libbfd: + $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/perf/config/feature-checks/test-libbfd.c new file mode 100644 index 0000000..d03339c --- /dev/null +++ b/tools/perf/config/feature-checks/test-libbfd.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + bfd_demangle(0, 0, 0); + return 0; +} -- cgit v0.10.2 From d0707c9172e5d286fc119dcef5ea1621ef3bfade Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 8 Oct 2013 10:10:45 +0200 Subject: tools/perf/build: Split out feature check: 'strlcpy' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-ektO8cgvupthhyqqczSok2sr@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 7e13969..c8ac4df 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -121,6 +121,7 @@ FEATURE_TESTS = \ libperl \ libpython \ libpython-version \ + strlcpy \ libbfd \ libnuma @@ -434,7 +435,7 @@ else endif ifndef NO_STRLCPY - ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY_SUPPORT),y) + ifeq ($(feature-strlcpy), 1) CFLAGS += -DHAVE_STRLCPY_SUPPORT endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index af65aaa..d348aa0 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -19,6 +19,7 @@ FILES= \ test-libperl \ test-libpython \ test-libpython-version \ + test-strlcpy \ test-libbfd \ test-libnuma @@ -113,6 +114,9 @@ test-libpython: test-libpython-version: $(BUILD) $(FLAGS_PYTHON_EMBED) +test-strlcpy: + $(BUILD) + test-libbfd: $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl diff --git a/tools/perf/config/feature-checks/test-strlcpy.c b/tools/perf/config/feature-checks/test-strlcpy.c new file mode 100644 index 0000000..4a6b6ff --- /dev/null +++ b/tools/perf/config/feature-checks/test-strlcpy.c @@ -0,0 +1,8 @@ +#include +extern size_t strlcpy(char *dest, const char *src, size_t size); + +int main(void) +{ + strlcpy(NULL, NULL, 0); + return 0; +} -- cgit v0.10.2 From 34ef21622f9028efafe7950504a9ce90d9cc15a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 16:46:49 +0200 Subject: tools/perf/build: Split out feature check: 'on-exit' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-gmywXandzfxnlcbzlX6bkpw1@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c8ac4df..44affb5 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -123,6 +123,7 @@ FEATURE_TESTS = \ libpython-version \ strlcpy \ libbfd \ + on-exit \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -441,7 +442,7 @@ ifndef NO_STRLCPY endif ifndef NO_ON_EXIT - ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT_SUPPORT),y) + ifeq ($(feature-on-exit), 1) CFLAGS += -DHAVE_ON_EXIT_SUPPORT endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index d348aa0..20f1b8c 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -21,6 +21,7 @@ FILES= \ test-libpython-version \ test-strlcpy \ test-libbfd \ + test-on-exit \ test-libnuma CC := $(CC) -MD @@ -120,6 +121,9 @@ test-strlcpy: test-libbfd: $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl +test-on-exit: + $(BUILD) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c new file mode 100644 index 0000000..473f1de --- /dev/null +++ b/tools/perf/config/feature-checks/test-on-exit.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return on_exit(NULL, NULL); +} -- cgit v0.10.2 From 4cc9117a35b2810fb84454514a9136e5f2945751 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 30 Sep 2013 16:49:38 +0200 Subject: tools/perf/build: Split out feature check: 'backtrace' Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-ihnwe6cvglVkudyvcavP1wql@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 44affb5..c0c8344 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -124,6 +124,7 @@ FEATURE_TESTS = \ strlcpy \ libbfd \ on-exit \ + backtrace \ libnuma $(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) @@ -448,7 +449,7 @@ ifndef NO_ON_EXIT endif ifndef NO_BACKTRACE - ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DHAVE_BACKTRACE_SUPPORT),y) + ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 20f1b8c..0e4dbc2 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -22,6 +22,7 @@ FILES= \ test-strlcpy \ test-libbfd \ test-on-exit \ + test-backtrace \ test-libnuma CC := $(CC) -MD @@ -124,6 +125,9 @@ test-libbfd: test-on-exit: $(BUILD) +test-backtrace: + $(BUILD) + -include *.d */*.d ############################### diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/perf/config/feature-checks/test-backtrace.c new file mode 100644 index 0000000..5b79468 --- /dev/null +++ b/tools/perf/config/feature-checks/test-backtrace.c @@ -0,0 +1,10 @@ +#include +#include + +int main(void) +{ + backtrace(NULL, 0); + backtrace_symbols(NULL, 0); + + return 0; +} -- cgit v0.10.2 From 7a10822a30060eceddda16c051086c00acb449c9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 11:26:18 +0200 Subject: tools/perf: Clean up util/include/linux/compiler.h Use the standard CPP style we use in the kernel: #ifndef foo # define foo bar #endif Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-iqyVrrHqpn0eiwenvgwrh8lf@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 96b919d..bef4d3d 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -2,20 +2,25 @@ #define _PERF_LINUX_COMPILER_H_ #ifndef __always_inline -#define __always_inline inline +# define __always_inline inline __attribute__((always_inline)) #endif + #define __user + #ifndef __attribute_const__ -#define __attribute_const__ +# define __attribute_const__ #endif #ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) +# define __maybe_unused __attribute__((unused)) +#endif + +#ifndef __packed +# define __packed __attribute__((__packed__)) #endif -#define __packed __attribute__((__packed__)) #ifndef __force -#define __force +# define __force #endif #endif -- cgit v0.10.2 From fb1c9185e36cf9c616ac15f54e54a01f052672bd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 1 Oct 2013 13:26:13 +0200 Subject: tools/perf: Turn strlcpy() into a __weak function The strlcpy() feature check slows every build unnecessarily - so make it a __weak function so it does not have to be auto-detected. If the libc (or any other library) has an strlcpy() implementation it will be used - otherwise our fallback is active. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-zjbrcupapu08ePsyYhhhxiwk@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c0c8344..3207c25 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -121,7 +121,6 @@ FEATURE_TESTS = \ libperl \ libpython \ libpython-version \ - strlcpy \ libbfd \ on-exit \ backtrace \ @@ -436,12 +435,6 @@ else endif endif -ifndef NO_STRLCPY - ifeq ($(feature-strlcpy), 1) - CFLAGS += -DHAVE_STRLCPY_SUPPORT - endif -endif - ifndef NO_ON_EXIT ifeq ($(feature-on-exit), 1) CFLAGS += -DHAVE_ON_EXIT_SUPPORT diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 0e4dbc2..c65bdac 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -19,7 +19,6 @@ FILES= \ test-libperl \ test-libpython \ test-libpython-version \ - test-strlcpy \ test-libbfd \ test-on-exit \ test-backtrace \ @@ -116,9 +115,6 @@ test-libpython: test-libpython-version: $(BUILD) $(FLAGS_PYTHON_EMBED) -test-strlcpy: - $(BUILD) - test-libbfd: $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl diff --git a/tools/perf/config/feature-checks/test-strlcpy.c b/tools/perf/config/feature-checks/test-strlcpy.c deleted file mode 100644 index 4a6b6ff..0000000 --- a/tools/perf/config/feature-checks/test-strlcpy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include -extern size_t strlcpy(char *dest, const char *src, size_t size); - -int main(void) -{ - strlcpy(NULL, NULL, 0); - return 0; -} diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 442953c..7b176dd 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -70,8 +70,7 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -#ifndef HAVE_STRLCPY_SUPPORT +/* Matches the libc/libbsd function attribute so we declare this unconditionally: */ extern size_t strlcpy(char *dest, const char *src, size_t size); -#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index bef4d3d..b003ad7 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -23,4 +23,8 @@ # define __force #endif +#ifndef __weak +# define __weak __attribute__((weak)) +#endif + #endif diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index f395874..5d13cb4 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -22,19 +22,23 @@ static const char *get_perf_dir(void) return "."; } -#ifndef HAVE_STRLCPY_SUPPORT -size_t strlcpy(char *dest, const char *src, size_t size) +/* + * If libc has strlcpy() then that version will override this + * implementation: + */ +size_t __weak strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); if (size) { size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); dest[len] = '\0'; } + return ret; } -#endif static char *get_pathname(void) { -- cgit v0.10.2 From baa9c30e1e250abf3e53b98e5bcf415dccdc7ba2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 1 Oct 2013 14:14:31 +0200 Subject: tools/perf/build: Speed up auto-detection of features by adding a 'test-all' target Concatenate all feature checks into test-all.c. This can be built and checked faster than all the individual tests. If test-all fails then we still check all the individual features, so this is a pure speedup, it should have no effects on functionality. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-5hlcb2qorzwfwrWTjiygjjih@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 3207c25..cbd7cdc 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -89,17 +89,21 @@ CFLAGS += -std=gnu99 EXTLIBS = -lelf -lpthread -lrt -lm -ldl -feature_check = $(eval $(feature_check_code)); $(info CHK: config/feature-checks/test-$(1)) +feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(2) := $(shell make -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) +endef + +feature_set = $(eval $(feature_set_code)) +define feature_set_code + feature-$(1) := 1 endef # # Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: # -$(info Testing features:) -$(shell make -i -j -C config/feature-checks >/dev/null 2>&1) -$(info done) +$(info ) +$(info Auto-detecting system features:) FEATURE_TESTS = \ hello \ @@ -126,7 +130,36 @@ FEATURE_TESTS = \ backtrace \ libnuma -$(foreach test,$(FEATURE_TESTS),$(call feature_check,$(test),$(test))) +# +# Special fast-path for the 'all features are available' case: +# +$(call feature_check,all) + +ifeq ($(feature-all), 1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) +else + $(shell $(MAKE) -i -j -C config/feature-checks >/dev/null 2>&1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) +endif + +feature_print = $(eval $(feature_print_code)) + +# +# Print the result of the feature test: +# +define feature_print_code + ifeq ($(feature-$(1)), 1) + MSG := $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + else + MSG := $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + endif + $(info $(MSG)) +endef + +$(foreach feat,$(FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) + +# newline at the end of the feature printouts: +$(info ) ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index c65bdac..4b855e0 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -32,6 +32,9 @@ BUILD = $(CC) -o $(OUTPUT)$@ $@.c ############################### +test-all: + $(BUILD) -Werror -fstack-protector -fstack-protector-all -Wvolatile-register-var -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl + test-hello: $(BUILD) diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c new file mode 100644 index 0000000..9f7c4b1 --- /dev/null +++ b/tools/perf/config/feature-checks/test-all.c @@ -0,0 +1,196 @@ + +#pragma GCC diagnostic ignored "-Wstrict-prototypes" + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma GCC diagnostic error "-Wstrict-prototypes" + +int main1(void) +{ + return puts("hi"); +} + +int main2(void) +{ + return puts("hi"); +} + +int main3(void) +{ + return puts("hi"); +} + +int main4(void) +{ + Elf *elf = elf_begin(0, ELF_C_READ, 0); + return (long)elf; +} +# +int main5(void) +{ + Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); + return (long)elf; +} + +int main6(void) +{ + const char *version = gnu_get_libc_version(); + return (long)version; +} + +int main7(void) +{ + Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + return (long)dbg; +} + +int main8(void) +{ + size_t dst; + return elf_getphdrnum(0, &dst); +} + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +int main9(void) +{ + unw_addr_space_t addr_space; + addr_space = unw_create_addr_space(NULL, 0); + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + return 0; +} + +int main10(void) +{ + printf("error message: %s\n", audit_errno_to_name(0)); + return audit_open(); +} + +int main11(void) +{ + return SLsmg_init_smg(); +} + +int main12(int argc, char *argv[]) +{ + gtk_init(&argc, &argv); + + return 0; +} + +int main13(void) +{ + gtk_info_bar_new(); + + return 0; +} + +int main14(void) +{ + perl_alloc(); + + return 0; +} + +int main15(void) +{ + Py_Initialize(); + return 0; +} + +#if PY_VERSION_HEX >= 0x03000000 + #error +#endif + +int main16(void) +{ + return 0; +} + +int main17(void) +{ + bfd_demangle(0, 0, 0); + return 0; +} + +void exit_function(int x, void *y) +{ +} + +int main18(void) +{ + return on_exit(exit_function, NULL); +} + +int main19(void) +{ + void *backtrace_fns[1]; + size_t entries; + + entries = backtrace(backtrace_fns, 1); + backtrace_symbols(backtrace_fns, entries); + + return 0; +} + +int main20(void) +{ + numa_available(); + return 0; +} + +int main(int argc, char *argv[]) +{ + main1(); + main2(); + main3(); + main4(); + main5(); + main6(); + main7(); + main8(); + main9(); + main10(); + main11(); + main12(argc, argv); + main13(); + main14(); + main15(); + main16(); + main17(); + main18(); + main19(); + main20(); + + return 0; +} -- cgit v0.10.2 From c72e3f04b45fb2e50cdd81a50c3778c6a57251d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 1 Oct 2013 16:28:09 +0200 Subject: tools/perf/build: Speed up git-version test on re-make util/PERF-VERSION-GEN is currently executed on every build attempt, and this script can take a lot of time on trees that are at a significant git-distance from Linus's tree: $ time util/PERF-VERSION-GEN real 0m4.343s user 0m4.176s sys 0m0.140s It also takes a lot of time if the Git repository is network attached, etc., because the commands it uses: TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) has to count commits from the nearest tag and thus has to access (and decompress) every git commit blob on the relevant version path. Even on Linus's tree it takes 0.28 seconds on a fast box to count all the commits and get the git version string: $ time util/PERF-VERSION-GEN real 0m0.279s user 0m0.247s sys 0m0.025s But the version string only has to be regenerated if the git repository's head commit changes. So add a dependency of ../../.git/HEAD and touch the file every time it's regenerated, so that Make's build rules can pick it up and cache the result: make: `PERF-VERSION-FILE' is up to date. real 0m0.184s user 0m0.117s sys 0m0.026s Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-wvmlrurufuk6mo1ovtNigguT@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 64c043b..6b5452a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -70,8 +70,9 @@ ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) endif -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE +$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + @touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar @@ -814,6 +815,16 @@ clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean $(RM) $(OUTPUT)util/*-flex* $(python-clean) +# +# Trick: if ../../.git does not exist - we are building out of tree for example, +# then force version regeneration: +# +ifeq ($(wildcard ../../.git/HEAD),) + GIT-HEAD-PHONY = ../../.git/HEAD +else + GIT-HEAD-PHONY = +endif + .PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK) .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS -- cgit v0.10.2 From c9404c6650d8f58ce8cf98d4c5581fc5c9e37faf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 1 Oct 2013 17:17:22 +0200 Subject: tools/perf/build: Speed up the final link libtraceevent.a and liblk.a rules have always-missed dependencies, which causes python.so to be relinked at every build attempt - even if none of the affected code changes. This slows down re-builds unnecessarily, by adding more than a second to the build time: comet:~/tip/tools/perf> time make ... SUBDIR /fast/mingo/tip/tools/lib/lk/ make[1]: `liblk.a' is up to date. SUBDIR /fast/mingo/tip/tools/lib/traceevent/ LINK perf GEN python/perf.so real 0m1.701s user 0m1.338s sys 0m0.301s Add the (trivial) dependencies to not force a re-link. This speeds up an empty re-build enormously: comet:~/tip/tools/perf> time make ... real 0m0.207s user 0m0.134s sys 0m0.028s [ This adds some coupling between the build dependencies of libtraceevent and liblk - but until those stay relatively simple this should not be an issue. ] Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-wvmlrurufuk6mo1ovtNigguT@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6b5452a..df76198 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -669,15 +669,19 @@ $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) # libtraceevent.a -$(LIBTRACEEVENT): +TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) + +$(LIBTRACEEVENT): $(TE_SOURCES) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a $(LIBTRACEEVENT)-clean: $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean +LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) + # if subdir is set, we've been called from above so target has been built # already -$(LIBLK): +$(LIBLK): $(LIBLK_SOURCES) ifeq ($(subdir),) $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a endif @@ -825,6 +829,6 @@ else GIT-HEAD-PHONY = endif -.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK) +.PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS -- cgit v0.10.2 From 31f6be65e08d2cbb876b3b5fc9ae9281b7417c85 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 06:51:27 +0200 Subject: tools/perf: Fix double/triple-build of the feature detection logic during 'make install' et al Linus reported the following perf build system bug: 'Another annoyance during that make was that "make install" seems to want to re-make the thing I just built. That's absolutely horrible, [...]' The thing that got re-built were 'only' the (numerous) feature checks, not the whole project - but still it was mighty annoying as the feature checks took 9+ seconds even on reasonably fast boxes. Even with the autodep patches where feature detection is much faster it wastes resources, wastes screen real estate and confuses users if we execute feature detection twice. There were two sources for these unnecessary re-builds of the feature checks: - Unnecessary nested invocations of $(MAKE), apparently to be able to do conditional compilation dependent on documentation tools presence. Use straight dependencies instead, with no nesting. - A direct invocation of $(MAKE) to rebuild the PERF-VERSION-FILE. This is apparently done to be able to include it into the Makefile: -include $(OUTPUT)PERF-VERSION-FILE but that's entirely pointless for two reasons: 1) the version file gets regenerated by the initial build pass anyway, 2) including it is futile, given its contents: #define PERF_VERSION "3.12.rc3.g8510c7" 'make' will interpret that as a comment line... So just remove this part of the doc-generation logic. With these things fixed a 'make install' now rebuilds only what is needed. A repeated 'make install' on an already built tree is super fast now, it finishes in under 0.3 seconds: # # After the patch: # $ time make install ... real 0m0.280s user 0m0.162s sys 0m0.054s Prior all the autodep changes and prior this fix, a repeat 'make install' took 24.1 seconds (!) on the same system: # # Before the patches: # $ time make install ... real 0m24.109s user 0m21.171s sys 0m2.449s Which almost entirely was caused by fixable build system fat. We are now literally ~86 times faster. A fresh rebuild and install now takes just 11.4 seconds: # # After the patch: # $ make clean $ time make -j16 install ... real 0m11.457s user 1m43.411s sys 0m7.610s Without the patches it took 27.8 seconds: # # Before the patches: # $ make clean $ time make -j16 install ... real 0m27.801s user 1m59.242s sys 0m9.749s So even in the complete rebuild case we are now ~2.5 times faster. Reported-by: Linus Torvalds Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-x4qjnxjGrgxpribq8sdakfTp@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 5a37a7c..eaa477f 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -192,13 +192,14 @@ do-install-man: man install-man: check-man-tools man -try-install-man: ifdef missing_tools - $(warning Please install $(missing_tools) to have the man pages installed) + DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) else - $(MAKE) do-install-man + DO_INSTALL_MAN = do-install-man endif +try-install-man: $(DO_INSTALL_MAN) + install-info: info $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) @@ -216,14 +217,6 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(MAKECMDGOALS),tags) -$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE - --include $(OUTPUT)PERF-VERSION-FILE -endif -endif # # Determine "include::" file references in asciidoc files. @@ -342,5 +335,3 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt #quick-install-html: # '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) - -.PHONY: .FORCE-PERF-VERSION-FILE -- cgit v0.10.2 From de0f03fb8dc268de63d366e735cc576a22df201b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 09:43:23 +0200 Subject: tools/perf/build: Invoke feature-checks 'clean' target from the main Makefile config/Makefile is not included for the 'clean' target, so invoke the config/feature-checks/Makefile 'clean' target from Makefile.perf. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-sh2cGvmsjbrazarlqre7pVwt@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index df76198..6b7779b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -809,7 +809,14 @@ $(INSTALL_DOC_TARGETS): ### Cleaning rules -clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean +# +# This is here, not in config/Makefile, because config/Makefile does +# not get included for the clean target: +# +config-clean: + @$(MAKE) -C config/feature-checks clean + +clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(RM) $(ALL_PROGRAMS) perf $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* @@ -829,6 +836,6 @@ else GIT-HEAD-PHONY = endif -.PHONY: all install clean strip +.PHONY: all install clean config-clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS -- cgit v0.10.2 From f1138ec66e839ce90c58af0c264db33271d73466 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 09:54:43 +0200 Subject: tools/perf/build: Speed up auto-detection The detection of certain rarely detected features can be delayed to when they are actually needed. So speed up the common case of auto-detection by pre-building only a core set of features and populating only their feature-flags. [ Features not listed in CORE_FEATURES need to built explicitly via the feature_check() function. ] (Also order the feature names alphabetically, while at it.) Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-xQkuveknd0gqla1dfxrqKpkl@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cbd7cdc..581a942 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -105,30 +105,36 @@ endef $(info ) $(info Auto-detecting system features:) -FEATURE_TESTS = \ - hello \ - stackprotector-all \ - stackprotector \ - volatile-register-var \ +# +# Note that this is not a complete list of all feature tests, just +# those that are typically built on a fully configured system. +# +# [ Feature tests not mentioned here have to be built explicitly in +# the rule that uses them - an example for that is the 'bionic' +# feature check. ] +# +CORE_FEATURE_TESTS = \ + backtrace \ + dwarf \ fortify-source \ - bionic \ - libelf \ glibc \ - dwarf \ - libelf-mmap \ - libelf-getphdrnum \ - libunwind \ - libaudit \ - libslang \ gtk2 \ gtk2-infobar \ + libaudit \ + libbfd \ + libelf \ + libelf-getphdrnum \ + libelf-mmap \ + libnuma \ libperl \ libpython \ libpython-version \ - libbfd \ + libslang \ + libunwind \ on-exit \ - backtrace \ - libnuma + stackprotector \ + stackprotector-all \ + volatile-register-var # # Special fast-path for the 'all features are available' case: @@ -136,10 +142,13 @@ FEATURE_TESTS = \ $(call feature_check,all) ifeq ($(feature-all), 1) - $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) + # + # test-all.c passed - just set all the core feature flags to 1: + # + $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) else - $(shell $(MAKE) -i -j -C config/feature-checks >/dev/null 2>&1) - $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) + $(shell $(MAKE) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) + $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif feature_print = $(eval $(feature_print_code)) @@ -156,7 +165,7 @@ define feature_print_code $(info $(MSG)) endef -$(foreach feat,$(FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) +$(foreach feat,$(CORE_FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) # newline at the end of the feature printouts: $(info ) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 4b855e0..d4c55ac 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -1,28 +1,29 @@ FILES= \ - test-hello \ - test-stackprotector-all \ - test-stackprotector \ - test-volatile-register-var \ - test-fortify-source \ + test-all \ + test-backtrace \ test-bionic \ - test-libelf \ - test-glibc \ test-dwarf \ - test-libelf-mmap \ - test-libelf-getphdrnum \ - test-libunwind \ - test-libaudit \ - test-libslang \ + test-fortify-source \ + test-glibc \ test-gtk2 \ test-gtk2-infobar \ + test-hello \ + test-libaudit \ + test-libbfd \ + test-libelf \ + test-libelf-getphdrnum \ + test-libelf-mmap \ + test-libnuma \ test-libperl \ test-libpython \ test-libpython-version \ - test-libbfd \ + test-libslang \ + test-libunwind \ test-on-exit \ - test-backtrace \ - test-libnuma + test-stackprotector-all \ + test-stackprotector \ + test-volatile-register-var CC := $(CC) -MD -- cgit v0.10.2 From b3b64a12239e758573316df19cbbaf0126887440 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 10:01:42 +0200 Subject: tools/perf/build: Improve printout-of auto-detected features Change the print-out of auto-detected features by making sure that repeat invocations of 'make' when all features are successfully detected do not produce the (rather lengthy) autodetection printout. ( When one or more features are missing then we still print out the feature detection table, to make sure people are aware of the resulting limitations. ) Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-qd8sMsshcjomxqx9bQcufmaa@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 581a942..fb6ec06 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -102,8 +102,6 @@ endef # # Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: # -$(info ) -$(info Auto-detecting system features:) # # Note that this is not a complete list of all feature tests, just @@ -137,9 +135,33 @@ CORE_FEATURE_TESTS = \ volatile-register-var # +# So here we detect whether test-all was rebuilt, to be able +# to skip the print-out of the long features list if the file +# existed before and after it was built: +# +ifeq ($(wildcard config/feature-checks/test-all),) + test-all-failed := 1 +else + test-all-failed := 0 +endif + +# # Special fast-path for the 'all features are available' case: # -$(call feature_check,all) +$(call feature_check,all,$(MSG)) + +# +# Just in case the build freshly failed, make sure we print the +# feature matrix: +# +ifeq ($(feature-all), 0) + test-all-failed := 1 +endif + +ifeq ($(test-all-failed),1) + $(info ) + $(info Auto-detecting system features:) +endif ifeq ($(feature-all), 1) # @@ -151,11 +173,10 @@ else $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif -feature_print = $(eval $(feature_print_code)) - # # Print the result of the feature test: # +feature_print = $(eval $(feature_print_code)) define feature_print_code ifeq ($(feature-$(1)), 1) MSG := $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) @@ -165,10 +186,13 @@ define feature_print_code $(info $(MSG)) endef -$(foreach feat,$(CORE_FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) - -# newline at the end of the feature printouts: -$(info ) +# +# Only print out our features if we rebuilt the testcases or if a test failed: +# +ifeq ($(test-all-failed), 1) + $(foreach feat,$(CORE_FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) + $(info ) +endif ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all -- cgit v0.10.2 From 2cfbe880f0a8eed7beabade3e06fd53f999b3e32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 11:18:28 +0200 Subject: tools/perf/build: Automatically build in parallel, based on number of CPUs in the system Implement automatic parallel builds when building in tools/perf: $ time make # [ perf build: Doing 'make -j12' parallel build. ] Auto-detecting system features: ... real 0m9.265s user 0m59.888s sys 0m6.082s On GNU make achieving this is not particularly easy, it requires a separate makefile, which then invokes the main Makefile. ( Note: this patch adds Makefile.parallel to show the concept - the two makefiles will be flipped in the next patch to avoid having to specify -f to get parallelism in the default build. ) Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-dvBjwqiTyzrufzkz8oanhpf9@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile.parallel b/tools/perf/Makefile.parallel new file mode 100644 index 0000000..ec5e08b --- /dev/null +++ b/tools/perf/Makefile.parallel @@ -0,0 +1,26 @@ +# +# Do a parallel build with multiple jobs, based on the number of CPUs online +# in this system: 'make -j8' on a 8-CPU system, etc. +# +# (To override it, run 'make JOBS=1' and similar.) +# +ifeq ($(JOBS),) + JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) + ifeq ($(JOBS),) + JOBS := 1 + endif +endif + +export JOBS + +$(info $(shell printf '# [ perf build: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build. ]\n')) + +# +# Needed if no target specified: +# +all: + @$(MAKE) --no-print-directory -j$(JOBS) $@ + +%: + @$(MAKE) --no-print-directory -j$(JOBS) $@ + -- cgit v0.10.2 From bd69cc286d817a4ce55b8969b0975e42168fe7c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 11:49:08 +0200 Subject: tools/perf/build: Flip Makefile.parallel and Makefile.perf To make it more apparent that there is not change in functionality we introduced Makefile.parallel separately and now flip it with the main Makefile, which moves into Makefile.perf. The renames are: Makefile.parallel => Makefile Makefile => Makefile.perf Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-igRfuw9ugbnnpixLd6wpptzl@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6b7779b..ce7874b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1,841 +1,25 @@ -include ../scripts/Makefile.include - -# The default target of this Makefile is... -all: - -include config/utilities.mak - -# Define V to have a more verbose compile. -# -# Define O to save output files in a separate directory. -# -# Define ARCH as name of target architecture if you want cross-builds. -# -# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds. -# -# Define NO_LIBPERL to disable perl script extension. -# -# Define NO_LIBPYTHON to disable python script extension. -# -# Define PYTHON to point to the python binary if the default -# `python' is not correct; for example: PYTHON=python2 -# -# Define PYTHON_CONFIG to point to the python-config binary if -# the default `$(PYTHON)-config' is not correct. -# -# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 -# -# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. -# -# Define LDFLAGS=-static to build a static binary. -# -# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. -# -# Define NO_DWARF if you do not want debug-info analysis feature at all. -# -# Define WERROR=0 to disable treating any warnings as errors. -# -# Define NO_NEWT if you do not want TUI support. (deprecated) -# -# Define NO_SLANG if you do not want TUI support. -# -# Define NO_GTK2 if you do not want GTK+ GUI support. -# -# Define NO_DEMANGLE if you do not want C++ symbol demangling. -# -# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) -# -# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf -# backtrace post unwind. # -# Define NO_BACKTRACE if you do not want stack backtrace debug feature +# Do a parallel build with multiple jobs, based on the number of CPUs online +# in this system: 'make -j8' on a 8-CPU system, etc. # -# Define NO_LIBNUMA if you do not want numa perf benchmark +# (To override it, run 'make JOBS=1' and similar.) # -# Define NO_LIBAUDIT if you do not want libaudit support -# -# Define NO_LIBBIONIC if you do not want bionic support - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(shell pwd))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -ifneq ($(objtree),) -#$(info Determined 'objtree' to be $(objtree)) -endif - -ifneq ($(OUTPUT),) -#$(info Determined 'OUTPUT' to be $(OUTPUT)) -endif - -$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE - -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -RM = rm -f -MKDIR = mkdir -FIND = find -INSTALL = install -FLEX = flex -BISON = bison -STRIP = strip - -LK_DIR = $(srctree)/tools/lib/lk/ -TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ - -# include config/Makefile by default and rule out -# non-config cases -config := 1 - -NON_CONFIG_TARGETS := clean TAGS tags cscope help - -ifdef MAKECMDGOALS -ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) - config := 0 -endif -endif - -ifeq ($(config),1) -include config/Makefile -endif - -export prefix bindir sharedir sysconfdir - -# sparse is architecture-neutral, which means that we need to tell it -# explicitly what architecture to check for. Fix this up for yours.. -SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ - -# Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -PYRF_OBJS = -SCRIPT_SH = - -SCRIPT_SH += perf-archive.sh - -grep-libs = $(filter -l%,$(1)) -strip-libs = $(filter-out -l%,$(1)) - -ifneq ($(OUTPUT),) - TE_PATH=$(OUTPUT) -ifneq ($(subdir),) - LK_PATH=$(OUTPUT)/../lib/lk/ -else - LK_PATH=$(OUTPUT) -endif -else - TE_PATH=$(TRACE_EVENT_DIR) - LK_PATH=$(LK_DIR) -endif - -LIBTRACEEVENT = $(TE_PATH)libtraceevent.a -export LIBTRACEEVENT - -LIBLK = $(LK_PATH)liblk.a -export LIBLK - -# python extension build directories -PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ -PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ -PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ -export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP - -python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so - -PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK) - -$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) - $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ - --quiet build_ext; \ - mkdir -p $(OUTPUT)python && \ - cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ -# -# No Perl scripts right now: -# - -SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) - -# -# Single 'perf' binary right now: -# -PROGRAMS += $(OUTPUT)perf - -# what 'all' will build and 'install' will install, in perfexecdir -ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) - -# what 'all' will build but not install in perfexecdir -OTHER_PROGRAMS = $(OUTPUT)perf - -# Set paths to tools early so that they can be used for version tests. -ifndef SHELL_PATH - SHELL_PATH = /bin/sh -endif -ifndef PERL_PATH - PERL_PATH = /usr/bin/perl -endif - -export PERL_PATH - -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - -LIB_FILE=$(OUTPUT)libperf.a - -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += util/include/linux/bitops.h -LIB_H += util/include/linux/compiler.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += util/include/linux/export.h -LIB_H += util/include/linux/magic.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/prefetch.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += util/include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += util/include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/hweight.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/debug.h -LIB_H += util/sysfs.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/types.h -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h - -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/sysfs.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += $(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o -LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o - -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - -LIB_OBJS += $(OUTPUT)arch/common.o - -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o - -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(RAW_ARCH),x86_64) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o - -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o - -PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) - -# We choose to avoid "if .. else if .. else .. endif endif" -# because maintaining the nesting to match is a pain. If -# we had "elif" things would have been much nicer... - --include arch/$(ARCH)/Makefile - -ifneq ($(OUTPUT),) - CFLAGS += -I$(OUTPUT) -endif - -ifdef NO_LIBELF -EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) - -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind.o -endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o - -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - -ifndef NO_GTK2 - LIB_OBJS += $(OUTPUT)ui/gtk/browser.o - LIB_OBJS += $(OUTPUT)ui/gtk/hists.o - LIB_OBJS += $(OUTPUT)ui/gtk/setup.o - LIB_OBJS += $(OUTPUT)ui/gtk/util.o - LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o - LIB_OBJS += $(OUTPUT)ui/gtk/progress.o - LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o -endif - -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h +ifeq ($(JOBS),) + JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) + ifeq ($(JOBS),) + JOBS := 1 endif endif -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - -ifdef ASCIIDOC8 - export ASCIIDOC8 -endif - -LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group - -export INSTALL SHELL_PATH - -### Build rules - -SHELL = $(SHELL_PATH) - -all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) - -please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) - -shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell - -strip: $(PROGRAMS) $(OUTPUT)perf - $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf - -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ - -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ - -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< - -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< - -$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt - -$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) - $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ - -$(SCRIPTS) : % : %.sh - $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' - -# These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ - : $(OUTPUT)PERF-VERSION-FILE - -.SUFFIXES: -.SUFFIXES: .o .c .S .s - -# These two need to be here so that when O= is not used they take precedence -# over the general rule for .o - -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< - -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +export JOBS -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< - -$(OUTPUT)perf-%: %.o $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) - -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) - -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) -# In the second step, we make a rule to actually create these directories -$(sort $(dir $(DIRECTORY_DEPS))): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null - -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) - -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) - -$(LIBTRACEEVENT): $(TE_SOURCES) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a - -$(LIBTRACEEVENT)-clean: - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean - -LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) - -# if subdir is set, we've been called from above so target has been built -# already -$(LIBLK): $(LIBLK_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a -endif - -$(LIBLK)-clean: -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean -endif - -help: - @echo 'Perf make targets:' - @echo ' doc - make *all* documentation (see below)' - @echo ' man - make manpage documentation (access with man )' - @echo ' html - make html documentation' - @echo ' info - make GNU info documentation (access with info )' - @echo ' pdf - make pdf documentation' - @echo ' TAGS - use etags to make tag information for source browsing' - @echo ' tags - use ctags to make tag information for source browsing' - @echo ' cscope - use cscope to make interactive browsing database' - @echo '' - @echo 'Perf install targets:' - @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' - @echo ' HINT: use "make prefix= " to install to a particular' - @echo ' path like make prefix=/usr/local install install-doc' - @echo ' install - install compiled binaries' - @echo ' install-doc - install *all* documentation' - @echo ' install-man - install manpage documentation' - @echo ' install-html - install html documentation' - @echo ' install-info - install GNU info documentation' - @echo ' install-pdf - install pdf documentation' - @echo '' - @echo ' quick-install-doc - alias for quick-install-man' - @echo ' quick-install-man - install the documentation quickly' - @echo ' quick-install-html - install the html documentation quickly' - @echo '' - @echo 'Perf maintainer targets:' - @echo ' clean - clean all binary objects and build output' - - -DOC_TARGETS := doc man html info pdf - -INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man -INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html - -# 'make doc' should call 'make -C Documentation all' -$(DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) - -TAGS: - $(RM) TAGS - $(FIND) . -name '*.[hcS]' -print | xargs etags -a - -tags: - $(RM) tags - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a - -cscope: - $(RM) cscope* - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b - -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - -### Testing rules - -# GNU make supports exporting all variables by "export" without parameters. -# However, the environment gets quite big, and some programs have problems -# with that. - -check: $(OUTPUT)common-cmds.h - if sparse; \ - then \ - for i in *.c */*.c; \ - do \ - sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ - done; \ - else \ - exit 1; \ - fi - -### Installation rules - -install-bin: all - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' -ifndef NO_LIBPERL - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' - $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' - $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' -endif -ifndef NO_LIBPYTHON - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' - $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' - $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' -endif - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' - $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' - $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' - -install: install-bin try-install-man - -install-python_ext: - $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' - -# 'make install-doc' should call 'make -C Documentation install' -$(INSTALL_DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=) - -### Cleaning rules +$(info $(shell printf '# [ perf build: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build. ]\n')) # -# This is here, not in config/Makefile, because config/Makefile does -# not get included for the clean target: +# Needed if no target specified: # -config-clean: - @$(MAKE) -C config/feature-checks clean - -clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean - $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) - $(RM) $(ALL_PROGRAMS) perf - $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean - $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS - $(RM) $(OUTPUT)util/*-bison* - $(RM) $(OUTPUT)util/*-flex* - $(python-clean) - -# -# Trick: if ../../.git does not exist - we are building out of tree for example, -# then force version regeneration: -# -ifeq ($(wildcard ../../.git/HEAD),) - GIT-HEAD-PHONY = ../../.git/HEAD -else - GIT-HEAD-PHONY = -endif +all: + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ -.PHONY: all install clean config-clean strip -.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +%: + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ diff --git a/tools/perf/Makefile.parallel b/tools/perf/Makefile.parallel deleted file mode 100644 index ec5e08b..0000000 --- a/tools/perf/Makefile.parallel +++ /dev/null @@ -1,26 +0,0 @@ -# -# Do a parallel build with multiple jobs, based on the number of CPUs online -# in this system: 'make -j8' on a 8-CPU system, etc. -# -# (To override it, run 'make JOBS=1' and similar.) -# -ifeq ($(JOBS),) - JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) - ifeq ($(JOBS),) - JOBS := 1 - endif -endif - -export JOBS - -$(info $(shell printf '# [ perf build: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build. ]\n')) - -# -# Needed if no target specified: -# -all: - @$(MAKE) --no-print-directory -j$(JOBS) $@ - -%: - @$(MAKE) --no-print-directory -j$(JOBS) $@ - diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf new file mode 100644 index 0000000..178a1c8 --- /dev/null +++ b/tools/perf/Makefile.perf @@ -0,0 +1,842 @@ +include ../scripts/Makefile.include + +# The default target of this Makefile is... +all: + +include config/utilities.mak + +# Define V to have a more verbose compile. +# +# Define O to save output files in a separate directory. +# +# Define ARCH as name of target architecture if you want cross-builds. +# +# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds. +# +# Define NO_LIBPERL to disable perl script extension. +# +# Define NO_LIBPYTHON to disable python script extension. +# +# Define PYTHON to point to the python binary if the default +# `python' is not correct; for example: PYTHON=python2 +# +# Define PYTHON_CONFIG to point to the python-config binary if +# the default `$(PYTHON)-config' is not correct. +# +# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 +# +# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. +# +# Define LDFLAGS=-static to build a static binary. +# +# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. +# +# Define NO_DWARF if you do not want debug-info analysis feature at all. +# +# Define WERROR=0 to disable treating any warnings as errors. +# +# Define NO_NEWT if you do not want TUI support. (deprecated) +# +# Define NO_SLANG if you do not want TUI support. +# +# Define NO_GTK2 if you do not want GTK+ GUI support. +# +# Define NO_DEMANGLE if you do not want C++ symbol demangling. +# +# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) +# +# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# backtrace post unwind. +# +# Define NO_BACKTRACE if you do not want stack backtrace debug feature +# +# Define NO_LIBNUMA if you do not want numa perf benchmark +# +# Define NO_LIBAUDIT if you do not want libaudit support +# +# Define NO_LIBBIONIC if you do not want bionic support + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +endif + +$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD + @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + @touch $(OUTPUT)PERF-VERSION-FILE + +CC = $(CROSS_COMPILE)gcc +AR = $(CROSS_COMPILE)ar + +RM = rm -f +MKDIR = mkdir +FIND = find +INSTALL = install +FLEX = flex +BISON = bison +STRIP = strip + +LK_DIR = $(srctree)/tools/lib/lk/ +TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ + +# include config/Makefile by default and rule out +# non-config cases +config := 1 + +NON_CONFIG_TARGETS := clean TAGS tags cscope help + +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) + config := 0 +endif +endif + +ifeq ($(config),1) +include config/Makefile +endif + +export prefix bindir sharedir sysconfdir + +# sparse is architecture-neutral, which means that we need to tell it +# explicitly what architecture to check for. Fix this up for yours.. +SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ + +# Guard against environment variables +BUILTIN_OBJS = +LIB_H = +LIB_OBJS = +PYRF_OBJS = +SCRIPT_SH = + +SCRIPT_SH += perf-archive.sh + +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) + +ifneq ($(OUTPUT),) + TE_PATH=$(OUTPUT) +ifneq ($(subdir),) + LK_PATH=$(OUTPUT)/../lib/lk/ +else + LK_PATH=$(OUTPUT) +endif +else + TE_PATH=$(TRACE_EVENT_DIR) + LK_PATH=$(LK_DIR) +endif + +LIBTRACEEVENT = $(TE_PATH)libtraceevent.a +export LIBTRACEEVENT + +LIBLK = $(LK_PATH)liblk.a +export LIBLK + +# python extension build directories +PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ +PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ +PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ +export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP + +python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so + +PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK) + +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) + $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ + --quiet build_ext; \ + mkdir -p $(OUTPUT)python && \ + cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ +# +# No Perl scripts right now: +# + +SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) + +# +# Single 'perf' binary right now: +# +PROGRAMS += $(OUTPUT)perf + +# what 'all' will build and 'install' will install, in perfexecdir +ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) + +# what 'all' will build but not install in perfexecdir +OTHER_PROGRAMS = $(OUTPUT)perf + +# Set paths to tools early so that they can be used for version tests. +ifndef SHELL_PATH + SHELL_PATH = /bin/sh +endif +ifndef PERL_PATH + PERL_PATH = /usr/bin/perl +endif + +export PERL_PATH + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +LIB_FILE=$(OUTPUT)libperf.a + +LIB_H += ../../include/uapi/linux/perf_event.h +LIB_H += ../../include/linux/rbtree.h +LIB_H += ../../include/linux/list.h +LIB_H += ../../include/uapi/linux/const.h +LIB_H += ../../include/linux/hash.h +LIB_H += ../../include/linux/stringify.h +LIB_H += util/include/linux/bitmap.h +LIB_H += util/include/linux/bitops.h +LIB_H += util/include/linux/compiler.h +LIB_H += util/include/linux/const.h +LIB_H += util/include/linux/ctype.h +LIB_H += util/include/linux/kernel.h +LIB_H += util/include/linux/list.h +LIB_H += util/include/linux/export.h +LIB_H += util/include/linux/magic.h +LIB_H += util/include/linux/poison.h +LIB_H += util/include/linux/prefetch.h +LIB_H += util/include/linux/rbtree.h +LIB_H += util/include/linux/rbtree_augmented.h +LIB_H += util/include/linux/string.h +LIB_H += util/include/linux/types.h +LIB_H += util/include/linux/linkage.h +LIB_H += util/include/asm/asm-offsets.h +LIB_H += util/include/asm/bug.h +LIB_H += util/include/asm/byteorder.h +LIB_H += util/include/asm/hweight.h +LIB_H += util/include/asm/swab.h +LIB_H += util/include/asm/system.h +LIB_H += util/include/asm/uaccess.h +LIB_H += util/include/dwarf-regs.h +LIB_H += util/include/asm/dwarf2.h +LIB_H += util/include/asm/cpufeature.h +LIB_H += util/include/asm/unistd_32.h +LIB_H += util/include/asm/unistd_64.h +LIB_H += perf.h +LIB_H += util/annotate.h +LIB_H += util/cache.h +LIB_H += util/callchain.h +LIB_H += util/build-id.h +LIB_H += util/debug.h +LIB_H += util/sysfs.h +LIB_H += util/pmu.h +LIB_H += util/event.h +LIB_H += util/evsel.h +LIB_H += util/evlist.h +LIB_H += util/exec_cmd.h +LIB_H += util/types.h +LIB_H += util/levenshtein.h +LIB_H += util/machine.h +LIB_H += util/map.h +LIB_H += util/parse-options.h +LIB_H += util/parse-events.h +LIB_H += util/quote.h +LIB_H += util/util.h +LIB_H += util/xyarray.h +LIB_H += util/header.h +LIB_H += util/help.h +LIB_H += util/session.h +LIB_H += util/strbuf.h +LIB_H += util/strlist.h +LIB_H += util/strfilter.h +LIB_H += util/svghelper.h +LIB_H += util/tool.h +LIB_H += util/run-command.h +LIB_H += util/sigchain.h +LIB_H += util/dso.h +LIB_H += util/symbol.h +LIB_H += util/color.h +LIB_H += util/values.h +LIB_H += util/sort.h +LIB_H += util/hist.h +LIB_H += util/thread.h +LIB_H += util/thread_map.h +LIB_H += util/trace-event.h +LIB_H += util/probe-finder.h +LIB_H += util/dwarf-aux.h +LIB_H += util/probe-event.h +LIB_H += util/pstack.h +LIB_H += util/cpumap.h +LIB_H += util/top.h +LIB_H += $(ARCH_INCLUDE) +LIB_H += util/cgroup.h +LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h +LIB_H += util/target.h +LIB_H += util/rblist.h +LIB_H += util/intlist.h +LIB_H += util/perf_regs.h +LIB_H += util/unwind.h +LIB_H += util/vdso.h +LIB_H += ui/helpline.h +LIB_H += ui/progress.h +LIB_H += ui/util.h +LIB_H += ui/ui.h + +LIB_OBJS += $(OUTPUT)util/abspath.o +LIB_OBJS += $(OUTPUT)util/alias.o +LIB_OBJS += $(OUTPUT)util/annotate.o +LIB_OBJS += $(OUTPUT)util/build-id.o +LIB_OBJS += $(OUTPUT)util/config.o +LIB_OBJS += $(OUTPUT)util/ctype.o +LIB_OBJS += $(OUTPUT)util/sysfs.o +LIB_OBJS += $(OUTPUT)util/pmu.o +LIB_OBJS += $(OUTPUT)util/environment.o +LIB_OBJS += $(OUTPUT)util/event.o +LIB_OBJS += $(OUTPUT)util/evlist.o +LIB_OBJS += $(OUTPUT)util/evsel.o +LIB_OBJS += $(OUTPUT)util/exec_cmd.o +LIB_OBJS += $(OUTPUT)util/help.o +LIB_OBJS += $(OUTPUT)util/levenshtein.o +LIB_OBJS += $(OUTPUT)util/parse-options.o +LIB_OBJS += $(OUTPUT)util/parse-events.o +LIB_OBJS += $(OUTPUT)util/path.o +LIB_OBJS += $(OUTPUT)util/rbtree.o +LIB_OBJS += $(OUTPUT)util/bitmap.o +LIB_OBJS += $(OUTPUT)util/hweight.o +LIB_OBJS += $(OUTPUT)util/run-command.o +LIB_OBJS += $(OUTPUT)util/quote.o +LIB_OBJS += $(OUTPUT)util/strbuf.o +LIB_OBJS += $(OUTPUT)util/string.o +LIB_OBJS += $(OUTPUT)util/strlist.o +LIB_OBJS += $(OUTPUT)util/strfilter.o +LIB_OBJS += $(OUTPUT)util/top.o +LIB_OBJS += $(OUTPUT)util/usage.o +LIB_OBJS += $(OUTPUT)util/wrapper.o +LIB_OBJS += $(OUTPUT)util/sigchain.o +LIB_OBJS += $(OUTPUT)util/dso.o +LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/symbol-elf.o +LIB_OBJS += $(OUTPUT)util/color.o +LIB_OBJS += $(OUTPUT)util/pager.o +LIB_OBJS += $(OUTPUT)util/header.o +LIB_OBJS += $(OUTPUT)util/callchain.o +LIB_OBJS += $(OUTPUT)util/values.o +LIB_OBJS += $(OUTPUT)util/debug.o +LIB_OBJS += $(OUTPUT)util/machine.o +LIB_OBJS += $(OUTPUT)util/map.o +LIB_OBJS += $(OUTPUT)util/pstack.o +LIB_OBJS += $(OUTPUT)util/session.o +LIB_OBJS += $(OUTPUT)util/thread.o +LIB_OBJS += $(OUTPUT)util/thread_map.o +LIB_OBJS += $(OUTPUT)util/trace-event-parse.o +LIB_OBJS += $(OUTPUT)util/parse-events-flex.o +LIB_OBJS += $(OUTPUT)util/parse-events-bison.o +LIB_OBJS += $(OUTPUT)util/pmu-flex.o +LIB_OBJS += $(OUTPUT)util/pmu-bison.o +LIB_OBJS += $(OUTPUT)util/trace-event-read.o +LIB_OBJS += $(OUTPUT)util/trace-event-info.o +LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o +LIB_OBJS += $(OUTPUT)util/svghelper.o +LIB_OBJS += $(OUTPUT)util/sort.o +LIB_OBJS += $(OUTPUT)util/hist.o +LIB_OBJS += $(OUTPUT)util/probe-event.o +LIB_OBJS += $(OUTPUT)util/util.o +LIB_OBJS += $(OUTPUT)util/xyarray.o +LIB_OBJS += $(OUTPUT)util/cpumap.o +LIB_OBJS += $(OUTPUT)util/cgroup.o +LIB_OBJS += $(OUTPUT)util/target.o +LIB_OBJS += $(OUTPUT)util/rblist.o +LIB_OBJS += $(OUTPUT)util/intlist.o +LIB_OBJS += $(OUTPUT)util/vdso.o +LIB_OBJS += $(OUTPUT)util/stat.o +LIB_OBJS += $(OUTPUT)util/record.o + +LIB_OBJS += $(OUTPUT)ui/setup.o +LIB_OBJS += $(OUTPUT)ui/helpline.o +LIB_OBJS += $(OUTPUT)ui/progress.o +LIB_OBJS += $(OUTPUT)ui/util.o +LIB_OBJS += $(OUTPUT)ui/hist.o +LIB_OBJS += $(OUTPUT)ui/stdio/hist.o + +LIB_OBJS += $(OUTPUT)arch/common.o + +LIB_OBJS += $(OUTPUT)tests/parse-events.o +LIB_OBJS += $(OUTPUT)tests/dso-data.o +LIB_OBJS += $(OUTPUT)tests/attr.o +LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o +LIB_OBJS += $(OUTPUT)tests/open-syscall.o +LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o +LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o +LIB_OBJS += $(OUTPUT)tests/mmap-basic.o +LIB_OBJS += $(OUTPUT)tests/perf-record.o +LIB_OBJS += $(OUTPUT)tests/rdpmc.o +LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o +LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o +LIB_OBJS += $(OUTPUT)tests/pmu.o +LIB_OBJS += $(OUTPUT)tests/hists_link.o +LIB_OBJS += $(OUTPUT)tests/python-use.o +LIB_OBJS += $(OUTPUT)tests/bp_signal.o +LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o +LIB_OBJS += $(OUTPUT)tests/task-exit.o +LIB_OBJS += $(OUTPUT)tests/sw-clock.o +ifeq ($(ARCH),x86) +LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o +endif +LIB_OBJS += $(OUTPUT)tests/code-reading.o +LIB_OBJS += $(OUTPUT)tests/sample-parsing.o +LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o + +BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o +BUILTIN_OBJS += $(OUTPUT)builtin-bench.o +# Benchmark modules +BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o +BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o +ifeq ($(RAW_ARCH),x86_64) +BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o +endif +BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o + +BUILTIN_OBJS += $(OUTPUT)builtin-diff.o +BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o +BUILTIN_OBJS += $(OUTPUT)builtin-help.o +BUILTIN_OBJS += $(OUTPUT)builtin-sched.o +BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o +BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o +BUILTIN_OBJS += $(OUTPUT)builtin-list.o +BUILTIN_OBJS += $(OUTPUT)builtin-record.o +BUILTIN_OBJS += $(OUTPUT)builtin-report.o +BUILTIN_OBJS += $(OUTPUT)builtin-stat.o +BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o +BUILTIN_OBJS += $(OUTPUT)builtin-top.o +BUILTIN_OBJS += $(OUTPUT)builtin-script.o +BUILTIN_OBJS += $(OUTPUT)builtin-probe.o +BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o +BUILTIN_OBJS += $(OUTPUT)builtin-lock.o +BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o +BUILTIN_OBJS += $(OUTPUT)builtin-inject.o +BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o +BUILTIN_OBJS += $(OUTPUT)builtin-mem.o + +PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) + +# We choose to avoid "if .. else if .. else .. endif endif" +# because maintaining the nesting to match is a pain. If +# we had "elif" things would have been much nicer... + +-include arch/$(ARCH)/Makefile + +ifneq ($(OUTPUT),) + CFLAGS += -I$(OUTPUT) +endif + +ifdef NO_LIBELF +EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) + +# Remove ELF/DWARF dependent codes +LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) + +BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) + +# Use minimal symbol handling +LIB_OBJS += $(OUTPUT)util/symbol-minimal.o + +else # NO_LIBELF +ifndef NO_DWARF + LIB_OBJS += $(OUTPUT)util/probe-finder.o + LIB_OBJS += $(OUTPUT)util/dwarf-aux.o +endif # NO_DWARF +endif # NO_LIBELF + +ifndef NO_LIBUNWIND + LIB_OBJS += $(OUTPUT)util/unwind.o +endif +LIB_OBJS += $(OUTPUT)tests/keep-tracking.o + +ifndef NO_LIBAUDIT + BUILTIN_OBJS += $(OUTPUT)builtin-trace.o +endif + +ifndef NO_SLANG + LIB_OBJS += $(OUTPUT)ui/browser.o + LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o + LIB_OBJS += $(OUTPUT)ui/browsers/hists.o + LIB_OBJS += $(OUTPUT)ui/browsers/map.o + LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o + LIB_OBJS += $(OUTPUT)ui/tui/setup.o + LIB_OBJS += $(OUTPUT)ui/tui/util.o + LIB_OBJS += $(OUTPUT)ui/tui/helpline.o + LIB_OBJS += $(OUTPUT)ui/tui/progress.o + LIB_H += ui/browser.h + LIB_H += ui/browsers/map.h + LIB_H += ui/keysyms.h + LIB_H += ui/libslang.h +endif + +ifndef NO_GTK2 + LIB_OBJS += $(OUTPUT)ui/gtk/browser.o + LIB_OBJS += $(OUTPUT)ui/gtk/hists.o + LIB_OBJS += $(OUTPUT)ui/gtk/setup.o + LIB_OBJS += $(OUTPUT)ui/gtk/util.o + LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o + LIB_OBJS += $(OUTPUT)ui/gtk/progress.o + LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o +endif + +ifndef NO_LIBPERL + LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o + LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o +endif + +ifndef NO_LIBPYTHON + LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o + LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o +endif + +ifeq ($(NO_PERF_REGS),0) + ifeq ($(ARCH),x86) + LIB_H += arch/x86/include/perf_regs.h + endif +endif + +ifndef NO_LIBNUMA + BUILTIN_OBJS += $(OUTPUT)bench/numa.o +endif + +ifdef ASCIIDOC8 + export ASCIIDOC8 +endif + +LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group + +export INSTALL SHELL_PATH + +### Build rules + +SHELL = $(SHELL_PATH) + +all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) + +please_set_SHELL_PATH_to_a_more_modern_shell: + @$$(:) + +shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell + +strip: $(PROGRAMS) $(OUTPUT)perf + $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf + +$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ + $(CFLAGS) -c $(filter %.c,$^) -o $@ + +$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ + $(BUILTIN_OBJS) $(LIBS) -o $@ + +$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ + '-DPERF_MAN_PATH="$(mandir_SQ)"' \ + '-DPERF_INFO_PATH="$(infodir_SQ)"' $< + +$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ + '-DPERF_MAN_PATH="$(mandir_SQ)"' \ + '-DPERF_INFO_PATH="$(infodir_SQ)"' $< + +$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt + +$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) + $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ + +$(SCRIPTS) : % : %.sh + $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' + +# These can record PERF_VERSION +$(OUTPUT)perf.o perf.spec \ + $(SCRIPTS) \ + : $(OUTPUT)PERF-VERSION-FILE + +.SUFFIXES: +.SUFFIXES: .o .c .S .s + +# These two need to be here so that when O= is not used they take precedence +# over the general rule for .o + +$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< + +$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< + +$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< +$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< +$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< +$(OUTPUT)%.o: %.S + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< +$(OUTPUT)%.s: %.S + $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< + +$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ + '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ + '-DPREFIX="$(prefix_SQ)"' \ + $< + +$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ + '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ + $< + +$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ + -DPYTHONPATH='"$(OUTPUT)python"' \ + -DPYTHON='"$(PYTHON_WORD)"' \ + $< + +$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< + +$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< + +$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< + +$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< + +$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< + +$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< + +$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< + +$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< + +$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< + +$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< + +$(OUTPUT)perf-%: %.o $(PERFLIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) + +$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) + +# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So +# we depend the various files onto their directories. +DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) +# In the second step, we make a rule to actually create these directories +$(sort $(dir $(DIRECTORY_DEPS))): + $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null + +$(LIB_FILE): $(LIB_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) + +# libtraceevent.a +TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) + +$(LIBTRACEEVENT): $(TE_SOURCES) + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a + +$(LIBTRACEEVENT)-clean: + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + +LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) + +# if subdir is set, we've been called from above so target has been built +# already +$(LIBLK): $(LIBLK_SOURCES) +ifeq ($(subdir),) + $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a +endif + +$(LIBLK)-clean: +ifeq ($(subdir),) + $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean +endif + +help: + @echo 'Perf make targets:' + @echo ' doc - make *all* documentation (see below)' + @echo ' man - make manpage documentation (access with man )' + @echo ' html - make html documentation' + @echo ' info - make GNU info documentation (access with info )' + @echo ' pdf - make pdf documentation' + @echo ' TAGS - use etags to make tag information for source browsing' + @echo ' tags - use ctags to make tag information for source browsing' + @echo ' cscope - use cscope to make interactive browsing database' + @echo '' + @echo 'Perf install targets:' + @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' + @echo ' HINT: use "make prefix= " to install to a particular' + @echo ' path like make prefix=/usr/local install install-doc' + @echo ' install - install compiled binaries' + @echo ' install-doc - install *all* documentation' + @echo ' install-man - install manpage documentation' + @echo ' install-html - install html documentation' + @echo ' install-info - install GNU info documentation' + @echo ' install-pdf - install pdf documentation' + @echo '' + @echo ' quick-install-doc - alias for quick-install-man' + @echo ' quick-install-man - install the documentation quickly' + @echo ' quick-install-html - install the html documentation quickly' + @echo '' + @echo 'Perf maintainer targets:' + @echo ' clean - clean all binary objects and build output' + + +DOC_TARGETS := doc man html info pdf + +INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man +INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html + +# 'make doc' should call 'make -C Documentation all' +$(DOC_TARGETS): + $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) + +TAGS: + $(RM) TAGS + $(FIND) . -name '*.[hcS]' -print | xargs etags -a + +tags: + $(RM) tags + $(FIND) . -name '*.[hcS]' -print | xargs ctags -a + +cscope: + $(RM) cscope* + $(FIND) . -name '*.[hcS]' -print | xargs cscope -b + +### Detect prefix changes +TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ + $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) + +$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS + @FLAGS='$(TRACK_CFLAGS)'; \ + if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ + echo 1>&2 " * new build flags or prefix"; \ + echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ + fi + +### Testing rules + +# GNU make supports exporting all variables by "export" without parameters. +# However, the environment gets quite big, and some programs have problems +# with that. + +check: $(OUTPUT)common-cmds.h + if sparse; \ + then \ + for i in *.c */*.c; \ + do \ + sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ + done; \ + else \ + exit 1; \ + fi + +### Installation rules + +install-bin: all + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBPERL + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' + $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' +endif +ifndef NO_LIBPYTHON + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' + $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' +endif + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' + $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' + $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' + +install: install-bin try-install-man + +install-python_ext: + $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' + +# 'make install-doc' should call 'make -C Documentation install' +$(INSTALL_DOC_TARGETS): + $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=) + +### Cleaning rules + +# +# This is here, not in config/Makefile, because config/Makefile does +# not get included for the clean target: +# +config-clean: + @$(MAKE) -C config/feature-checks clean + +clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean + $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) + $(RM) $(ALL_PROGRAMS) perf + $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* + $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean + $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS + $(RM) $(OUTPUT)util/*-bison* + $(RM) $(OUTPUT)util/*-flex* + $(python-clean) + +# +# Trick: if ../../.git does not exist - we are building out of tree for example, +# then force version regeneration: +# +ifeq ($(wildcard ../../.git/HEAD),) + GIT-HEAD-PHONY = ../../.git/HEAD +else + GIT-HEAD-PHONY = +endif + +.PHONY: all install clean config-clean strip +.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS + -- cgit v0.10.2 From 73a725f0008702600f7d987e262f963c0fa64bc6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 11:58:30 +0200 Subject: tools/perf/build: Standardize the various messages output by parallel make Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-mky0rtpwxi3ivxsvdjoOEmhr@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ce7874b..3b925ad 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -13,13 +13,30 @@ endif export JOBS -$(info $(shell printf '# [ perf build: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build. ]\n')) +define print_msg + @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' +endef + +define make + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ +endef # # Needed if no target specified: # all: - @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ + $(print_msg) + $(make) + +# +# The clean target is not really parallel, don't print the jobs info: +# +clean: + $(make) +# +# All other targets get passed through: +# %: - @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ + $(print_msg) + $(make) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index fb6ec06..62d02cd 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -179,9 +179,9 @@ endif feature_print = $(eval $(feature_print_code)) define feature_print_code ifeq ($(feature-$(1)), 1) - MSG := $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) else - MSG := $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) endif $(info $(MSG)) endef -- cgit v0.10.2 From 1c47661a93fe6f729c80ed18a1f9ab1c7fb0cac2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 15:15:09 +0200 Subject: tools/perf/build: Split out feature checks: 'liberty', 'liberty-z', 'cplus-demangle' Note that these are rarely executed tests, so we call feature_check() explicitly and don't have them in CORE_FEATURE_CHECKS. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-pvumlx6mbtfxffgrlwO2mRcx@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 62d02cd..89b2d47 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -472,23 +472,19 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd ifeq ($(feature-libbfd), 1) EXTLIBS += -lbfd else - FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty - has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY),liberty) - ifeq ($(has_bfd_iberty),y) + $(feature_check,liberty) + ifeq ($(feature-liberty), 1) EXTLIBS += -lbfd -liberty else - FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz - has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z),libz) - ifeq ($(has_bfd_iberty_z),y) + $(feature_check,liberty-z) + ifeq ($(feature-liberty-z), 1) EXTLIBS += -lbfd -liberty -lz else - FLAGS_CPLUS_DEMANGLE=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -liberty - has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle) - ifeq ($(has_cplus_demangle),y) + $(feature_check,cplus-demangle) + ifeq ($(feature-cplus-demangle), 1) EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index d4c55ac..e21bceb 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -11,6 +11,9 @@ FILES= \ test-hello \ test-libaudit \ test-libbfd \ + test-liberty \ + test-liberty-z \ + test-cplus-demangle \ test-libelf \ test-libelf-getphdrnum \ test-libelf-mmap \ @@ -122,6 +125,15 @@ test-libpython-version: test-libbfd: $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl +test-liberty: + $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl -liberty + +test-liberty-z: + $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl -liberty -lz + +test-cplus-demangle: + $(BUILD) -liberty + test-on-exit: $(BUILD) diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/perf/config/feature-checks/test-cplus-demangle.c new file mode 100644 index 0000000..5202f50 --- /dev/null +++ b/tools/perf/config/feature-checks/test-cplus-demangle.c @@ -0,0 +1,10 @@ + +extern char *cplus_demangle(const char *, int); + +int main(void) +{ + cplus_demangle(0, 0); + + return 0; +} + -- cgit v0.10.2 From 0648f839ff754e2825fee2cb7080df5874316b3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 15:30:35 +0200 Subject: tools/perf/build: Remove unused config/feature-tests.mak Also remove try-cc et al. These got obsoleted by the split-out feature checks in config/feature-checks/. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-Y6ailbiranadqlrl8Dfivjbi@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 89b2d47..e80ffe8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -51,7 +51,6 @@ LIB_INCLUDE := $(srctree)/tools/lib/ # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile -include $(src-perf)/config/feature-tests.mak include $(src-perf)/config/utilities.mak ifeq ($(call get-executable,$(FLEX)),) @@ -274,7 +273,7 @@ else msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 endif # Dwarf support - endif # SOURCE_LIBELF + endif # libelf support endif # NO_LIBELF ifndef NO_LIBELF diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index f793057..139597f 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -1,246 +1,2 @@ -define SOURCE_HELLO -#include -int main(void) -{ - return puts(\"hi\"); -} -endef -ifndef NO_DWARF -define SOURCE_DWARF -#include -#include -#include -#ifndef _ELFUTILS_PREREQ -#error -#endif -int main(void) -{ - Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); - return (long)dbg; -} -endef -endif - -define SOURCE_LIBELF -#include - -int main(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ, 0); - return (long)elf; -} -endef - -define SOURCE_GLIBC -#include - -int main(void) -{ - const char *version = gnu_get_libc_version(); - return (long)version; -} -endef - -define SOURCE_BIONIC -#include - -int main(void) -{ - return __ANDROID_API__; -} -endef - -define SOURCE_ELF_MMAP -#include -int main(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); - return (long)elf; -} -endef - -define SOURCE_ELF_GETPHDRNUM -#include -int main(void) -{ - size_t dst; - return elf_getphdrnum(0, &dst); -} -endef - -ifndef NO_SLANG -define SOURCE_SLANG -#include - -int main(void) -{ - return SLsmg_init_smg(); -} -endef -endif - -ifndef NO_GTK2 -define SOURCE_GTK2 -#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" -#include -#pragma GCC diagnostic error \"-Wstrict-prototypes\" - -int main(int argc, char *argv[]) -{ - gtk_init(&argc, &argv); - - return 0; -} -endef - -define SOURCE_GTK2_INFOBAR -#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" -#include -#pragma GCC diagnostic error \"-Wstrict-prototypes\" - -int main(void) -{ - gtk_info_bar_new(); - - return 0; -} -endef -endif - -ifndef NO_LIBPERL -define SOURCE_PERL_EMBED -#include -#include - -int main(void) -{ -perl_alloc(); -return 0; -} -endef -endif - -ifndef NO_LIBPYTHON -define SOURCE_PYTHON_VERSION -#include -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif -int main(void) -{ - return 0; -} -endef -define SOURCE_PYTHON_EMBED -#include -int main(void) -{ - Py_Initialize(); - return 0; -} -endef -endif - -define SOURCE_BFD -#include - -int main(void) -{ - bfd_demangle(0, 0, 0); - return 0; -} -endef - -define SOURCE_CPLUS_DEMANGLE -extern char *cplus_demangle(const char *, int); - -int main(void) -{ - cplus_demangle(0, 0); - return 0; -} -endef - -define SOURCE_STRLCPY -#include -extern size_t strlcpy(char *dest, const char *src, size_t size); - -int main(void) -{ - strlcpy(NULL, NULL, 0); - return 0; -} -endef - -ifndef NO_LIBUNWIND -define SOURCE_LIBUNWIND -#include -#include - -extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, - unw_word_t ip, - unw_dyn_info_t *di, - unw_proc_info_t *pi, - int need_unwind_info, void *arg); - - -#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) - -int main(void) -{ - unw_addr_space_t addr_space; - addr_space = unw_create_addr_space(NULL, 0); - unw_init_remote(NULL, addr_space, NULL); - dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); - return 0; -} -endef -endif - -ifndef NO_BACKTRACE -define SOURCE_BACKTRACE -#include -#include - -int main(void) -{ - backtrace(NULL, 0); - backtrace_symbols(NULL, 0); - return 0; -} -endef -endif - -ifndef NO_LIBAUDIT -define SOURCE_LIBAUDIT -#include - -int main(void) -{ - printf(\"error message: %s\", audit_errno_to_name(0)); - return audit_open(); -} -endef -endif - -define SOURCE_ON_EXIT -#include - -int main(void) -{ - return on_exit(NULL, NULL); -} -endef - -define SOURCE_LIBNUMA -#include -#include - -int main(void) -{ - numa_available(); - return 0; -} -endef diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 94d2d4f..4d985e0 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -178,17 +178,3 @@ endef _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) - -# try-cc -# Usage: option = $(call try-cc, source-to-build, cc-options, msg) -ifneq ($(V),1) -TRY_CC_OUTPUT= > /dev/null 2>&1 -endif -TRY_CC_MSG=echo " CHK $(3)" 1>&2; - -try-cc = $(shell sh -c \ - 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ - $(TRY_CC_MSG) \ - echo "$(1)" | \ - $(CC) -x c - $(2) -o "$$TMP" $(TRY_CC_OUTPUT) && echo y; \ - rm -f "$$TMP"') -- cgit v0.10.2 From 1e3f30fae797660a014ac159d93fff9952ec1bf0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 15:48:49 +0200 Subject: tools/perf/build: Clean up various testcases Prepare to include them into test-all.c directly, by making sure that they build cleanly and without warnings. Also make sure they make a certain amount of sense and don't crash when executed. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-Mn9gsdutzopoowk3xurqpsxE@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/perf/config/feature-checks/test-backtrace.c index 5b79468..5b33bcf 100644 --- a/tools/perf/config/feature-checks/test-backtrace.c +++ b/tools/perf/config/feature-checks/test-backtrace.c @@ -3,8 +3,12 @@ int main(void) { - backtrace(NULL, 0); - backtrace_symbols(NULL, 0); + void *backtrace_fns[10]; + size_t entries; + + entries = backtrace(backtrace_fns, 10); + backtrace_symbols_fd(backtrace_fns, entries, 1); return 0; } + diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/perf/config/feature-checks/test-cplus-demangle.c index 5202f50..ab29f80 100644 --- a/tools/perf/config/feature-checks/test-cplus-demangle.c +++ b/tools/perf/config/feature-checks/test-cplus-demangle.c @@ -1,9 +1,14 @@ - +extern int printf(const char *format, ...); extern char *cplus_demangle(const char *, int); int main(void) { - cplus_demangle(0, 0); + char symbol[4096] = "FieldName__9ClassNameFd"; + char *tmp; + + tmp = cplus_demangle(symbol, 0); + + printf("demangled symbol: {%s}\n", tmp); return 0; } diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/perf/config/feature-checks/test-gtk2-infobar.c index eebcfbc..397b464 100644 --- a/tools/perf/config/feature-checks/test-gtk2-infobar.c +++ b/tools/perf/config/feature-checks/test-gtk2-infobar.c @@ -2,8 +2,9 @@ #include #pragma GCC diagnostic error "-Wstrict-prototypes" -int main(void) +int main(int argc, char *argv[]) { + gtk_init(&argc, &argv); gtk_info_bar_new(); return 0; diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/perf/config/feature-checks/test-gtk2.c index 1ac6d8a..6bd80e5 100644 --- a/tools/perf/config/feature-checks/test-gtk2.c +++ b/tools/perf/config/feature-checks/test-gtk2.c @@ -4,7 +4,7 @@ int main(int argc, char *argv[]) { - gtk_init(&argc, &argv); + gtk_init(&argc, &argv); return 0; } diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/perf/config/feature-checks/test-libaudit.c index 854a65d..f7e791e 100644 --- a/tools/perf/config/feature-checks/test-libaudit.c +++ b/tools/perf/config/feature-checks/test-libaudit.c @@ -1,5 +1,7 @@ #include +extern int printf(const char *format, ...); + int main(void) { printf("error message: %s\n", audit_errno_to_name(0)); diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/perf/config/feature-checks/test-libbfd.c index d03339c..1886c78 100644 --- a/tools/perf/config/feature-checks/test-libbfd.c +++ b/tools/perf/config/feature-checks/test-libbfd.c @@ -1,7 +1,16 @@ #include +extern int printf(const char *format, ...); + int main(void) { - bfd_demangle(0, 0, 0); + char symbol[4096] = "FieldName__9ClassNameFd"; + char *tmp; + + tmp = bfd_demangle(0, symbol, 0); + + printf("demangled symbol: {%s}\n", tmp); + return 0; } + diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/perf/config/feature-checks/test-libunwind.c index 5622746..43b9369 100644 --- a/tools/perf/config/feature-checks/test-libunwind.c +++ b/tools/perf/config/feature-checks/test-libunwind.c @@ -10,11 +10,18 @@ extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, #define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) +static unw_accessors_t accessors; + int main(void) { unw_addr_space_t addr_space; - addr_space = unw_create_addr_space(NULL, 0); + + addr_space = unw_create_addr_space(&accessors, 0); + if (addr_space) + return 0; + unw_init_remote(NULL, addr_space, NULL); dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + return 0; } diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c index 473f1de..8f64ed3 100644 --- a/tools/perf/config/feature-checks/test-on-exit.c +++ b/tools/perf/config/feature-checks/test-on-exit.c @@ -1,6 +1,15 @@ #include +static void exit_fn(int status, void *__data) +{ + printf("exit status: %d, data: %d\n", status, *(int *)__data); +} + +static int data = 123; + int main(void) { - return on_exit(NULL, NULL); + on_exit(exit_fn, &data); + + return 321; } -- cgit v0.10.2 From 3ca576a481f8c12fe606fb75232d52637c666ed0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Oct 2013 16:21:37 +0200 Subject: tools/perf/build: Collapse the test-all.c testcase Simplify test-all.c by including it all the testcases via #include. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-pcZlwqq5ou7Ebvkekvhtzfbm@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 9f7c4b1..50d4318 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -1,196 +1,106 @@ - -#pragma GCC diagnostic ignored "-Wstrict-prototypes" - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#pragma GCC diagnostic error "-Wstrict-prototypes" - -int main1(void) -{ - return puts("hi"); -} - -int main2(void) -{ - return puts("hi"); -} - -int main3(void) -{ - return puts("hi"); -} - -int main4(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ, 0); - return (long)elf; -} -# -int main5(void) -{ - Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); - return (long)elf; -} - -int main6(void) -{ - const char *version = gnu_get_libc_version(); - return (long)version; -} - -int main7(void) -{ - Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); - return (long)dbg; -} - -int main8(void) -{ - size_t dst; - return elf_getphdrnum(0, &dst); -} - -extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, - unw_word_t ip, - unw_dyn_info_t *di, - unw_proc_info_t *pi, - int need_unwind_info, void *arg); - - -#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) - -int main9(void) -{ - unw_addr_space_t addr_space; - addr_space = unw_create_addr_space(NULL, 0); - unw_init_remote(NULL, addr_space, NULL); - dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); - return 0; -} - -int main10(void) -{ - printf("error message: %s\n", audit_errno_to_name(0)); - return audit_open(); -} - -int main11(void) -{ - return SLsmg_init_smg(); -} - -int main12(int argc, char *argv[]) -{ - gtk_init(&argc, &argv); - - return 0; -} - -int main13(void) -{ - gtk_info_bar_new(); - - return 0; -} - -int main14(void) -{ - perl_alloc(); - - return 0; -} - -int main15(void) -{ - Py_Initialize(); - return 0; -} - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main16(void) -{ - return 0; -} - -int main17(void) -{ - bfd_demangle(0, 0, 0); - return 0; -} - -void exit_function(int x, void *y) -{ -} - -int main18(void) -{ - return on_exit(exit_function, NULL); -} - -int main19(void) -{ - void *backtrace_fns[1]; - size_t entries; - - entries = backtrace(backtrace_fns, 1); - backtrace_symbols(backtrace_fns, entries); - - return 0; -} - -int main20(void) -{ - numa_available(); - return 0; -} +/* + * test-all.c: Try to build all the main testcases at once. + * + * A well-configured system will have all the prereqs installed, so we can speed + * up auto-detection on such systems. + */ + +/* + * Quirk: Python and Perl headers cannot be in arbitrary places, so keep + * these 3 testcases at the top: + */ +#define main main_test_libpython +# include "test-libpython.c" +#undef main + +#define main main_test_libpython_version +# include "test-libpython-version.c" +#undef main + +#define main main_test_libperl +# include "test-libperl.c" +#undef main + +#define main main_test_hello +# include "test-hello.c" +#undef main + +#define main main_test_libelf +# include "test-libelf.c" +#undef main + +#define main main_test_libelf_mmap +# include "test-libelf-mmap.c" +#undef main + +#define main main_test_glibc +# include "test-glibc.c" +#undef main + +#define main main_test_dwarf +# include "test-dwarf.c" +#undef main + +#define main main_test_libelf_getphdrnum +# include "test-libelf-getphdrnum.c" +#undef main + +#define main main_test_libunwind +# include "test-libunwind.c" +#undef main + +#define main main_test_libaudit +# include "test-libaudit.c" +#undef main + +#define main main_test_libslang +# include "test-libslang.c" +#undef main + +#define main main_test_gtk2 +# include "test-gtk2.c" +#undef main + +#define main main_test_gtk2_infobar +# include "test-gtk2-infobar.c" +#undef main + +#define main main_test_libbfd +# include "test-libbfd.c" +#undef main + +#define main main_test_on_exit +# include "test-on-exit.c" +#undef main + +#define main main_test_backtrace +# include "test-backtrace.c" +#undef main + +#define main main_test_libnuma +# include "test-libnuma.c" +#undef main int main(int argc, char *argv[]) { - main1(); - main2(); - main3(); - main4(); - main5(); - main6(); - main7(); - main8(); - main9(); - main10(); - main11(); - main12(argc, argv); - main13(); - main14(); - main15(); - main16(); - main17(); - main18(); - main19(); - main20(); + main_test_libpython(); + main_test_libpython_version(); + main_test_libperl(); + main_test_hello(); + main_test_libelf(); + main_test_libelf_mmap(); + main_test_glibc(); + main_test_dwarf(); + main_test_libelf_getphdrnum(); + main_test_libunwind(); + main_test_libaudit(); + main_test_libslang(); + main_test_gtk2(argc, argv); + main_test_gtk2_infobar(argc, argv); + main_test_libbfd(); + main_test_on_exit(); + main_test_backtrace(); + main_test_libnuma(); return 0; } -- cgit v0.10.2 From b016a0dd08999218a05a4b176bc08a9ff68ccc5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Oct 2013 14:32:10 +0200 Subject: tools/perf/build: Pass through all targets to Makefile.perf Jiri reported that 'make .o' stopped working: > [jolsa@krava perf]$ make -f Makefile perf.o > cc -c -o perf.o perf.c > In file included from builtin.h:4:0, > from perf.c:9: > util/util.h:74:24: fatal error: lk/debugfs.h: No such file or directory > compilation terminated. > make: *** [perf.o] Error 1 This is due to GNU make having built-in rules for popular targets such as *.o. Clear them out so that all targets as passed through to Makefile.perf. Reported-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Link: http://lkml.kernel.org/n/tip-5wkuvmlaaxtfgepKcvRij8sh@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3b925ad..6f6f13a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1,3 +1,10 @@ + +# +# Clear out the built-in rules GNU make defines by default (such as .o targets), +# so that we pass through all targets to Makefile.perf: +# +.SUFFIXES: + # # Do a parallel build with multiple jobs, based on the number of CPUs online # in this system: 'make -j8' on a 8-CPU system, etc. -- cgit v0.10.2 From 1c2d1d8cf4dbef33b8c6af6888bed2f4e659220e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Oct 2013 15:05:56 +0200 Subject: tools/perf/build: Make sure autodep feature binaries honor the O= setting Arnaldo noticed that the feature-check binaries are generated in the config/check-features/ directory even if O= is specified. Implement $(OUTPUT) logic for config/check-features/Makefile. Reported-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-NLwlnv5prsubuey0vfocebym@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index e80ffe8..3d656e3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -88,9 +88,14 @@ CFLAGS += -std=gnu99 EXTLIBS = -lelf -lpthread -lrt -lm -ldl +ifneq ($(OUTPUT),) + OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/ + $(shell mkdir -p $(OUTPUT_FEATURES)) +endif + feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -138,7 +143,7 @@ CORE_FEATURE_TESTS = \ # to skip the print-out of the long features list if the file # existed before and after it was built: # -ifeq ($(wildcard config/feature-checks/test-all),) +ifeq ($(wildcard $(OUTPUT)config/feature-checks/test-all),) test-all-failed := 1 else test-all-failed := 0 @@ -168,7 +173,7 @@ ifeq ($(feature-all), 1) # $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) else - $(shell $(MAKE) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif -- cgit v0.10.2 From 684f434cc05a122938b75e055d7d799f1dd58d55 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Oct 2013 11:11:32 +0200 Subject: tools/perf/build: Exclude MAKEFLAGS from nested invocation In case the user specifies MAKEFLAGS as an environment variable, or uses 'make -jN' explicitly, the options can conflict and result in: BUILD: Doing 'make -j8' parallel build make[1]: warning: -jN forced in submake: disabling jobserver mode. GEN common-cmds.h make[1]: *** write jobserver: Bad file descriptor. Stop. Make sure we invoke the main makefile in a pristine state. Users who want to do something non-standard can use the: make -f Makefile.perf method to invoke the makefile. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-uen6hzTvkqqngqwjma9yoEgw@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6f6f13a..74f52d8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1,3 +1,10 @@ +# +# This is a simple wrapper Makefile that calls the main Makefile.perf +# with a -j option to do parallel builds +# +# If you want to invoke the perf build in some non-standard way then +# you can use the 'make -f Makefile.perf' method to invoke it. +# # # Clear out the built-in rules GNU make defines by default (such as .o targets), @@ -6,6 +13,11 @@ .SUFFIXES: # +# We don't want to pass along options like -j: +# +unexport MAKEFLAGS + +# # Do a parallel build with multiple jobs, based on the number of CPUs online # in this system: 'make -j8' on a 8-CPU system, etc. # @@ -18,14 +30,12 @@ ifeq ($(JOBS),) endif endif -export JOBS - define print_msg @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' endef define make - @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) $@ + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(O) $@ endef # -- cgit v0.10.2 From b102420b500da97e0fc18d94f0600bddeced1b99 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Oct 2013 12:08:05 +0200 Subject: tools/perf/build: Fix non-canonical directory names in O= This was a long-standing bug, relative pathnames like O=dir did not fully work in the build system: $ make O=localdir clean SUBDIR Documentation ../../scripts/Makefile.include:3: *** O=localdir does not exist. Stop. make[1]: *** [clean] Error 2 make: *** [clean] Error 2 Fix this by canonizing the directory before passing it to Makefile.perf. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-hchMp1hozn9tqgswWcooxcru@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 74f52d8..9580ebe 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -30,12 +30,19 @@ ifeq ($(JOBS),) endif endif +# +# Only pass canonical directory names as the output directory: +# +ifneq ($(O),) + FULL_O := $(shell readlink -f $(O)) +endif + define print_msg @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' endef define make - @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(O) $@ + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $@ endef # -- cgit v0.10.2 From 1f7c645ab4b8326fef5afcd842795e071ecce9df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Oct 2013 12:14:59 +0200 Subject: tools/perf/build: Fix O=/some/dir perf.o type of targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If someone specifies a single target, mixed with O=, the following way: hubble:~/tip/tools/perf> make O=/tmp/perf util/stat.o BUILD: Doing 'make -j8' parallel build gcc -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k [...] The build might even fail, if a target depends on other targets: hubble:~/tip/tools/perf> make O=/tmp/perf perf.o ... perf.c: In function ‘handle_options’: perf.c:155:21: error: ‘PERF_HTML_PATH’ undeclared (first use in this function) The correct way to invoke such targets is: hubble:~/tip/tools/perf> make O=/tmp/perf /tmp/perf/perf.o BUILD: Doing 'make -j8' parallel build GEN /tmp/perf/common-cmds.h CC /tmp/perf/perf.o But that's unnecessary typing and it's also easy to mistakenly build into the source directory. To fix this remove the generic suffix rules and add redirection to $(OUTPUT) for the most popular .o targets. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-mk0oiukmhgSbrll6chrPkkqr@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 178a1c8..a24f6c2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -576,7 +576,21 @@ $(OUTPUT)perf.o perf.spec \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: -.SUFFIXES: .o .c .S .s + +# +# If a target does not match any of the later rules then prefix it by $(OUTPUT) +# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way. +# +ifneq ($(OUTPUT),) +%.o: $(OUTPUT)%.o + @echo " # Redirected target $@ => $(OUTPUT)$@" +util/%.o: $(OUTPUT)util/%.o + @echo " # Redirected target $@ => $(OUTPUT)util/$@" +bench/%.o: $(OUTPUT)bench/%.o + @echo " # Redirected target $@ => $(OUTPUT)bench/$@" +tests/%.o: $(OUTPUT)tests/%.o + @echo " # Redirected target $@ => $(OUTPUT)tests/$@" +endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -- cgit v0.10.2 From 20c99e82173bed9e4e0013ec45c0b2b3b80b65d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 13:27:23 +0200 Subject: tools/perf/build: Harmonize the style of the feature testcases The various testcases used different styles, which was not really visible as long as they hid in feature-tests.mak. Now that they are out in the open make them prettier. ( Also delete the leftover, empty feature-tests.mak file. ) Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-drDWk8xltndjdsespzjbhu6w@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/perf/config/feature-checks/test-backtrace.c index 5b33bcf..7124aa1 100644 --- a/tools/perf/config/feature-checks/test-backtrace.c +++ b/tools/perf/config/feature-checks/test-backtrace.c @@ -11,4 +11,3 @@ int main(void) return 0; } - diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/perf/config/feature-checks/test-cplus-demangle.c index ab29f80..610c686 100644 --- a/tools/perf/config/feature-checks/test-cplus-demangle.c +++ b/tools/perf/config/feature-checks/test-cplus-demangle.c @@ -12,4 +12,3 @@ int main(void) return 0; } - diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/perf/config/feature-checks/test-dwarf.c index 783dfcd..3fc1801 100644 --- a/tools/perf/config/feature-checks/test-dwarf.c +++ b/tools/perf/config/feature-checks/test-dwarf.c @@ -5,5 +5,6 @@ int main(void) { Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + return (long)dbg; } diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/perf/config/feature-checks/test-glibc.c index 13c66a5..b082034 100644 --- a/tools/perf/config/feature-checks/test-glibc.c +++ b/tools/perf/config/feature-checks/test-glibc.c @@ -3,6 +3,6 @@ int main(void) { const char *version = gnu_get_libc_version(); + return (long)version; } - diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/perf/config/feature-checks/test-libaudit.c index f7e791e..afc019f 100644 --- a/tools/perf/config/feature-checks/test-libaudit.c +++ b/tools/perf/config/feature-checks/test-libaudit.c @@ -5,5 +5,6 @@ extern int printf(const char *format, ...); int main(void) { printf("error message: %s\n", audit_errno_to_name(0)); + return audit_open(); } diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/perf/config/feature-checks/test-libbfd.c index 1886c78..2405990 100644 --- a/tools/perf/config/feature-checks/test-libbfd.c +++ b/tools/perf/config/feature-checks/test-libbfd.c @@ -13,4 +13,3 @@ int main(void) return 0; } - diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c index 58eca53..d710459 100644 --- a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c +++ b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c @@ -1,7 +1,8 @@ #include -# + int main(void) { size_t dst; + return elf_getphdrnum(0, &dst); } diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/perf/config/feature-checks/test-libelf-mmap.c index 1c64815..564427d 100644 --- a/tools/perf/config/feature-checks/test-libelf-mmap.c +++ b/tools/perf/config/feature-checks/test-libelf-mmap.c @@ -1,7 +1,8 @@ #include -# + int main(void) { Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0); + return (long)elf; } diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/perf/config/feature-checks/test-libelf.c index 1a08f97..08db322 100644 --- a/tools/perf/config/feature-checks/test-libelf.c +++ b/tools/perf/config/feature-checks/test-libelf.c @@ -3,5 +3,6 @@ int main(void) { Elf *elf = elf_begin(0, ELF_C_READ, 0); + return (long)elf; } diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/perf/config/feature-checks/test-libnuma.c index 70510a9..4763d9c 100644 --- a/tools/perf/config/feature-checks/test-libnuma.c +++ b/tools/perf/config/feature-checks/test-libnuma.c @@ -4,5 +4,6 @@ int main(void) { numa_available(); + return 0; } diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/perf/config/feature-checks/test-libpython.c index 7226797..b24b28a 100644 --- a/tools/perf/config/feature-checks/test-libpython.c +++ b/tools/perf/config/feature-checks/test-libpython.c @@ -1,7 +1,8 @@ #include -# + int main(void) { Py_Initialize(); + return 0; } diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak deleted file mode 100644 index 139597f..0000000 --- a/tools/perf/config/feature-tests.mak +++ /dev/null @@ -1,2 +0,0 @@ - - -- cgit v0.10.2 From aa4acf6cf102f639a2023b389ca8be8b8d9cad52 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 7 Oct 2013 17:51:29 +0200 Subject: tools/perf/build: Pass through LDFLAGS to feature tests David Ahern reported that when passing in LDFLAGS=-static then the feature checks still succeed - causing build failures down the line because the static libraries are missing. Solve this by passing through LDFLAGS to the feature-check Makefile. Reported-by: David Ahern Tested-by: David Ahern Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20131007155129.GA1066@gmail.com Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 3d656e3..78f3b3e 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -95,7 +95,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) LDFLAGS=$(LDFLAGS) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -173,7 +173,7 @@ ifeq ($(feature-all), 1) # $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index e21bceb..8ecac19 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -32,7 +32,7 @@ CC := $(CC) -MD all: $(FILES) -BUILD = $(CC) -o $(OUTPUT)$@ $@.c +BUILD = $(CC) $(LDFLAGS) -o $(OUTPUT)$@ $@.c ############################### -- cgit v0.10.2 From 165108a92fc554d51e73b143b69b77e7c278da78 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 8 Oct 2013 17:51:10 +0200 Subject: tools/perf/build: Clean up feature_print_code() Remove DUMMY by making sure 'feature_print' is evaluated and thus all messages are printed. Signed-off-by: Jiri Olsa Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: David Ahern Link: http://lkml.kernel.org/r/20131008155110.GA15558@krava.redhat.com Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 78f3b3e..f5d661f 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -180,21 +180,21 @@ endif # # Print the result of the feature test: # -feature_print = $(eval $(feature_print_code)) +feature_print = $(eval $(feature_print_code)) $(info $(MSG)) + define feature_print_code ifeq ($(feature-$(1)), 1) MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) else MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) endif - $(info $(MSG)) endef # # Only print out our features if we rebuilt the testcases or if a test failed: # ifeq ($(test-all-failed), 1) - $(foreach feat,$(CORE_FEATURE_TESTS) DUMMY,$(call feature_print,$(feat))) + $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_print,$(feat))) $(info ) endif -- cgit v0.10.2 From 01533e9720c8527faf0bc6e476c4c911a488e268 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 12:20:12 -0300 Subject: perf trace: Put syscall formatter parms into struct So that we can add more state to formatters without having to modify all of them. Example is to pass a table to a generic string formatter, like for setitimer 'which' arg. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-zyi2esmas5wfrxznh0x0fkiz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71aa3e3..939426c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -33,22 +33,24 @@ # define MADV_UNMERGEABLE 13 #endif +struct syscall_arg { + unsigned long val; + u8 idx; + u8 mask; +}; + static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, - unsigned long arg, - u8 arg_idx __maybe_unused, - u8 *arg_mask __maybe_unused) + struct syscall_arg *arg) { - return scnprintf(bf, size, "%#lx", arg); + return scnprintf(bf, size, "%#lx", arg->val); } #define SCA_HEX syscall_arg__scnprintf_hex static size_t syscall_arg__scnprintf_whence(char *bf, size_t size, - unsigned long arg, - u8 arg_idx __maybe_unused, - u8 *arg_mask __maybe_unused) + struct syscall_arg *arg) { - int whence = arg; + int whence = arg->val; switch (whence) { #define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n) @@ -71,11 +73,9 @@ static size_t syscall_arg__scnprintf_whence(char *bf, size_t size, #define SCA_WHENCE syscall_arg__scnprintf_whence static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, - unsigned long arg, - u8 arg_idx __maybe_unused, - u8 *arg_mask __maybe_unused) + struct syscall_arg *arg) { - int printed = 0, prot = arg; + int printed = 0, prot = arg->val; if (prot == PROT_NONE) return scnprintf(bf, size, "NONE"); @@ -104,10 +104,9 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, - unsigned long arg, u8 arg_idx __maybe_unused, - u8 *arg_mask __maybe_unused) + struct syscall_arg *arg) { - int printed = 0, flags = arg; + int printed = 0, flags = arg->val; #define P_MMAP_FLAG(n) \ if (flags & MAP_##n) { \ @@ -148,10 +147,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, - unsigned long arg, u8 arg_idx __maybe_unused, - u8 *arg_mask __maybe_unused) + struct syscall_arg *arg) { - int behavior = arg; + int behavior = arg->val; switch (behavior) { #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) @@ -190,8 +188,7 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior -static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg, - u8 arg_idx __maybe_unused, u8 *arg_mask) +static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) { enum syscall_futex_args { SCF_UADDR = (1 << 0), @@ -201,24 +198,24 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo SCF_UADDR2 = (1 << 4), SCF_VAL3 = (1 << 5), }; - int op = arg; + int op = arg->val; int cmd = op & FUTEX_CMD_MASK; size_t printed = 0; switch (cmd) { #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); - P_FUTEX_OP(WAIT); *arg_mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAKE); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(FD); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(REQUEUE); *arg_mask |= SCF_VAL3|SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE); *arg_mask |= SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; P_FUTEX_OP(WAKE_OP); break; - P_FUTEX_OP(LOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(UNLOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(TRYLOCK_PI); *arg_mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAIT_BITSET); *arg_mask |= SCF_UADDR2; break; - P_FUTEX_OP(WAKE_BITSET); *arg_mask |= SCF_UADDR2; break; + P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; + P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; P_FUTEX_OP(WAIT_REQUEUE_PI); break; default: printed = scnprintf(bf, size, "%#x", cmd); break; } @@ -235,13 +232,12 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, - unsigned long arg, - u8 arg_idx, u8 *arg_mask) + struct syscall_arg *arg) { - int printed = 0, flags = arg; + int printed = 0, flags = arg->val; if (!(flags & O_CREAT)) - *arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */ + arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ if (flags == 0) return scnprintf(bf, size, "RDONLY"); @@ -294,7 +290,7 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, static struct syscall_fmt { const char *name; const char *alias; - size_t (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask); + size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); bool errmsg; bool timeout; bool hexret; @@ -364,8 +360,7 @@ struct syscall { const char *name; bool filtered; struct syscall_fmt *fmt; - size_t (**arg_scnprintf)(char *bf, size_t size, - unsigned long arg, u8 arg_idx, u8 *args_mask); + size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); }; static size_t fprintf_duration(unsigned long t, FILE *fp) @@ -605,30 +600,35 @@ static int trace__read_syscall_info(struct trace *trace, int id) static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, unsigned long *args) { - int i = 0; size_t printed = 0; if (sc->tp_format != NULL) { struct format_field *field; - u8 mask = 0, bit = 1; + u8 bit = 1; + struct syscall_arg arg = { + .idx = 0, + .mask = 0, + }; for (field = sc->tp_format->format.fields->next; field; - field = field->next, ++i, bit <<= 1) { - if (mask & bit) + field = field->next, ++arg.idx, bit <<= 1) { + if (arg.mask & bit) continue; printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); - - if (sc->arg_scnprintf && sc->arg_scnprintf[i]) { - printed += sc->arg_scnprintf[i](bf + printed, size - printed, - args[i], i, &mask); + if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { + arg.val = args[arg.idx]; + printed += sc->arg_scnprintf[arg.idx](bf + printed, + size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[i]); + "%ld", args[arg.idx]); } } } else { + int i = 0; + while (i < 6) { printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", -- cgit v0.10.2 From 1f115cb72e44391fac3ab1c562a77b421469ac2d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 15:50:28 -0300 Subject: perf trace: Allow passing parms to arg formatters So that we can have generic formatters that act upon specific parameters. Start using them with a simple string table that assumes entries will be indexes to a string table, like with the 'which' parm for the set and getitimer syscalls Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-r0dqhapr8j6150v1wctgg340@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 939426c..386ca20 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -35,10 +35,35 @@ struct syscall_arg { unsigned long val; + void *parm; u8 idx; u8 mask; }; +struct strarray { + int nr_entries; + const char **entries; +}; + +#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ + .nr_entries = ARRAY_SIZE(array), \ + .entries = array, \ +} + +static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, + struct syscall_arg *arg) +{ + int idx = arg->val; + struct strarray *sa = arg->parm; + + if (idx < 0 || idx >= sa->nr_entries) + return scnprintf(bf, size, "%d", idx); + + return scnprintf(bf, size, "%s", sa->entries[idx]); +} + +#define SCA_STRARRAY syscall_arg__scnprintf_strarray + static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) { @@ -229,6 +254,9 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct sysc return printed; } +static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; +static DEFINE_STRARRAY(itimers); + #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, @@ -291,6 +319,7 @@ static struct syscall_fmt { const char *name; const char *alias; size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); + void *arg_parm[6]; bool errmsg; bool timeout; bool hexret; @@ -305,6 +334,9 @@ static struct syscall_fmt { { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, + { .name = "getitimer", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, + .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, { .name = "lseek", .errmsg = true, @@ -338,6 +370,9 @@ static struct syscall_fmt { { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, }, { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "setitimer", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, + .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, { .name = "socket", .errmsg = true, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "uname", .errmsg = true, .alias = "newuname", }, @@ -361,6 +396,7 @@ struct syscall { bool filtered; struct syscall_fmt *fmt; size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + void **arg_parm; }; static size_t fprintf_duration(unsigned long t, FILE *fp) @@ -528,6 +564,9 @@ static int syscall__set_arg_fmts(struct syscall *sc) if (sc->arg_scnprintf == NULL) return -1; + if (sc->fmt) + sc->arg_parm = sc->fmt->arg_parm; + for (field = sc->tp_format->format.fields->next; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; @@ -619,6 +658,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { arg.val = args[arg.idx]; + if (sc->arg_parm) + arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { -- cgit v0.10.2 From efe6b882cda2a9967a629fa14b6106b6a9a558a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 16:15:12 -0300 Subject: perf trace: Use strarray for ltrace's whence arg Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5f9jhbq8my4ojarhtlygveox@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 386ca20..7cb6052 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -72,31 +72,6 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, #define SCA_HEX syscall_arg__scnprintf_hex -static size_t syscall_arg__scnprintf_whence(char *bf, size_t size, - struct syscall_arg *arg) -{ - int whence = arg->val; - - switch (whence) { -#define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n) - P_WHENCE(SET); - P_WHENCE(CUR); - P_WHENCE(END); -#ifdef SEEK_DATA - P_WHENCE(DATA); -#endif -#ifdef SEEK_HOLE - P_WHENCE(HOLE); -#endif -#undef P_WHENCE - default: break; - } - - return scnprintf(bf, size, "%#x", whence); -} - -#define SCA_WHENCE syscall_arg__scnprintf_whence - static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) { @@ -254,10 +229,20 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct sysc return printed; } +#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op + static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; static DEFINE_STRARRAY(itimers); -#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op +static const char *whences[] = { "SET", "CUR", "END", +#ifdef SEEK_DATA +"DATA", +#endif +#ifdef SEEK_HOLE +"HOLE", +#endif +}; +static DEFINE_STRARRAY(whences); static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) @@ -340,7 +325,8 @@ static struct syscall_fmt { { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, { .name = "lseek", .errmsg = true, - .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, }, + .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, + .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, { .name = "lstat", .errmsg = true, .alias = "newlstat", }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ -- cgit v0.10.2 From 80f587d5f9424ebdf93abbe79ab87e7fb7d4699d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 16:28:58 -0300 Subject: perf trace: Beautify fcntl 'cmd' arg This is just for the low hanging fruit 'cmd' arg, a proper beautifier will as well use arg->mask to ignore the third arg for some of the cmds. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-phhvcyi9vdnxw9l11tbquvru@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7cb6052..8455a0a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -244,6 +244,14 @@ static const char *whences[] = { "SET", "CUR", "END", }; static DEFINE_STRARRAY(whences); +static const char *fcntl_cmds[] = { + "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", + "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", + "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", + "F_GETOWNER_UIDS", +}; +static DEFINE_STRARRAY(fcntl_cmds); + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -315,6 +323,9 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "mmap", .hexret = true, }, { .name = "connect", .errmsg = true, }, + { .name = "fcntl", .errmsg = true, + .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, + .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, -- cgit v0.10.2 From eb5b1b1475d5ce6ebe84fd4c641545a41839d5f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 16:37:46 -0300 Subject: perf trace: Beautify rt_sigprocmask 'how' arg Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-e2epkc38e3x0uqmi1xie4tgc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8455a0a..ecc83ce 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -252,6 +252,9 @@ static const char *fcntl_cmds[] = { }; static DEFINE_STRARRAY(fcntl_cmds); +static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; +static DEFINE_STRARRAY(sighow); + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -366,6 +369,9 @@ static struct syscall_fmt { { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, }, + { .name = "rt_sigprocmask", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ }, + .arg_parm = { [0] = &strarray__sighow, /* how */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "setitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, -- cgit v0.10.2 From 8bad5b0abfdbd0866c2b0445fdee8a8c2c38865b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2013 17:17:15 -0300 Subject: perf trace: Beautify signal number arg in several syscalls Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ek8w714ramabyl5jqqvjlbyb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ecc83ce..eb0bbff 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -311,6 +311,51 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags +static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) +{ + int sig = arg->val; + + switch (sig) { +#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) + P_SIGNUM(HUP); + P_SIGNUM(INT); + P_SIGNUM(QUIT); + P_SIGNUM(ILL); + P_SIGNUM(TRAP); + P_SIGNUM(ABRT); + P_SIGNUM(BUS); + P_SIGNUM(FPE); + P_SIGNUM(KILL); + P_SIGNUM(USR1); + P_SIGNUM(SEGV); + P_SIGNUM(USR2); + P_SIGNUM(PIPE); + P_SIGNUM(ALRM); + P_SIGNUM(TERM); + P_SIGNUM(STKFLT); + P_SIGNUM(CHLD); + P_SIGNUM(CONT); + P_SIGNUM(STOP); + P_SIGNUM(TSTP); + P_SIGNUM(TTIN); + P_SIGNUM(TTOU); + P_SIGNUM(URG); + P_SIGNUM(XCPU); + P_SIGNUM(XFSZ); + P_SIGNUM(VTALRM); + P_SIGNUM(PROF); + P_SIGNUM(WINCH); + P_SIGNUM(IO); + P_SIGNUM(PWR); + P_SIGNUM(SYS); + default: break; + } + + return scnprintf(bf, size, "%#x", sig); +} + +#define SCA_SIGNUM syscall_arg__scnprintf_signum + static struct syscall_fmt { const char *name; const char *alias; @@ -338,6 +383,8 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, + { .name = "kill", .errmsg = true, + .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, @@ -369,15 +416,25 @@ static struct syscall_fmt { { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, }, + { .name = "rt_sigaction", .errmsg = true, + .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ }, .arg_parm = { [0] = &strarray__sighow, /* how */ }, }, + { .name = "rt_sigqueueinfo", .errmsg = true, + .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + { .name = "rt_tgsigqueueinfo", .errmsg = true, + .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "setitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, { .name = "socket", .errmsg = true, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "tgkill", .errmsg = true, + .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + { .name = "tkill", .errmsg = true, + .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, }; -- cgit v0.10.2 From e10bce815d5c0e7eb793231e4334839b2cd0d81f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Sep 2013 10:27:41 -0300 Subject: perf trace: Beautify socket 'family' arg Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8xuaupgmy82v7sha3l09oaux@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index eb0bbff..6280166 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -255,6 +255,16 @@ static DEFINE_STRARRAY(fcntl_cmds); static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; static DEFINE_STRARRAY(sighow); +static const char *socket_families[] = { + "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", + "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", + "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", + "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", + "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", + "ALG", "NFC", "VSOCK", +}; +static DEFINE_STRARRAY(socket_families); + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -429,7 +439,9 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, - { .name = "socket", .errmsg = true, }, + { .name = "socket", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ }, + .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, -- cgit v0.10.2 From a28b24b27869d7a2fc0e6a0cae714641c83791a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Sep 2013 11:00:44 -0300 Subject: perf trace: Beautify socket 'type' arg Taking into account the fact that the SOCK_ types can be overriden for ABI reasons on MIPS and also masking and interpreting the socket flags (NONBLOCK and CLOEXEC), printing whatever is left in the flags bits as an hex number, or'ed. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cbn57082gq9v0sbsd67edwjq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6280166..2b19aef 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -265,6 +265,53 @@ static const char *socket_families[] = { }; static DEFINE_STRARRAY(socket_families); +#ifndef SOCK_TYPE_MASK +#define SOCK_TYPE_MASK 0xf +#endif + +static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, + struct syscall_arg *arg) +{ + size_t printed; + int type = arg->val, + flags = type & ~SOCK_TYPE_MASK; + + type &= SOCK_TYPE_MASK; + /* + * Can't use a strarray, MIPS may override for ABI reasons. + */ + switch (type) { +#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; + P_SK_TYPE(STREAM); + P_SK_TYPE(DGRAM); + P_SK_TYPE(RAW); + P_SK_TYPE(RDM); + P_SK_TYPE(SEQPACKET); + P_SK_TYPE(DCCP); + P_SK_TYPE(PACKET); +#undef P_SK_TYPE + default: + printed = scnprintf(bf, size, "%#x", type); + } + +#define P_SK_FLAG(n) \ + if (flags & SOCK_##n) { \ + printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ + flags &= ~SOCK_##n; \ + } + + P_SK_FLAG(CLOEXEC); + P_SK_FLAG(NONBLOCK); +#undef P_SK_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "|%#x", flags); + + return printed; +} + +#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -440,7 +487,8 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, { .name = "socket", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ }, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ + [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "tgkill", .errmsg = true, -- cgit v0.10.2 From 511089994e33c9dbfe2c6e1fb2fb00b5e53866e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Sep 2013 11:42:27 -0300 Subject: perf trace: Beautify access 'mode' arg Removing the _OK suffix and using RWX when all three bits are set, for instance. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ypaz9k43lyqy94679feqnv8x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2b19aef..7aa6bca 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -312,6 +312,33 @@ static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type +static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, + struct syscall_arg *arg) +{ + size_t printed = 0; + int mode = arg->val; + + if (mode == F_OK) /* 0 */ + return scnprintf(bf, size, "F"); +#define P_MODE(n) \ + if (mode & n##_OK) { \ + printed += scnprintf(bf + printed, size - printed, "%s", #n); \ + mode &= ~n##_OK; \ + } + + P_MODE(R); + P_MODE(W); + P_MODE(X); +#undef P_MODE + + if (mode) + printed += scnprintf(bf + printed, size - printed, "|%#x", mode); + + return printed; +} + +#define SCA_ACCMODE syscall_arg__scnprintf_access_mode + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -422,7 +449,8 @@ static struct syscall_fmt { bool timeout; bool hexret; } syscall_fmts[] = { - { .name = "access", .errmsg = true, }, + { .name = "access", .errmsg = true, + .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, -- cgit v0.10.2 From c045bf02e47664af67f8970a6a67065ff51acf62 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Sep 2013 11:52:33 -0300 Subject: perf trace: Beautify rlmimit resources On the getrlimit, setrlimit and prlimit64 syscalls. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-pups75313afhn7p96qwhzs9v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7aa6bca..7ce036e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -252,6 +252,13 @@ static const char *fcntl_cmds[] = { }; static DEFINE_STRARRAY(fcntl_cmds); +static const char *rlimit_resources[] = { + "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", + "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", + "RTTIME", +}; +static DEFINE_STRARRAY(rlimit_resources); + static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; static DEFINE_STRARRAY(sighow); @@ -466,6 +473,9 @@ static struct syscall_fmt { { .name = "getitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, + { .name = "getrlimit", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, + .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, { .name = "kill", .errmsg = true, @@ -498,6 +508,9 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", }, + { .name = "prlimit64", .errmsg = true, + .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ }, + .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, }, @@ -514,6 +527,9 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, + { .name = "setrlimit", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, + .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, -- cgit v0.10.2 From 4bb09192d38ef08f0619667527cabb26354fff89 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Sep 2013 12:37:43 -0600 Subject: perf trace: Add option to show full timestamp Current timestamp shown for output is time relative to firt sample. This patch adds an option to show the absolute perf_clock timestamp which is useful when comparing output across commands (e.g., perf-trace to perf-script). Signed-off-by: David Ahern Cc: Adrian Hunter Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1378319865-55695-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index daccd2c..a93e91a 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -78,6 +78,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --input Process events from a given perf data file. +-T +--time + Print full timestamp rather time relative to first sample. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7ce036e..bc21140 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -624,6 +624,7 @@ struct trace { struct perf_record_opts opts; struct machine host; u64 base_time; + bool full_time; FILE *output; unsigned long nr_events; struct strlist *ev_qualifier; @@ -1066,7 +1067,7 @@ static int trace__process_sample(struct perf_tool *tool, if (skip_sample(trace, sample)) return 0; - if (trace->base_time == 0) + if (!trace->full_time && trace->base_time == 0) trace->base_time = sample->time; if (handler) @@ -1195,7 +1196,7 @@ again: continue; } - if (trace->base_time == 0) + if (!trace->full_time && trace->base_time == 0) trace->base_time = sample.time; if (type != PERF_RECORD_SAMPLE) { @@ -1433,6 +1434,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace__set_duration), OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_BOOLEAN('T', "time", &trace.full_time, + "Show full timestamp, not time relative to first start"), OPT_END() }; int err; -- cgit v0.10.2 From 770480592084d2c114adedfbe6740e345aaf2279 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Sep 2013 12:37:44 -0600 Subject: perf trace: Remove duplicate mmap entry in syscall_fmts array Entries in syscall_fmts need to be in alphabetical order, and the duplicate entry breaks bsearch on new entries around this duplicate entry. Signed-off-by: David Ahern Cc: Adrian Hunter Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1378319865-55695-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bc21140..903416c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -461,7 +461,6 @@ static struct syscall_fmt { { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, - { .name = "mmap", .hexret = true, }, { .name = "connect", .errmsg = true, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, -- cgit v0.10.2 From 22ae5cf1c9b4e27a3efe96d8fa6d038560a2cf23 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Sep 2013 11:27:34 -0300 Subject: perf trace: Don't print zeroed args This way we make the output more compact. If somebody complain (and provide a sane reason why we would like to see zeroes) we can make it an optional, ~/.perfconfig configurable knob. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-myqozw43hk8z2r5hsupzdk82@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 903416c..516f6b3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -818,6 +818,9 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (arg.mask & bit) continue; + if (args[arg.idx] == 0) + continue; + printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { -- cgit v0.10.2 From b2cc99fdaad20b6e787f0864a1ac7b14391f9fe6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Sep 2013 11:54:48 -0300 Subject: perf trace: Beautify send/recv syscall 'flags' arg [root@sandy ~]# perf trace -a -e recvmmsg,recvmsg,recvfrom,sendto,sendmsg,sendmmsg 6.901 (0.002 ms): 589 recvmsg(fd: 51, msg: 0x7fff35673420, flags: CMSG_CLOEXEC) = -1 EAGAIN Resource temporarily unavailable 6.966 (0.008 ms): 589 sendmsg(fd: 50, msg: 0x7fff35673230, flags: NOSIGNAL ) = 961 6.984 (0.004 ms): 979 sendmsg(fd: 3, msg: 0x7fff5b484940, flags: NOSIGNAL ) = 945 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-h25k5k50nac0ej5cl5iwgvae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 516f6b3..c400fbe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -319,6 +319,60 @@ static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type +#ifndef MSG_PROBE +#define MSG_PROBE 0x10 +#endif +#ifndef MSG_SENDPAGE_NOTLAST +#define MSG_SENDPAGE_NOTLAST 0x20000 +#endif +#ifndef MSG_FASTOPEN +#define MSG_FASTOPEN 0x20000000 +#endif + +static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_MSG_FLAG(n) \ + if (flags & MSG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MSG_##n; \ + } + + P_MSG_FLAG(OOB); + P_MSG_FLAG(PEEK); + P_MSG_FLAG(DONTROUTE); + P_MSG_FLAG(TRYHARD); + P_MSG_FLAG(CTRUNC); + P_MSG_FLAG(PROBE); + P_MSG_FLAG(TRUNC); + P_MSG_FLAG(DONTWAIT); + P_MSG_FLAG(EOR); + P_MSG_FLAG(WAITALL); + P_MSG_FLAG(FIN); + P_MSG_FLAG(SYN); + P_MSG_FLAG(CONFIRM); + P_MSG_FLAG(RST); + P_MSG_FLAG(ERRQUEUE); + P_MSG_FLAG(NOSIGNAL); + P_MSG_FLAG(MORE); + P_MSG_FLAG(WAITFORONE); + P_MSG_FLAG(SENDPAGE_NOTLAST); + P_MSG_FLAG(FASTOPEN); + P_MSG_FLAG(CMSG_CLOEXEC); +#undef P_MSG_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags + static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, struct syscall_arg *arg) { @@ -512,7 +566,12 @@ static struct syscall_fmt { .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, - { .name = "recvfrom", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "recvmmsg", .errmsg = true, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "recvmsg", .errmsg = true, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, @@ -523,6 +582,12 @@ static struct syscall_fmt { { .name = "rt_tgsigqueueinfo", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "sendmmsg", .errmsg = true, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "sendmsg", .errmsg = true, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "sendto", .errmsg = true, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "setitimer", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, -- cgit v0.10.2 From 49af9e93adfa11d50435aa079299a765843532fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Sep 2013 12:18:56 -0300 Subject: perf trace: Beautify eventfd2 'flags' arg 61.168 ( 0.004 ms): 24267 eventfd2(flags: CLOEXEC|NONBLOCK) = 9 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3hg8eajdzil077501c8f5jkw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c400fbe..8a09ba3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -456,6 +457,32 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags +static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_FLAG(n) \ + if (flags & EFD_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~EFD_##n; \ + } + + P_FLAG(SEMAPHORE); + P_FLAG(CLOEXEC); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags + static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) { int sig = arg->val; @@ -516,6 +543,8 @@ static struct syscall_fmt { { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "connect", .errmsg = true, }, + { .name = "eventfd2", .errmsg = true, + .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, -- cgit v0.10.2 From 50c95cbd70808aa2e5ba8d79e503456f1da37aeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Sep 2013 12:35:21 -0300 Subject: perf trace: Add option to show process COMM Enabled by default, disable with --no-comm, e.g.: 181.821 (0.001 ms): deja-dup-monit/10784 recvmsg(fd: 8, msg: 0x7fff4342baf0, flags: PEEK|TRUNC|CMSG_CLOEXEC ) = 20 181.824 (0.001 ms): deja-dup-monit/10784 geteuid( ) = 1000 181.825 (0.001 ms): deja-dup-monit/10784 getegid( ) = 1000 181.834 (0.002 ms): deja-dup-monit/10784 recvmsg(fd: 8, msg: 0x7fff4342baf0, flags: CMSG_CLOEXEC ) = 20 181.836 (0.001 ms): deja-dup-monit/10784 geteuid( ) = 1000 181.838 (0.001 ms): deja-dup-monit/10784 getegid( ) = 1000 181.705 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: PEEK|TRUNC|CMSG_CLOEXEC) = 1256 181.710 (0.002 ms): evolution-addr/10924 geteuid( ) = 1000 181.712 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.727 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: CMSG_CLOEXEC ) = 1256 181.731 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.734 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.908 (0.002 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: PEEK|TRUNC|CMSG_CLOEXEC) = 20 181.913 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.915 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 181.930 (0.003 ms): evolution-addr/10924 recvmsg(fd: 10, msg: 0x7fff17dc6990, flags: CMSG_CLOEXEC ) = 20 181.934 (0.001 ms): evolution-addr/10924 geteuid( ) = 1000 181.937 (0.001 ms): evolution-addr/10924 getegid( ) = 1000 220.718 (0.010 ms): at-spi2-regist/10715 sendmsg(fd: 3, msg: 0x7fffdb8756c0, flags: NOSIGNAL ) = 200 220.741 (0.000 ms): dbus-daemon/10711 ... [continued]: epoll_wait()) = 1 220.759 (0.004 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = 200 220.780 (0.002 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = 200 220.788 (0.001 ms): dbus-daemon/10711 recvmsg(fd: 11, msg: 0x7ffff94594d0, flags: CMSG_CLOEXEC ) = -1 EAGAIN Resource temporarily unavailable 220.760 (0.004 ms): at-spi2-regist/10715 sendmsg(fd: 3, msg: 0x7fffdb8756c0, flags: NOSIGNAL ) = 200 220.771 (0.023 ms): perf/26347 open(filename: 0xf2e780, mode: 15918976 ) = 19 220.850 (0.002 ms): perf/26347 close(fd: 19 ) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6be5jvnkdzjptdrebfn5263n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index a93e91a..3777385 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -82,6 +82,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --time Print full timestamp rather time relative to first sample. +--comm:: + Show process COMM right beside its ID, on by default, disable with --no-comm. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8a09ba3..be65843 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -726,6 +726,7 @@ struct trace { struct intlist *pid_list; bool sched; bool multiple_threads; + bool show_comm; double duration_filter; double runtime_ms; }; @@ -755,8 +756,11 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); printed += fprintf_duration(duration, fp); - if (trace->multiple_threads) + if (trace->multiple_threads) { + if (trace->show_comm) + printed += fprintf(fp, "%.14s/", thread->comm); printed += fprintf(fp, "%d ", thread->tid); + } return printed; } @@ -1503,10 +1507,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .mmap_pages = 1024, }, .output = stdout, + .show_comm = true, }; const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_BOOLEAN(0, "comm", &trace.show_comm, + "show the thread COMM next to its id"), OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), -- cgit v0.10.2 From 1ba6e01782fd2a94481e18b91b363636f8171565 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:25 +0530 Subject: perf completion: Don't dictate perf install location The statement have perf limits the locations in which to look for the perf program. Moreover, it depends on the bash-completion package to be installed. Replace it with a call to `type perf`. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1372941691-14684-2-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 56e6a12..50540cf 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -19,7 +19,7 @@ __ltrim_colon_completions() fi } -have perf && +type perf &>/dev/null && _perf() { local cur prev cmd -- cgit v0.10.2 From 30079d1d5ebcb8d706c6e05cacebb7facc60cd95 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:26 +0530 Subject: perf completion: Update __ltrim_colon_completions The function is taken from the bash-completion package; update it to use the latest version where colon_word doesn't miss quoting. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1372941691-14684-3-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 50540cf..b0cdd12 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -11,7 +11,7 @@ __ltrim_colon_completions() { if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then # Remove colon-word prefix from COMPREPLY items - local colon_word=${1%${1##*:}} + local colon_word=${1%"${1##*:}"} local i=${#COMPREPLY[*]} while [[ $((--i)) -ge 0 ]]; do COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"} -- cgit v0.10.2 From 7b6c48e16e5d312b0ebae78acfb3ff4f9c8c083c Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:27 +0530 Subject: perf completion: Strip dependency on _filedir _filedir is defined in the bash-completion package, but there is no need to depend on it. Instead, call complete with multiple -o arguments before the -F argument like in git.git's completion script. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1372941691-14684-4-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index b0cdd12..d2598be 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -54,9 +54,8 @@ _perf() subcmd=${COMP_WORDS[1]} opts=$($cmd $subcmd --list-opts) COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) ) - # Fall down to list regular files - else - _filedir fi } && -complete -F _perf perf + +complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \ + || complete -o default -o nospace -F _perf perf -- cgit v0.10.2 From 4685a6cfaeac6d2fe8ed10a9aa0cbff5026529cb Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:29 +0530 Subject: perf completion: Strip function_exists () Use "type" to check existence consistently. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1372941691-14684-6-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index d2598be..35fdda1 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -1,12 +1,6 @@ # perf completion -function_exists() -{ - declare -F $1 > /dev/null - return $? -} - -function_exists __ltrim_colon_completions || +type __ltrim_colon_completions &>/dev/null || __ltrim_colon_completions() { if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then -- cgit v0.10.2 From c3fb6717e90049b93d0f5f5714a4d878799d89c2 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:30 +0530 Subject: perf completion: Strip dependency on bash-completion The bash-completion package defines the _get_comp_words_by_ref function. There is no need to depend on it, as we can reimplement it like git.git has. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1372941691-14684-7-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 35fdda1..ee9c6d8 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -1,5 +1,81 @@ # perf completion +# Taken from git.git's completion script. +__my_reassemble_comp_words_by_ref() +{ + local exclude i j first + # Which word separators to exclude? + exclude="${1//[^$COMP_WORDBREAKS]}" + cword_=$COMP_CWORD + if [ -z "$exclude" ]; then + words_=("${COMP_WORDS[@]}") + return + fi + # List of word completion separators has shrunk; + # re-assemble words to complete. + for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do + # Append each nonempty word consisting of just + # word separator characters to the current word. + first=t + while + [ $i -gt 0 ] && + [ -n "${COMP_WORDS[$i]}" ] && + # word consists of excluded word separators + [ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ] + do + # Attach to the previous token, + # unless the previous token is the command name. + if [ $j -ge 2 ] && [ -n "$first" ]; then + ((j--)) + fi + first= + words_[$j]=${words_[j]}${COMP_WORDS[i]} + if [ $i = $COMP_CWORD ]; then + cword_=$j + fi + if (($i < ${#COMP_WORDS[@]} - 1)); then + ((i++)) + else + # Done. + return + fi + done + words_[$j]=${words_[j]}${COMP_WORDS[i]} + if [ $i = $COMP_CWORD ]; then + cword_=$j + fi + done +} + +type _get_comp_words_by_ref &>/dev/null || +_get_comp_words_by_ref() +{ + local exclude cur_ words_ cword_ + if [ "$1" = "-n" ]; then + exclude=$2 + shift 2 + fi + __my_reassemble_comp_words_by_ref "$exclude" + cur_=${words_[cword_]} + while [ $# -gt 0 ]; do + case "$1" in + cur) + cur=$cur_ + ;; + prev) + prev=${words_[$cword_-1]} + ;; + words) + words=("${words_[@]}") + ;; + cword) + cword=$cword_ + ;; + esac + shift + done +} + type __ltrim_colon_completions &>/dev/null || __ltrim_colon_completions() { @@ -19,12 +95,7 @@ _perf() local cur prev cmd COMPREPLY=() - if function_exists _get_comp_words_by_ref; then - _get_comp_words_by_ref -n : cur prev - else - cur=$(_get_cword :) - prev=${COMP_WORDS[COMP_CWORD-1]} - fi + _get_comp_words_by_ref -n : cur prev cmd=${COMP_WORDS[0]} -- cgit v0.10.2 From 6e0dc374a2c912a8a967ea8a4f9696dd4b0a6d3e Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Jul 2013 18:11:31 +0530 Subject: perf completion: Use more comp words The completion words $words and $cword are available, so we might as well use them instead of directly accessing COMP_WORDS. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1372941691-14684-8-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index ee9c6d8..62e157db 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -92,15 +92,15 @@ __ltrim_colon_completions() type perf &>/dev/null && _perf() { - local cur prev cmd + local cur words cword prev cmd COMPREPLY=() - _get_comp_words_by_ref -n : cur prev + _get_comp_words_by_ref -n =: cur words cword prev - cmd=${COMP_WORDS[0]} + cmd=${words[0]} # List perf subcommands or long options - if [ $COMP_CWORD -eq 1 ]; then + if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then COMPREPLY=( $( compgen -W '--help --version \ --exec-path --html-path --paginate --no-pager \ @@ -110,13 +110,13 @@ _perf() COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) fi # List possible events for -e option - elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then + elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) ) __ltrim_colon_completions $cur # List long option names elif [[ $cur == --* ]]; then - subcmd=${COMP_WORDS[1]} + subcmd=${words[1]} opts=$($cmd $subcmd --list-opts) COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) ) fi -- cgit v0.10.2 From 9cd00941f8c274e6ca03ed238d96ddb0be474b86 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Wed, 18 Sep 2013 15:56:14 +0200 Subject: perf symbols: Support for Openembedded/Yocto -dbg packages On OpenEmbedded the symbol files are located under a .debug folder on the same folder as the binary file. This patch adds support for such files. Without this patch on perf top you can see: no symbols found in /usr/lib/gstreamer-1.0/libtheoraenc.so.1.1.2, maybe install a debug package? 84.56% libtheoraenc.so.1.1.2 [.] 0x000000000000b346 With this patch symbols are shown: 19.06% libtheoraenc.so.1.1.2 [.] oc_int_frag_satd_thresh_mmxext 9.76% libtheoraenc.so.1.1.2 [.] oc_analyze_mb_mode_luma 5.58% libtheoraenc.so.1.1.2 [.] oc_qii_state_advance 4.84% libtheoraenc.so.1.1.2 [.] oc_enc_tokenize_ac ... Signed-off-by: Ricardo Ribalda Delgado Acked-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Waiman Long Link: http://lkml.kernel.org/r/1379512574-25912-1-git-send-email-ricardo.ribalda@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index e3c1ff8..6bfc8aa 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -7,19 +7,20 @@ char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { - [DSO_BINARY_TYPE__KALLSYMS] = 'k', - [DSO_BINARY_TYPE__VMLINUX] = 'v', - [DSO_BINARY_TYPE__JAVA_JIT] = 'j', - [DSO_BINARY_TYPE__DEBUGLINK] = 'l', - [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', - [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', - [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', - [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', - [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', - [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', - [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', - [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', - [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', + [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__VMLINUX] = 'v', + [DSO_BINARY_TYPE__JAVA_JIT] = 'j', + [DSO_BINARY_TYPE__DEBUGLINK] = 'l', + [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', + [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', + [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', + [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o', + [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', + [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', + [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', + [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', + [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', }; if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) @@ -64,6 +65,28 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, symbol_conf.symfs, dso->long_name); break; + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + { + char *last_slash; + size_t len; + size_t dir_size; + + last_slash = dso->long_name + dso->long_name_len; + while (last_slash != dso->long_name && *last_slash != '/') + last_slash--; + + len = scnprintf(file, size, "%s", symbol_conf.symfs); + dir_size = last_slash - dso->long_name + 2; + if (dir_size > (size - len)) { + ret = -1; + break; + } + len += scnprintf(file + len, dir_size, "%s", dso->long_name); + len += scnprintf(file + len , size - len, ".debug%s", + last_slash); + break; + } + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: if (!dso->has_build_id) { ret = -1; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index b793053..dbd9241 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -23,6 +23,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, DSO_BINARY_TYPE__KCORE, DSO_BINARY_TYPE__GUEST_KCORE, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__NOT_FOUND, }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7eb0362..cd1dcc4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -51,6 +51,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__NOT_FOUND, }; -- cgit v0.10.2 From 5283ec23a02e8afdc984c7f5f07e2a2662d4934a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Sep 2013 18:39:34 +0200 Subject: perf tools: Remove unused trace-event-* code Removing unused trace-event-* code. Acked-by: Namhyung Kim Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1379003976-5839-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index e9e1c03..6681f71 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -120,42 +120,6 @@ raw_field_value(struct event_format *event, const char *name, void *data) return val; } -void *raw_field_ptr(struct event_format *event, const char *name, void *data) -{ - struct format_field *field; - - field = pevent_find_any_field(event, name); - if (!field) - return NULL; - - if (field->flags & FIELD_IS_DYNAMIC) { - int offset; - - offset = *(int *)(data + field->offset); - offset &= 0xffff; - - return data + offset; - } - - return data + field->offset; -} - -int trace_parse_common_type(struct pevent *pevent, void *data) -{ - struct pevent_record record; - - record.data = data; - return pevent_data_type(pevent, &record); -} - -int trace_parse_common_pid(struct pevent *pevent, void *data) -{ - struct pevent_record record; - - record.data = data; - return pevent_data_pid(pevent, &record); -} - unsigned long long read_size(struct event_format *event, void *ptr, int size) { return pevent_read_number(event->pevent, ptr, size); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index fafe1a4..04df631 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -11,8 +11,6 @@ union perf_event; struct perf_tool; struct thread; -extern struct pevent *perf_pevent; - int bigendian(void); struct pevent *read_trace_init(int file_bigendian, int host_bigendian); @@ -23,26 +21,19 @@ int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size); int parse_event_file(struct pevent *pevent, char *buf, unsigned long size, char *sys); -struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu); - unsigned long long raw_field_value(struct event_format *event, const char *name, void *data); -void *raw_field_ptr(struct event_format *event, const char *name, void *data); void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); ssize_t trace_report(int fd, struct pevent **pevent, bool repipe); -int trace_parse_common_type(struct pevent *pevent, void *data); -int trace_parse_common_pid(struct pevent *pevent, void *data); - struct event_format *trace_find_next_event(struct pevent *pevent, struct event_format *event); unsigned long long read_size(struct event_format *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); -struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); int read_tracing_data(int fd, struct list_head *pattrs); struct tracing_data { -- cgit v0.10.2 From 918512b435e15fefe609d236e0ecd62cb8f389c9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Sep 2013 18:39:35 +0200 Subject: perf tools: Unify page_size usage Making page_size global from the util object. Removing the not needed one. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1379003976-5839-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index cf36ba2..0aacd629 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -70,7 +70,6 @@ struct perf_record { struct perf_session *session; const char *progname; int output; - unsigned int page_size; int realtime_prio; bool no_buildid; bool no_buildid_cache; @@ -119,7 +118,7 @@ static int perf_record__mmap_read(struct perf_record *rec, { unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; - unsigned char *data = md->base + rec->page_size; + unsigned char *data = md->base + page_size; unsigned long size; void *buf; int rc = 0; @@ -360,8 +359,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) rec->progname = argv[0]; - rec->page_size = sysconf(_SC_PAGE_SIZE); - on_exit(perf_record__sig_exit, rec); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 245020c..6265778 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -456,6 +456,7 @@ int main(int argc, const char **argv) { const char *cmd; + /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); cmd = perf_extract_argv0_path(argv[0]); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 71b5412..a24ce0a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1036,6 +1036,7 @@ PyMODINIT_FUNC initperf(void) pyrf_cpu_map__setup_types() < 0) return; + /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); Py_INCREF(&pyrf_evlist__type); -- cgit v0.10.2 From 02ad0702e54f9b82b697718e7e8662eb3f2266ee Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:13 -0700 Subject: perf lock: Remove dead code No need for break statements after goto jumps. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-2-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index ee33ba2..148f7e2 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -446,7 +446,6 @@ broken: list_del(&seq->list); free(seq); goto end; - break; default: BUG_ON("Unknown state of lock sequence found!\n"); break; @@ -508,8 +507,6 @@ static int report_lock_acquired_event(struct perf_evsel *evsel, list_del(&seq->list); free(seq); goto end; - break; - default: BUG_ON("Unknown state of lock sequence found!\n"); break; @@ -564,7 +561,6 @@ static int report_lock_contended_event(struct perf_evsel *evsel, list_del(&seq->list); free(seq); goto end; - break; default: BUG_ON("Unknown state of lock sequence found!\n"); break; @@ -606,7 +602,6 @@ static int report_lock_release_event(struct perf_evsel *evsel, switch (seq->state) { case SEQ_STATE_UNINITIALIZED: goto end; - break; case SEQ_STATE_ACQUIRED: break; case SEQ_STATE_READ_ACQUIRED: @@ -624,7 +619,6 @@ static int report_lock_release_event(struct perf_evsel *evsel, ls->discard = 1; bad_hist[BROKEN_RELEASE]++; goto free_seq; - break; default: BUG_ON("Unknown state of lock sequence found!\n"); break; -- cgit v0.10.2 From b33492ade49a223a666e582d0c63566609e7014b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:14 -0700 Subject: perf lock: Return proper code in report_lock_*_event The report_lock_*_event() functions return -1 when lock_stat_findnew(), thread_stat_findnew() or get_seq() return NULL. These functions only return this value when failing to allocate memory, this return -ENOMEM instead. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-3-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 148f7e2..d3188629 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -400,17 +400,17 @@ static int report_lock_acquire_event(struct perf_evsel *evsel, ls = lock_stat_findnew(addr, name); if (!ls) - return -1; + return -ENOMEM; if (ls->discard) return 0; ts = thread_stat_findnew(sample->tid); if (!ts) - return -1; + return -ENOMEM; seq = get_seq(ts, addr); if (!seq) - return -1; + return -ENOMEM; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -472,17 +472,17 @@ static int report_lock_acquired_event(struct perf_evsel *evsel, ls = lock_stat_findnew(addr, name); if (!ls) - return -1; + return -ENOMEM; if (ls->discard) return 0; ts = thread_stat_findnew(sample->tid); if (!ts) - return -1; + return -ENOMEM; seq = get_seq(ts, addr); if (!seq) - return -1; + return -ENOMEM; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -533,17 +533,17 @@ static int report_lock_contended_event(struct perf_evsel *evsel, ls = lock_stat_findnew(addr, name); if (!ls) - return -1; + return -ENOMEM; if (ls->discard) return 0; ts = thread_stat_findnew(sample->tid); if (!ts) - return -1; + return -ENOMEM; seq = get_seq(ts, addr); if (!seq) - return -1; + return -ENOMEM; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -587,17 +587,17 @@ static int report_lock_release_event(struct perf_evsel *evsel, ls = lock_stat_findnew(addr, name); if (!ls) - return -1; + return -ENOMEM; if (ls->discard) return 0; ts = thread_stat_findnew(sample->tid); if (!ts) - return -1; + return -ENOMEM; seq = get_seq(ts, addr); if (!seq) - return -1; + return -ENOMEM; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: -- cgit v0.10.2 From 0a98c7febf55325ebac4f28289a9433f4b66ed0e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:15 -0700 Subject: perf lock: Plug some memleaks Address some trivial leaks. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-4-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index d3188629..7784347 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -321,10 +321,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name) new->addr = addr; new->name = zalloc(sizeof(char) * strlen(name) + 1); - if (!new->name) + if (!new->name) { + free(new); goto alloc_failed; - strcpy(new->name, name); + } + strcpy(new->name, name); new->wait_time_min = ULLONG_MAX; list_add(&new->hash_entry, entry); @@ -875,7 +877,7 @@ static int __cmd_record(int argc, const char **argv) const char *record_args[] = { "record", "-R", "-m", "1024", "-c", "1", }; - unsigned int rec_argc, i, j; + unsigned int rec_argc, i, j, ret; const char **rec_argv; for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { @@ -892,7 +894,7 @@ static int __cmd_record(int argc, const char **argv) rec_argc += 2 * ARRAY_SIZE(lock_tracepoints); rec_argv = calloc(rec_argc + 1, sizeof(char *)); - if (rec_argv == NULL) + if (!rec_argv) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(record_args); i++) @@ -908,7 +910,9 @@ static int __cmd_record(int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv, NULL); + free(rec_argv); + return ret; } int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) -- cgit v0.10.2 From 375eb2be5584b8182a917124ca217b74e43d2dc4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:16 -0700 Subject: perf lock: Redo __cmd_report This function should be straightforward, and we can remove some trivial logic by moving the functionality of read_events() into __cmd_report() - thus allowing a new session to be properly deleted. Since the 'info' subcommand also needs to process the recorded events, add a 'display_info' flag to differentiate between report and info commands. Furthermore, this patch also calls perf_session__has_traces(), making sure that we don't compare apples and oranges, fixing a segfault when using an perf.data file generated by a different subcommand. ie: ./perf mem record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data (~724 samples) ] ./perf lock report Segmentation fault (core dumped) Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-5-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 7784347..780484f 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -818,6 +818,18 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return 0; } +static void sort_result(void) +{ + unsigned int i; + struct lock_stat *st; + + for (i = 0; i < LOCKHASH_SIZE; i++) { + list_for_each_entry(st, &lockhash_table[i], hash_entry) { + insert_to_result(st, compare); + } + } +} + static const struct perf_evsel_str_handler lock_tracepoints[] = { { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ @@ -825,51 +837,47 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = { { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; -static int read_events(void) +static int __cmd_report(bool display_info) { + int err = -EINVAL; struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, .ordered_samples = true, }; + session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); if (!session) { pr_err("Initializing perf session failed\n"); - return -1; + return -ENOMEM; } + if (!perf_session__has_traces(session, "lock record")) + goto out_delete; + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { pr_err("Initializing perf session tracepoint handlers failed\n"); - return -1; + goto out_delete; } - return perf_session__process_events(session, &eops); -} + if (select_key()) + goto out_delete; -static void sort_result(void) -{ - unsigned int i; - struct lock_stat *st; - - for (i = 0; i < LOCKHASH_SIZE; i++) { - list_for_each_entry(st, &lockhash_table[i], hash_entry) { - insert_to_result(st, compare); - } - } -} + err = perf_session__process_events(session, &eops); + if (err) + goto out_delete; -static int __cmd_report(void) -{ setup_pager(); + if (display_info) /* used for info subcommand */ + err = dump_info(); + else { + sort_result(); + print_result(); + } - if ((select_key() != 0) || - (read_events() != 0)) - return -1; - - sort_result(); - print_result(); - - return 0; +out_delete: + perf_session__delete(session); + return err; } static int __cmd_record(int argc, const char **argv) @@ -970,7 +978,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(report_usage, report_options); } - __cmd_report(); + rc = __cmd_report(false); } else if (!strcmp(argv[0], "script")) { /* Aliased to 'perf script' */ return cmd_script(argc, argv, prefix); @@ -983,11 +991,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) } /* recycling report_lock_ops */ trace_handler = &report_lock_ops; - setup_pager(); - if (read_events() != 0) - rc = -1; - else - rc = dump_info(); + rc = __cmd_report(true); } else { usage_with_options(lock_usage, lock_options); } -- cgit v0.10.2 From 60a25cbc4a167fc0129296c3c640d8506a57acc5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:18 -0700 Subject: perf lock: Limit bad rate precision Two decimal precision should be enough for this. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-7-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 780484f..972310c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -686,7 +686,7 @@ static void print_bad_events(int bad, int total) pr_info("\n=== output for debug===\n\n"); pr_info("bad: %d, total: %d\n", bad, total); - pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100); + pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100); pr_info("histogram of events caused bad sequence\n"); for (i = 0; i < BROKEN_MAX; i++) pr_info(" %10s: %d\n", name[i], bad_hist[i]); -- cgit v0.10.2 From f37376cd721a539ac398cbb7718b72fce83cd49b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 8 Sep 2013 19:19:19 -0700 Subject: perf lock: Account for lock average wait time While perf-lock currently reports both the total wait time and the number of contentions, it doesn't explicitly show the average wait time. Having this value immediately in the report can be quite useful when looking into performance issues. Furthermore, allowing report to sort by averages is another handy feature to have - and thus do not only print the value, but add it to the lock_stat structure. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Frederic Weisbecker Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378693159-8747-8-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index c7f5f55..ab25be2 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -48,7 +48,7 @@ REPORT OPTIONS -k:: --key=:: Sorting key. Possible values: acquired (default), contended, - wait_total, wait_max, wait_min. + avg_wait, wait_total, wait_max, wait_min. INFO OPTIONS ------------ diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 972310c..6a9076f 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -56,7 +56,9 @@ struct lock_stat { unsigned int nr_readlock; unsigned int nr_trylock; + /* these times are in nano sec. */ + u64 avg_wait_time; u64 wait_time_total; u64 wait_time_min; u64 wait_time_max; @@ -208,6 +210,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid) SINGLE_KEY(nr_acquired) SINGLE_KEY(nr_contended) +SINGLE_KEY(avg_wait_time) SINGLE_KEY(wait_time_total) SINGLE_KEY(wait_time_max) @@ -244,6 +247,7 @@ static struct rb_root result; /* place to store sorted data */ struct lock_key keys[] = { DEF_KEY_LOCK(acquired, nr_acquired), DEF_KEY_LOCK(contended, nr_contended), + DEF_KEY_LOCK(avg_wait, avg_wait_time), DEF_KEY_LOCK(wait_total, wait_time_total), DEF_KEY_LOCK(wait_min, wait_time_min), DEF_KEY_LOCK(wait_max, wait_time_max), @@ -516,6 +520,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel, seq->state = SEQ_STATE_ACQUIRED; ls->nr_acquired++; + ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0; seq->prev_event_time = sample->time; end: return 0; @@ -570,6 +575,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel, seq->state = SEQ_STATE_CONTENDED; ls->nr_contended++; + ls->avg_wait_time = ls->wait_time_total/ls->nr_contended; seq->prev_event_time = sample->time; end: return 0; @@ -703,6 +709,7 @@ static void print_result(void) pr_info("%10s ", "acquired"); pr_info("%10s ", "contended"); + pr_info("%15s ", "avg wait (ns)"); pr_info("%15s ", "total wait (ns)"); pr_info("%15s ", "max wait (ns)"); pr_info("%15s ", "min wait (ns)"); @@ -734,6 +741,7 @@ static void print_result(void) pr_info("%10u ", st->nr_acquired); pr_info("%10u ", st->nr_contended); + pr_info("%15" PRIu64 " ", st->avg_wait_time); pr_info("%15" PRIu64 " ", st->wait_time_total); pr_info("%15" PRIu64 " ", st->wait_time_max); pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ? @@ -940,7 +948,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) }; const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", - "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"), + "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), /* TODO: type */ OPT_END() }; -- cgit v0.10.2 From 994a1f78b191df0c9d6caca3f3afb03e247aff26 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2013 12:36:12 +0200 Subject: perf tools: Check mmap pages value early Move the check of the mmap_pages value to the options parsing time, so we could rely on this value on other parts of code. Related changes come in the next patches. Also changes perf_evlist::mmap_len to proper size_t type. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378031796-17892-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 935d522..cfa0b79 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1426,8 +1426,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, const struct option live_options[] = { OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "record events on existing process id"), - OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages, - "number of mmap data pages"), + OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages", + "number of mmap data pages", + perf_evlist__parse_mmap_pages), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0aacd629..92ca541 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -233,10 +233,6 @@ try_again: "or try again with a smaller value of -m/--mmap_pages.\n" "(current value: %d)\n", opts->mmap_pages); rc = -errno; - } else if (!is_power_of_2(opts->mmap_pages) && - (opts->mmap_pages != UINT_MAX)) { - pr_err("--mmap_pages/-m value must be a power of two."); - rc = -EINVAL; } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno)); rc = -errno; @@ -854,8 +850,9 @@ const struct option record_options[] = { OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), - OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages, - "number of mmap data pages"), + OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages", + "number of mmap data pages", + perf_evlist__parse_mmap_pages), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b3e0229..e846695 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1075,8 +1075,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "file", "vmlinux pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), - OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages, - "number of mmap data pages"), + OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages", + "number of mmap data pages", + perf_evlist__parse_mmap_pages), OPT_INTEGER('r', "realtime", &top.realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &top.delay_secs, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index be65843..f3ddbb0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1528,8 +1528,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "list of cpus to monitor"), OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, "child tasks do not inherit counters"), - OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages, - "number of mmap data pages"), + OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", + "number of mmap data pages", + perf_evlist__parse_mmap_pages), OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "duration", &trace, "float", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f9f77be..4283f49 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -18,6 +18,7 @@ #include #include "parse-events.h" +#include "parse-options.h" #include @@ -672,6 +673,40 @@ out_unmap: return -1; } +static size_t perf_evlist__mmap_size(unsigned long pages) +{ + /* 512 kiB: default amount of unprivileged mlocked memory */ + if (pages == UINT_MAX) + pages = (512 * 1024) / page_size; + else if (!is_power_of_2(pages)) + return 0; + + return (pages + 1) * page_size; +} + +int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + unsigned int pages, *mmap_pages = opt->value; + size_t size; + char *eptr; + + pages = strtoul(str, &eptr, 10); + if (*eptr != '\0') { + pr_err("failed to parse --mmap_pages/-m value\n"); + return -1; + } + + size = perf_evlist__mmap_size(pages); + if (!size) { + pr_err("--mmap_pages/-m value must be a power of two."); + return -1; + } + + *mmap_pages = pages; + return 0; +} + /** perf_evlist__mmap - Create per cpu maps to receive events * * @evlist - list of events @@ -695,14 +730,6 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, const struct thread_map *threads = evlist->threads; int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask; - /* 512 kiB: default amount of unprivileged mlocked memory */ - if (pages == UINT_MAX) - pages = (512 * 1024) / page_size; - else if (!is_power_of_2(pages)) - return -EINVAL; - - mask = pages * page_size - 1; - if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) return -ENOMEM; @@ -710,7 +737,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return -ENOMEM; evlist->overwrite = overwrite; - evlist->mmap_len = (pages + 1) * page_size; + evlist->mmap_len = perf_evlist__mmap_size(pages); + mask = evlist->mmap_len - page_size - 1; list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 880d713..4edb500 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -31,7 +31,7 @@ struct perf_evlist { int nr_groups; int nr_fds; int nr_mmaps; - int mmap_len; + size_t mmap_len; int id_pos; int is_pos; u64 combined_sample_type; @@ -103,6 +103,10 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, bool want_signal); int perf_evlist__start_workload(struct perf_evlist *evlist); +int perf_evlist__parse_mmap_pages(const struct option *opt, + const char *str, + int unset); + int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); -- cgit v0.10.2 From 27050f530dc4fd88dc93d85c177e000efe970d12 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2013 12:36:13 +0200 Subject: perf tools: Add possibility to specify mmap size Adding possibility to specify mmap size via -m/--mmap-pages by appending unit size character (B/K/M/G) to the number, like: $ perf record -m 8K ls $ perf record -m 2M ls The size is rounded up appropriately to follow perf mmap restrictions. If no unit is specified the number provides pages as of now, like: $ perf record -m 8 ls Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378031796-17892-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index ac84db2..6a06cef 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -109,7 +109,9 @@ STAT LIVE OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -a:: --all-cpus:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index f732eaa..f10ab63 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -87,7 +87,9 @@ OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -g:: --call-graph:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 6d70fbf..f65777c 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -68,7 +68,9 @@ Default is to monitor all CPUS. -m :: --mmap-pages=:: - Number of mmapped data pages. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -p :: --pid=:: diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 3777385..7f70d36 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -59,7 +59,9 @@ OPTIONS -m:: --mmap-pages=:: - Number of mmap data pages. Must be a power of two. + Number of mmap data pages (must be a power of two) or size + specification with appended unit character - B/K/M/G. The + size is rounded up to have nearest pages power of two value. -C:: --cpu:: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4283f49..d21ab08 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -687,14 +687,33 @@ static size_t perf_evlist__mmap_size(unsigned long pages) int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) { - unsigned int pages, *mmap_pages = opt->value; + unsigned int pages, val, *mmap_pages = opt->value; size_t size; - char *eptr; + static struct parse_tag tags[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; - pages = strtoul(str, &eptr, 10); - if (*eptr != '\0') { - pr_err("failed to parse --mmap_pages/-m value\n"); - return -1; + val = parse_tag_value(str, tags); + if (val != (unsigned int) -1) { + /* we got file size value */ + pages = PERF_ALIGN(val, page_size) / page_size; + if (!is_power_of_2(pages)) { + pages = next_pow2(pages); + pr_info("rounding mmap pages size to %u (%u pages)\n", + pages * page_size, pages); + } + } else { + /* we got pages count value */ + char *eptr; + pages = strtoul(str, &eptr, 10); + if (*eptr != '\0') { + pr_err("failed to parse --mmap_pages/-m value\n"); + return -1; + } } size = perf_evlist__mmap_size(pages); @@ -738,6 +757,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); + pr_debug("mmap size %luB\n", evlist->mmap_len); mask = evlist->mmap_len - page_size - 1; list_for_each_entry(evsel, &evlist->entries, node) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ccfdeb6..ab71d62 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -361,3 +361,28 @@ int parse_nsec_time(const char *str, u64 *ptime) *ptime = time_sec * NSEC_PER_SEC + time_nsec; return 0; } + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags) +{ + struct parse_tag *i = tags; + + while (i->tag) { + char *s; + + s = strchr(str, i->tag); + if (s) { + unsigned long int value; + char *endptr; + + value = strtoul(str, &endptr, 10); + if (s != endptr) + break; + + value *= i->mult; + return value; + } + i++; + } + + return (unsigned long) -1; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index a535359..c29ecaa 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -270,6 +270,13 @@ bool is_power_of_2(unsigned long n) return (n != 0 && ((n & (n - 1)) == 0)); } +static inline unsigned next_pow2(unsigned x) +{ + if (!x) + return 1; + return 1ULL << (32 - __builtin_clz(x - 1)); +} + size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); @@ -281,4 +288,11 @@ void dump_stack(void); extern unsigned int page_size; void get_term_dimensions(struct winsize *ws); + +struct parse_tag { + char tag; + int mult; +}; + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v0.10.2 From b22d54b09a5448d3706929c6f0eae36429f4ec5d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2013 12:36:14 +0200 Subject: perf evlist: Introduce perf_evlist__new_default function Adding new common function to create evlist with default event. It spares some code lines in automated tests. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378031796-17892-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index b8a7056..82ac715 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -45,7 +45,7 @@ int test__PERF_RECORD(void) }; cpu_set_t cpu_mask; size_t cpu_mask_size = sizeof(cpu_mask); - struct perf_evlist *evlist = perf_evlist__new(); + struct perf_evlist *evlist = perf_evlist__new_default(); struct perf_evsel *evsel; struct perf_sample sample; const char *cmd = "sleep"; @@ -66,16 +66,6 @@ int test__PERF_RECORD(void) } /* - * We need at least one evsel in the evlist, use the default - * one: "cycles". - */ - err = perf_evlist__add_default(evlist); - if (err < 0) { - pr_debug("Not enough memory to create evsel\n"); - goto out_delete_evlist; - } - - /* * Create maps of threads and cpus to monitor. In this case * we start with all threads and cpus (-1, -1) but then in * perf_evlist__prepare_workload we'll fill in the only thread diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 28fe589..b07f8a1 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -37,20 +37,11 @@ int test__task_exit(void) signal(SIGCHLD, sig_handler); signal(SIGUSR1, sig_handler); - evlist = perf_evlist__new(); + evlist = perf_evlist__new_default(); if (evlist == NULL) { - pr_debug("perf_evlist__new\n"); + pr_debug("perf_evlist__new_default\n"); return -1; } - /* - * We need at least one evsel in the evlist, use the default - * one: "cycles". - */ - err = perf_evlist__add_default(evlist); - if (err < 0) { - pr_debug("Not enough memory to create evsel\n"); - goto out_free_evlist; - } /* * Create maps of threads and cpus to monitor. In this case @@ -117,7 +108,6 @@ out_close_evlist: perf_evlist__close(evlist); out_delete_maps: perf_evlist__delete_maps(evlist); -out_free_evlist: perf_evlist__delete(evlist); return err; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d21ab08..f0d71a9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -50,6 +50,18 @@ struct perf_evlist *perf_evlist__new(void) return evlist; } +struct perf_evlist *perf_evlist__new_default(void) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist && perf_evlist__add_default(evlist)) { + perf_evlist__delete(evlist); + evlist = NULL; + } + + return evlist; +} + /** * perf_evlist__set_id_pos - set the positions of event ids. * @evlist: selected event list diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4edb500..871b55ab 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -53,6 +53,7 @@ struct perf_evsel_str_handler { }; struct perf_evlist *perf_evlist__new(void); +struct perf_evlist *perf_evlist__new_default(void); void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads); void perf_evlist__exit(struct perf_evlist *evlist); -- cgit v0.10.2 From dd96c46b5c765a779d8c35cc7d1df7515b4c7baf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2013 12:36:15 +0200 Subject: perf tools: Adding throttle event data struct support Moving 'struct throttle_event' out of python code and making it global as any other event. There's no usage of throttling events in any perf commands so far (besides python support), but we'll need this event data backup for upcoming test. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378031796-17892-5-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 17d9e16..9b7d4d3 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -61,6 +61,12 @@ struct read_event { u64 id; }; +struct throttle_event { + struct perf_event_header header; + u64 time; + u64 id; + u64 stream_id; +}; #define PERF_SAMPLE_MASK \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ @@ -178,6 +184,7 @@ union perf_event { struct fork_event fork; struct lost_event lost; struct read_event read; + struct throttle_event throttle; struct sample_event sample; struct attr_event attr; struct event_type_event event_type; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a24ce0a..06efd02 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -33,13 +33,6 @@ int eprintf(int level, const char *fmt, ...) # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, #endif -struct throttle_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 stream_id; -}; - PyMODINIT_FUNC initperf(void); #define member_def(type, member, ptype, help) \ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b97f468..d1e4495 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -397,6 +397,17 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all) swap_sample_id_all(event, &event->read + 1); } +static void perf_event__throttle_swap(union perf_event *event, + bool sample_id_all) +{ + event->throttle.time = bswap_64(event->throttle.time); + event->throttle.id = bswap_64(event->throttle.id); + event->throttle.stream_id = bswap_64(event->throttle.stream_id); + + if (sample_id_all) + swap_sample_id_all(event, &event->throttle + 1); +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -482,6 +493,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_EXIT] = perf_event__task_swap, [PERF_RECORD_LOST] = perf_event__all64_swap, [PERF_RECORD_READ] = perf_event__read_swap, + [PERF_RECORD_THROTTLE] = perf_event__throttle_swap, + [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap, [PERF_RECORD_SAMPLE] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, -- cgit v0.10.2 From fc2be6968e99b5314f20e938a547d44dcb1c40eb Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 14 Sep 2013 10:32:59 +0200 Subject: perf symbols: Add new option --ignore-vmlinux for perf top Running "perf top" on a machine with possibly invalid or non-matching vmlinux at the various places results in no symbol resolving despite /proc/kallsyms being present and valid. Add a new option --ignore-vmlinux to explicitly indicate that we do not want to use these kernels and just use what we have (kallsyms). Signed-off-by: Willy Tarreau Cc: Ingo Molnar Link: http://lkml.kernel.org/r/20130914083259.GA3418@1wt.eu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e846695..65c49b2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1073,6 +1073,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "list of cpus to monitor"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), + OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, + "don't load vmlinux even if found"), OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages", diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cd1dcc4..48c3879 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1215,7 +1215,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, goto do_kallsyms; } - if (symbol_conf.vmlinux_name != NULL) { + if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) { err = dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, filter); if (err > 0) { @@ -1227,7 +1227,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, return err; } - if (vmlinux_path != NULL) { + if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { err = dso__load_vmlinux_path(dso, map, filter); if (err > 0) return err; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a97bb1..9b8b213 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -85,6 +85,7 @@ struct symbol_conf { unsigned short priv_size; unsigned short nr_events; bool try_vmlinux_path, + ignore_vmlinux, show_kernel_path, use_modules, sort_by_name, -- cgit v0.10.2 From fc67297b16da335d610af2fac96233d51146300a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Sep 2013 15:27:43 +0900 Subject: perf tools: Separate out GTK codes to libperf-gtk.so Separate out GTK codes to a shared object called libperf-gtk.so. This time only GTK codes are built with -fPIC and libperf remains as is. Now run GTK hist and annotation browser using libdl. Signed-off-by: Namhyung Kim Reviewed-by: Pekka Enberg Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1379053663-13706-1-git-send-email-namhyung@kernel.org [ Fix it up wrt Ingo's tools/perf build speedups ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a24f6c2..40c39c3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -114,6 +114,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ BUILTIN_OBJS = LIB_H = LIB_OBJS = +GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -490,13 +491,19 @@ ifndef NO_SLANG endif ifndef NO_GTK2 - LIB_OBJS += $(OUTPUT)ui/gtk/browser.o - LIB_OBJS += $(OUTPUT)ui/gtk/hists.o - LIB_OBJS += $(OUTPUT)ui/gtk/setup.o - LIB_OBJS += $(OUTPUT)ui/gtk/util.o - LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o - LIB_OBJS += $(OUTPUT)ui/gtk/progress.o - LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o + ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so + + GTK_OBJS += $(OUTPUT)ui/gtk/browser.o + GTK_OBJS += $(OUTPUT)ui/gtk/hists.o + GTK_OBJS += $(OUTPUT)ui/gtk/setup.o + GTK_OBJS += $(OUTPUT)ui/gtk/util.o + GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o + GTK_OBJS += $(OUTPUT)ui/gtk/progress.o + GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + +install-gtk: $(OUTPUT)libperf-gtk.so + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)' + $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif ifndef NO_LIBPERL @@ -550,6 +557,12 @@ $(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ $(BUILTIN_OBJS) $(LIBS) -o $@ +$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) + $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< + +$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(ALL_LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) + $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ @@ -632,6 +645,9 @@ $(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< + $(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< @@ -673,7 +689,8 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) # we compile into subdirectories. if the target directory is not the source directory, they might not exists. So # we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) +DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h $(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) # In the second step, we make a rule to actually create these directories $(sort $(dir $(DIRECTORY_DEPS))): @@ -786,7 +803,9 @@ check: $(OUTPUT)common-cmds.h ### Installation rules -install-bin: all +install-gtk: + +install-bin: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' @@ -831,7 +850,8 @@ config-clean: @$(MAKE) -C config/feature-checks clean clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean - $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) + $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(GTK_OBJS) + $(RM) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(RM) $(ALL_PROGRAMS) perf $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean @@ -850,7 +870,7 @@ else GIT-HEAD-PHONY = endif -.PHONY: all install clean config-clean strip +.PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 0393d98..94f9a8e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -30,6 +30,7 @@ #include "util/tool.h" #include "arch/common.h" +#include #include struct perf_annotate { @@ -142,8 +143,18 @@ find_next: if (use_browser == 2) { int ret; + int (*annotate)(struct hist_entry *he, + struct perf_evsel *evsel, + struct hist_browser_timer *hbt); + + annotate = dlsym(perf_gtk_handle, + "hist_entry__gtk_annotate"); + if (annotate == NULL) { + ui__error("GTK browser not found!\n"); + return; + } - ret = hist_entry__gtk_annotate(he, evsel, NULL); + ret = annotate(he, evsel, NULL); if (!ret || !ann->skip_missing) return; @@ -247,8 +258,17 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out_delete; } - if (use_browser == 2) - perf_gtk__show_annotations(); + if (use_browser == 2) { + void (*show_annotations)(void); + + show_annotations = dlsym(perf_gtk_handle, + "perf_gtk__show_annotations"); + if (show_annotations == NULL) { + ui__error("GTK browser not found!\n"); + goto out_delete; + } + show_annotations(); + } out_delete: /* diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 06e1abe..21b5c2f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -35,6 +35,7 @@ #include "util/hist.h" #include "arch/common.h" +#include #include struct perf_report { @@ -591,8 +592,19 @@ static int __cmd_report(struct perf_report *rep) ret = 0; } else if (use_browser == 2) { - perf_evlist__gtk_browse_hists(session->evlist, help, - NULL, rep->min_percent); + int (*hist_browser)(struct perf_evlist *, + const char *, + struct hist_browser_timer *, + float min_pcnt); + + hist_browser = dlsym(perf_gtk_handle, + "perf_evlist__gtk_browse_hists"); + if (hist_browser == NULL) { + ui__error("GTK browser not found!\n"); + return ret; + } + hist_browser(session->evlist, help, NULL, + rep->min_percent); } } else perf_evlist__tty_browse_hists(session->evlist, rep, help); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f5d661f..d9bba8d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -377,11 +377,11 @@ ifndef NO_GTK2 NO_GTK2 := 1 else ifeq ($(feature-gtk2-infobar), 1) - CFLAGS += -DHAVE_GTK_INFO_BAR_SUPPORT + GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT endif CFLAGS += -DHAVE_GTK2_SUPPORT - CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) - EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) + GTK_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) + GTK_LIBS := $(shell pkg-config --libs gtk+-2.0 2>/dev/null) endif endif @@ -554,7 +554,12 @@ else sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif +ifeq ($(IS_X86_64),1) +lib = lib64 +else lib = lib +endif +libdir = $(prefix)/$(lib) # Shell quote (do not use $(call) to accommodate ancient setups); ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) @@ -567,6 +572,7 @@ template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) sysconfdir_SQ = $(subst ','\'',$(sysconfdir)) +libdir_SQ = $(subst ','\'',$(libdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index f538794..9c7ff8d 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -154,9 +154,9 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, return 0; } -int symbol__gtk_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct hist_browser_timer *hbt) +static int symbol__gtk_annotate(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, + struct hist_browser_timer *hbt) { GtkWidget *window; GtkWidget *notebook; @@ -226,6 +226,13 @@ int symbol__gtk_annotate(struct symbol *sym, struct map *map, return 0; } +int hist_entry__gtk_annotate(struct hist_entry *he, + struct perf_evsel *evsel, + struct hist_browser_timer *hbt) +{ + return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt); +} + void perf_gtk__show_annotations(void) { GtkWidget *window; diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a72acbc..8576cf1 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -20,6 +20,9 @@ struct perf_gtk_context { guint statbar_ctx_id; }; +int perf_gtk__init(void); +void perf_gtk__exit(bool wait_for_ok); + extern struct perf_gtk_context *pgctx; static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) @@ -48,4 +51,17 @@ static inline GtkWidget *perf_gtk__setup_info_bar(void) } #endif +struct perf_evsel; +struct perf_evlist; +struct hist_entry; +struct hist_browser_timer; + +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, + struct hist_browser_timer *hbt, + float min_pcnt); +int hist_entry__gtk_annotate(struct hist_entry *he, + struct perf_evsel *evsel, + struct hist_browser_timer *hbt); +void perf_gtk__show_annotations(void); + #endif /* _PERF_GTK_H_ */ diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 47d9a57..5df5140 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,10 +1,64 @@ #include +#include #include "../util/cache.h" #include "../util/debug.h" #include "../util/hist.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; +void *perf_gtk_handle; + +#ifdef HAVE_GTK2_SUPPORT +static int setup_gtk_browser(void) +{ + int (*perf_ui_init)(void); + + if (perf_gtk_handle) + return 0; + + perf_gtk_handle = dlopen(PERF_GTK_DSO, RTLD_LAZY); + if (perf_gtk_handle == NULL) { + char buf[PATH_MAX]; + scnprintf(buf, sizeof(buf), "%s/%s", LIBDIR, PERF_GTK_DSO); + perf_gtk_handle = dlopen(buf, RTLD_LAZY); + } + if (perf_gtk_handle == NULL) + return -1; + + perf_ui_init = dlsym(perf_gtk_handle, "perf_gtk__init"); + if (perf_ui_init == NULL) + goto out_close; + + if (perf_ui_init() == 0) + return 0; + +out_close: + dlclose(perf_gtk_handle); + return -1; +} + +static void exit_gtk_browser(bool wait_for_ok) +{ + void (*perf_ui_exit)(bool); + + if (perf_gtk_handle == NULL) + return; + + perf_ui_exit = dlsym(perf_gtk_handle, "perf_gtk__exit"); + if (perf_ui_exit == NULL) + goto out_close; + + perf_ui_exit(wait_for_ok); + +out_close: + dlclose(perf_gtk_handle); + + perf_gtk_handle = NULL; +} +#else +static inline int setup_gtk_browser(void) { return -1; } +static inline void exit_gtk_browser(bool wait_for_ok __maybe_unused) {} +#endif void setup_browser(bool fallback_to_pager) { @@ -17,8 +71,11 @@ void setup_browser(bool fallback_to_pager) switch (use_browser) { case 2: - if (perf_gtk__init() == 0) + if (setup_gtk_browser() == 0) break; + printf("GTK browser requested but could not find %s\n", + PERF_GTK_DSO); + sleep(1); /* fall through */ case 1: use_browser = 1; @@ -39,7 +96,7 @@ void exit_browser(bool wait_for_ok) { switch (use_browser) { case 2: - perf_gtk__exit(wait_for_ok); + exit_gtk_browser(wait_for_ok); break; case 1: diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h index 1349d14..ab88383 100644 --- a/tools/perf/ui/ui.h +++ b/tools/perf/ui/ui.h @@ -6,6 +6,7 @@ #include extern pthread_mutex_t ui__lock; +extern void *perf_gtk_handle; extern int use_browser; @@ -23,17 +24,6 @@ static inline int ui__init(void) static inline void ui__exit(bool wait_for_ok __maybe_unused) {} #endif -#ifdef HAVE_GTK2_SUPPORT -int perf_gtk__init(void); -void perf_gtk__exit(bool wait_for_ok); -#else -static inline int perf_gtk__init(void) -{ - return -1; -} -static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {} -#endif - void ui__refresh_dimensions(bool force); #endif /* _PERF_UI_H_ */ diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f0699e9..834b7b5 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -165,30 +165,6 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, } #endif -#ifdef HAVE_GTK2_SUPPORT -int symbol__gtk_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct hist_browser_timer *hbt); - -static inline int hist_entry__gtk_annotate(struct hist_entry *he, - struct perf_evsel *evsel, - struct hist_browser_timer *hbt) -{ - return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt); -} - -void perf_gtk__show_annotations(void); -#else -static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused, - struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) -{ - return 0; -} - -static inline void perf_gtk__show_annotations(void) {} -#endif - extern const char *disassembler_style; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ed4f90e..20b1758 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -228,20 +228,5 @@ static inline int script_browse(const char *script_opt __maybe_unused) #define K_SWITCH_INPUT_DATA -3000 #endif -#ifdef HAVE_GTK2_SUPPORT -int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt); -#else -static inline -int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused, - const char *help __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - float min_pcnt __maybe_unused) -{ - return 0; -} -#endif - unsigned int hists__sort_list_width(struct hists *self); #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c29ecaa..7fd840b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -128,6 +128,8 @@ void put_tracing_file(char *file); #endif #endif +#define PERF_GTK_DSO "libperf-gtk.so" + /* General helper functions */ extern void usage(const char *err) NORETURN; extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -- cgit v0.10.2 From 963ba5fd5d04f36d6a5c9a94562484a4f270c1de Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:25 +0900 Subject: perf sort: Fix a memory leak on srcline In the hist_entry__srcline_snprintf(), path and self->srcline are pointing the same memory region, but they are doubly allocated. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b4ecc0e..97cf3ef 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -269,10 +269,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, if (!fp) goto out_ip; - if (getline(&path, &line_len, fp) < 0 || !line_len) - goto out_ip; - self->srcline = strdup(path); - if (self->srcline == NULL) + if (getline(&self->srcline, &line_len, fp) < 0 || !line_len) goto out_ip; nl = strchr(self->srcline, '\n'); -- cgit v0.10.2 From 89da393c171926d3372f573d752be5ced98038eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:26 +0900 Subject: perf annotate: Reuse path from the result of addr2line In the symbol__get_source_line(), path and src_line->path will have same value, but they were allocated separately, and leaks one. Just share path to src_line->path. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7eae548..c6fd187 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1143,11 +1143,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, if (getline(&path, &line_len, fp) < 0 || !line_len) goto next_close; - src_line->path = malloc(sizeof(char) * line_len + 1); - if (!src_line->path) - goto next_close; - - strcpy(src_line->path, path); + src_line->path = path; insert_source_line(&tmp_root, src_line); next_close: -- cgit v0.10.2 From 909b143162de7af310d2a9351220030260ebe728 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:27 +0900 Subject: perf hists: Free srcline when freeing hist_entry We've been leaked srcline of hist_entry, it should be freed also. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f3278a3..e6fc38a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -535,6 +535,7 @@ void hist_entry__free(struct hist_entry *he) { free(he->branch_info); free(he->mem_info); + free(he->srcline); free(he); } -- cgit v0.10.2 From f048d548f803b57ee1dbf66702f398ba69657450 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:28 +0900 Subject: perf annotate: Factor out get/free_srcline() Currently external addr2line tool is used for srcline sort key and annotate with srcline info. Separate the common code to prepare upcoming enhancements. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 40c39c3..1f13615 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -363,6 +363,7 @@ LIB_OBJS += $(OUTPUT)util/intlist.o LIB_OBJS += $(OUTPUT)util/vdso.o LIB_OBJS += $(OUTPUT)util/stat.o LIB_OBJS += $(OUTPUT)util/record.o +LIB_OBJS += $(OUTPUT)util/srcline.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c6fd187..d48297d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1070,7 +1070,7 @@ static void symbol__free_source_line(struct symbol *sym, int len) (sizeof(src_line->p) * (src_line->nr_pcnt - 1)); for (i = 0; i < len; i++) { - free(src_line->path); + free_srcline(src_line->path); src_line = (void *)src_line + sizeof_src_line; } @@ -1087,7 +1087,6 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, u64 start; int i, k; int evidx = evsel->idx; - char cmd[PATH_MAX * 2]; struct source_line *src_line; struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); @@ -1115,10 +1114,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, start = map__rip_2objdump(map, sym->start); for (i = 0; i < len; i++) { - char *path = NULL; - size_t line_len; u64 offset; - FILE *fp; double percent_max = 0.0; src_line->nr_pcnt = nr_pcnt; @@ -1135,19 +1131,9 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset); - fp = popen(cmd, "r"); - if (!fp) - goto next; - - if (getline(&path, &line_len, fp) < 0 || !line_len) - goto next_close; - - src_line->path = path; + src_line->path = get_srcline(filename, offset); insert_source_line(&tmp_root, src_line); - next_close: - pclose(fp); next: src_line = (void *)src_line + sizeof_src_line; } @@ -1188,7 +1174,7 @@ static void print_summary(struct rb_root *root, const char *filename) path = src_line->path; color = get_percent_color(percent_max); - color_fprintf(stdout, color, " %s", path); + color_fprintf(stdout, color, " %s\n", path); node = rb_next(node); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e6fc38a..cca03831 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -535,7 +535,7 @@ void hist_entry__free(struct hist_entry *he) { free(he->branch_info); free(he->mem_info); - free(he->srcline); + free_srcline(he->srcline); free(he); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 97cf3ef..b7e0ef0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -251,8 +251,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, unsigned int width __maybe_unused) { FILE *fp = NULL; - char cmd[PATH_MAX + 2], *path = self->srcline, *nl; - size_t line_len; + char *path = self->srcline; if (path != NULL) goto out_path; @@ -263,19 +262,9 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, if (!strncmp(self->ms.map->dso->long_name, "/tmp/perf-", 10)) goto out_ip; - snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64, - self->ms.map->dso->long_name, self->ip); - fp = popen(cmd, "r"); - if (!fp) - goto out_ip; - - if (getline(&self->srcline, &line_len, fp) < 0 || !line_len) - goto out_ip; + path = get_srcline(self->ms.map->dso->long_name, self->ip); + self->srcline = path; - nl = strchr(self->srcline, '\n'); - if (nl != NULL) - *nl = '\0'; - path = self->srcline; out_path: if (fp) pclose(fp); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c new file mode 100644 index 0000000..7e92cca --- /dev/null +++ b/tools/perf/util/srcline.c @@ -0,0 +1,83 @@ +#include +#include +#include + +#include + +#include "util/util.h" +#include "util/debug.h" + +static int addr2line(const char *dso_name, unsigned long addr, + char **file, unsigned int *line_nr) +{ + FILE *fp; + char cmd[PATH_MAX]; + char *filename = NULL; + size_t len; + char *sep; + int ret = 0; + + scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, + dso_name, addr); + + fp = popen(cmd, "r"); + if (fp == NULL) { + pr_warning("popen failed for %s\n", dso_name); + return 0; + } + + if (getline(&filename, &len, fp) < 0 || !len) { + pr_warning("addr2line has no output for %s\n", dso_name); + goto out; + } + + sep = strchr(filename, '\n'); + if (sep) + *sep = '\0'; + + if (!strcmp(filename, "??:0")) { + pr_debug("no debugging info in %s\n", dso_name); + free(filename); + goto out; + } + + sep = strchr(filename, ':'); + if (sep) { + *sep++ = '\0'; + *file = filename; + *line_nr = strtoul(sep, NULL, 0); + ret = 1; + } +out: + pclose(fp); + return ret; +} + +char *get_srcline(const char *dso_name, unsigned long addr) +{ + char *file; + unsigned line; + char *srcline; + size_t size; + + if (!addr2line(dso_name, addr, &file, &line)) + return SRCLINE_UNKNOWN; + + /* just calculate actual length */ + size = snprintf(NULL, 0, "%s:%u", file, line) + 1; + + srcline = malloc(size); + if (srcline) + snprintf(srcline, size, "%s:%u", file, line); + else + srcline = SRCLINE_UNKNOWN; + + free(file); + return srcline; +} + +void free_srcline(char *srcline) +{ + if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) + free(srcline); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7fd840b..7c8b43f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -297,4 +297,9 @@ struct parse_tag { }; unsigned long parse_tag_value(const char *str, struct parse_tag *tags); + +#define SRCLINE_UNKNOWN ((char *) "??:0") + +char *get_srcline(const char *dso_name, unsigned long addr); +void free_srcline(char *srcline); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v0.10.2 From 58d91a0068694a5ba3efc99e88ce6b4b0dd0d085 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:29 +0900 Subject: perf tools: Do not try to call addr2line on non-binary files No need to call addr2line since they don't have such information. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b7e0ef0..d443593 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -259,9 +259,6 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, if (!self->ms.map) goto out_ip; - if (!strncmp(self->ms.map->dso->long_name, "/tmp/perf-", 10)) - goto out_ip; - path = get_srcline(self->ms.map->dso->long_name, self->ip); self->srcline = path; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 7e92cca..777f918 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -57,11 +57,17 @@ char *get_srcline(const char *dso_name, unsigned long addr) { char *file; unsigned line; - char *srcline; + char *srcline = SRCLINE_UNKNOWN; size_t size; + if (dso_name[0] == '[') + goto out; + + if (!strncmp(dso_name, "/tmp/perf-", 10)) + goto out; + if (!addr2line(dso_name, addr, &file, &line)) - return SRCLINE_UNKNOWN; + goto out; /* just calculate actual length */ size = snprintf(NULL, 0, "%s:%u", file, line) + 1; @@ -73,6 +79,7 @@ char *get_srcline(const char *dso_name, unsigned long addr) srcline = SRCLINE_UNKNOWN; free(file); +out: return srcline; } -- cgit v0.10.2 From 86c98cab5a3137376ea7df5ffa5bd52e545fee95 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:30 +0900 Subject: perf annotate: Pass dso instead of dso_name to get_srcline() This is a preparation of next change. No functional changes are intended. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index d48297d..d73e800 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1081,8 +1081,7 @@ static void symbol__free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static int symbol__get_source_line(struct symbol *sym, struct map *map, struct perf_evsel *evsel, - struct rb_root *root, int len, - const char *filename) + struct rb_root *root, int len) { u64 start; int i, k; @@ -1131,7 +1130,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - src_line->path = get_srcline(filename, offset); + src_line->path = get_srcline(map->dso, offset); insert_source_line(&tmp_root, src_line); next: @@ -1338,7 +1337,6 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; - const char *filename = dso->long_name; struct rb_root source_line = RB_ROOT; u64 len; @@ -1348,9 +1346,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, len = symbol__size(sym); if (print_lines) { - symbol__get_source_line(sym, map, evsel, &source_line, - len, filename); - print_summary(&source_line, filename); + symbol__get_source_line(sym, map, evsel, &source_line, len); + print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index dbd9241..72eedd6 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -6,6 +6,7 @@ #include #include "types.h" #include "map.h" +#include "build-id.h" enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d443593..f732120 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -259,7 +259,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, if (!self->ms.map) goto out_ip; - path = get_srcline(self->ms.map->dso->long_name, self->ip); + path = get_srcline(self->ms.map->dso, self->ip); self->srcline = path; out_path: diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 777f918..c736d94 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -4,6 +4,7 @@ #include +#include "util/dso.h" #include "util/util.h" #include "util/debug.h" @@ -53,11 +54,12 @@ out: return ret; } -char *get_srcline(const char *dso_name, unsigned long addr) +char *get_srcline(struct dso *dso, unsigned long addr) { char *file; unsigned line; char *srcline = SRCLINE_UNKNOWN; + char *dso_name = dso->long_name; size_t size; if (dso_name[0] == '[') diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7c8b43f..1f06ba4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -300,6 +300,8 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags); #define SRCLINE_UNKNOWN ((char *) "??:0") -char *get_srcline(const char *dso_name, unsigned long addr); +struct dso; + +char *get_srcline(struct dso *dso, unsigned long addr); void free_srcline(char *srcline); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v0.10.2 From 2cc9d0ef577975abb3ebce7d5978559ec1c73633 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:31 +0900 Subject: perf tools: Save failed result of get_srcline() Some dso's lack srcline info, so there's no point to keep trying on them. Just save failture status and skip them. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 6bfc8aa..af4c687c 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -450,6 +450,7 @@ struct dso *dso__new(const char *name) dso->rel = 0; dso->sorted_by_name = 0; dso->has_build_id = 0; + dso->has_srcline = 1; dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 72eedd6..9ac666a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -83,6 +83,7 @@ struct dso { enum dso_binary_type data_type; u8 adjust_symbols:1; u8 has_build_id:1; + u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; u8 sname_alloc:1; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c736d94..dcff10b 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -58,10 +58,13 @@ char *get_srcline(struct dso *dso, unsigned long addr) { char *file; unsigned line; - char *srcline = SRCLINE_UNKNOWN; + char *srcline; char *dso_name = dso->long_name; size_t size; + if (!dso->has_srcline) + return SRCLINE_UNKNOWN; + if (dso_name[0] == '[') goto out; @@ -81,8 +84,11 @@ char *get_srcline(struct dso *dso, unsigned long addr) srcline = SRCLINE_UNKNOWN; free(file); -out: return srcline; + +out: + dso->has_srcline = 0; + return SRCLINE_UNKNOWN; } void free_srcline(char *srcline) -- cgit v0.10.2 From 2f48fcd84e9e68392e29c59204a4a434311d49e9 Mon Sep 17 00:00:00 2001 From: Roberto Vitillo Date: Wed, 11 Sep 2013 14:09:32 +0900 Subject: perf tools: Implement addr2line directly using libbfd When the srcline sort key is used , the external addr2line utility needs to be run for each hist entry to get the srcline info. This can consume quite a time if one has a huge perf.data file. So rather than executing the external utility, implement it internally and just call it. We can do it since we've linked with libbfd already. Signed-off-by: Roberto Agostino Vitillo Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-9-git-send-email-namhyung@kernel.org [ Use a2l_data struct instead of static globals ] Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d9bba8d..cf6ad5d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -523,6 +523,10 @@ ifndef NO_LIBNUMA endif endif +ifndef ($(filter -lbfd,$(EXTLIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT +endif + # Among the variables below, these: # perfexecdir # template_dir diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index dcff10b..3735319 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -8,6 +8,172 @@ #include "util/util.h" #include "util/debug.h" +#ifdef HAVE_LIBBFD_SUPPORT + +/* + * Implement addr2line using libbfd. + */ +#define PACKAGE "perf" +#include + +struct a2l_data { + const char *input; + unsigned long addr; + + bool found; + const char *filename; + const char *funcname; + unsigned line; + + bfd *abfd; + asymbol **syms; +}; + +static int bfd_error(const char *string) +{ + const char *errmsg; + + errmsg = bfd_errmsg(bfd_get_error()); + fflush(stdout); + + if (string) + pr_debug("%s: %s\n", string, errmsg); + else + pr_debug("%s\n", errmsg); + + return -1; +} + +static int slurp_symtab(bfd *abfd, struct a2l_data *a2l) +{ + long storage; + long symcount; + asymbol **syms; + bfd_boolean dynamic = FALSE; + + if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0) + return bfd_error(bfd_get_filename(abfd)); + + storage = bfd_get_symtab_upper_bound(abfd); + if (storage == 0L) { + storage = bfd_get_dynamic_symtab_upper_bound(abfd); + dynamic = TRUE; + } + if (storage < 0L) + return bfd_error(bfd_get_filename(abfd)); + + syms = malloc(storage); + if (dynamic) + symcount = bfd_canonicalize_dynamic_symtab(abfd, syms); + else + symcount = bfd_canonicalize_symtab(abfd, syms); + + if (symcount < 0) { + free(syms); + return bfd_error(bfd_get_filename(abfd)); + } + + a2l->syms = syms; + return 0; +} + +static void find_address_in_section(bfd *abfd, asection *section, void *data) +{ + bfd_vma pc, vma; + bfd_size_type size; + struct a2l_data *a2l = data; + + if (a2l->found) + return; + + if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) + return; + + pc = a2l->addr; + vma = bfd_get_section_vma(abfd, section); + size = bfd_get_section_size(section); + + if (pc < vma || pc >= vma + size) + return; + + a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, + &a2l->filename, &a2l->funcname, + &a2l->line); +} + +static struct a2l_data *addr2line_init(const char *path) +{ + bfd *abfd; + struct a2l_data *a2l = NULL; + + abfd = bfd_openr(path, NULL); + if (abfd == NULL) + return NULL; + + if (!bfd_check_format(abfd, bfd_object)) + goto out; + + a2l = zalloc(sizeof(*a2l)); + if (a2l == NULL) + goto out; + + a2l->abfd = abfd; + a2l->input = strdup(path); + if (a2l->input == NULL) + goto out; + + if (slurp_symtab(abfd, a2l)) + goto out; + + return a2l; + +out: + if (a2l) { + free((void *)a2l->input); + free(a2l); + } + bfd_close(abfd); + return NULL; +} + +static void addr2line_cleanup(struct a2l_data *a2l) +{ + if (a2l->abfd) + bfd_close(a2l->abfd); + free((void *)a2l->input); + free(a2l->syms); + free(a2l); +} + +static int addr2line(const char *dso_name, unsigned long addr, + char **file, unsigned int *line) +{ + int ret = 0; + struct a2l_data *a2l; + + a2l = addr2line_init(dso_name); + if (a2l == NULL) { + pr_warning("addr2line_init failed for %s\n", dso_name); + return 0; + } + + a2l->addr = addr; + bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); + + if (a2l->found && a2l->filename) { + *file = strdup(a2l->filename); + *line = a2l->line; + + if (*file) + ret = 1; + } + + addr2line_cleanup(a2l); + return ret; +} + +#else /* HAVE_LIBBFD_SUPPORT */ + static int addr2line(const char *dso_name, unsigned long addr, char **file, unsigned int *line_nr) { @@ -53,6 +219,7 @@ out: pclose(fp); return ret; } +#endif /* HAVE_LIBBFD_SUPPORT */ char *get_srcline(struct dso *dso, unsigned long addr) { -- cgit v0.10.2 From 4adcc43003024354b45edadfad4ea2fa205f135f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Sep 2013 14:09:33 +0900 Subject: perf tools: Fix srcline sort key behavior Currently the srcline sort key compares ip rather than srcline info. I guess this was due to a performance reason to run external addr2line utility. Now we have implemented the functionality inside, use the srcline info when comparing hist entries. Also constantly print "??:0" string for unknown srcline rather than printing ip. Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1378876173-13363-10-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f732120..32c5637 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -243,33 +243,32 @@ struct sort_entry sort_sym = { static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { - return (int64_t)(right->ip - left->ip); + if (!left->srcline) { + if (!left->ms.map) + left->srcline = SRCLINE_UNKNOWN; + else { + struct map *map = left->ms.map; + left->srcline = get_srcline(map->dso, + map__rip_2objdump(map, left->ip)); + } + } + if (!right->srcline) { + if (!right->ms.map) + right->srcline = SRCLINE_UNKNOWN; + else { + struct map *map = right->ms.map; + right->srcline = get_srcline(map->dso, + map__rip_2objdump(map, right->ip)); + } + } + return strcmp(left->srcline, right->srcline); } static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width __maybe_unused) { - FILE *fp = NULL; - char *path = self->srcline; - - if (path != NULL) - goto out_path; - - if (!self->ms.map) - goto out_ip; - - path = get_srcline(self->ms.map->dso, self->ip); - self->srcline = path; - -out_path: - if (fp) - pclose(fp); - return repsep_snprintf(bf, size, "%s", path); -out_ip: - if (fp) - pclose(fp); - return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); + return repsep_snprintf(bf, size, "%s", self->srcline); } struct sort_entry sort_srcline = { -- cgit v0.10.2 From eac032c54be48e899a7be2b8db4b746be1b51748 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2013 11:27:32 -0300 Subject: perf trace: Beautify epoll_ctl 'op' arg [root@sandy ~]# perf trace -e epoll_ctl 2.490 (0.003 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: ADD, fd: 24, event: 0x7fff22314ef0) = 0 2.621 (0.003 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: DEL, fd: 24 ) = 0 2.833 (0.010 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: ADD, fd: 24, event: 0x7fff22314cd0) = 0 2.953 (0.002 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: DEL, fd: 24 ) = 0 3.118 (0.002 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: ADD, fd: 24, event: 0x7fff22314d20) = 0 4.762 (0.002 ms): systemd-logind/586 epoll_ctl(epfd: 10, op: DEL, fd: 24 ) = 0 ^C[root@sandy ~]# Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-88xz9phc8cbicnxonud6if8h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f3ddbb0..807e542 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -232,6 +232,9 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct sysc #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op +static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", }; +static DEFINE_STRARRAY(epoll_ctl_ops); + static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; static DEFINE_STRARRAY(itimers); @@ -543,6 +546,9 @@ static struct syscall_fmt { { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "connect", .errmsg = true, }, + { .name = "epoll_ctl", .errmsg = true, + .arg_scnprintf = { [1] = SCA_STRARRAY, /* op */ }, + .arg_parm = { [1] = &strarray__epoll_ctl_ops, /* op */ }, }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "fcntl", .errmsg = true, -- cgit v0.10.2 From 5cea6ff265e6979226b751f5ebfdda4594322523 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2013 11:49:50 -0300 Subject: perf trace: Beautify flock 'cmd' arg 4735.638 ( 0.003 ms): man/19881 flock(fd: 3, cmd: SH|NB) = 0 4735.832 ( 0.002 ms): man/19881 flock(fd: 3, cmd: UN ) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-amh3y88kh1nmclpwezqlarl8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 807e542..2007c8c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -189,6 +189,37 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior +static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, op = arg->val; + + if (op == 0) + return scnprintf(bf, size, "NONE"); +#define P_CMD(cmd) \ + if ((op & LOCK_##cmd) == LOCK_##cmd) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ + op &= ~LOCK_##cmd; \ + } + + P_CMD(SH); + P_CMD(EX); + P_CMD(NB); + P_CMD(UN); + P_CMD(MAND); + P_CMD(RW); + P_CMD(READ); + P_CMD(WRITE); +#undef P_OP + + if (op) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); + + return printed; +} + +#define SCA_FLOCK syscall_arg__scnprintf_flock + static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) { enum syscall_futex_args { @@ -554,6 +585,8 @@ static struct syscall_fmt { { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, + { .name = "flock", .errmsg = true, + .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, -- cgit v0.10.2 From 453350dd0f0245b91b1e43310f5966fb1c51e7bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2013 12:13:37 -0300 Subject: perf trace: Add helper for syscalls with a single strarray arg In such cases just stating the (arg, name, array) is enough, reducing the size of the syscall formatters table. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3k53p6dv2sh4ydsc5k5otoia@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2007c8c..8855207 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -562,6 +562,10 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal #define SCA_SIGNUM syscall_arg__scnprintf_signum +#define STRARRAY(arg, name, array) \ + .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ + .arg_parm = { [arg] = &strarray__##array, } + static struct syscall_fmt { const char *name; const char *alias; @@ -577,33 +581,23 @@ static struct syscall_fmt { { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "connect", .errmsg = true, }, - { .name = "epoll_ctl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_STRARRAY, /* op */ }, - .arg_parm = { [1] = &strarray__epoll_ctl_ops, /* op */ }, }, + { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, - { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, - .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, + { .name = "fcntl", .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, - { .name = "getitimer", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, - .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, - { .name = "getrlimit", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, - .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, + { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "lseek", .errmsg = true, - .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, - .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, + { .name = "lseek", .errmsg = true, STRARRAY(2, whence, whences), }, { .name = "lstat", .errmsg = true, .alias = "newlstat", }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ @@ -629,9 +623,7 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", }, - { .name = "prlimit64", .errmsg = true, - .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ }, - .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, }, + { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, @@ -642,9 +634,7 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, - { .name = "rt_sigprocmask", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ }, - .arg_parm = { [0] = &strarray__sighow, /* how */ }, }, + { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, { .name = "rt_sigqueueinfo", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, @@ -656,12 +646,8 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendto", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, - { .name = "setitimer", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, - .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, - { .name = "setrlimit", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, - .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, + { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, -- cgit v0.10.2 From 4aa5823225c70efba758750029504807cd30b925 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2013 12:19:41 -0300 Subject: perf trace: Don't supress zeroed args when there is an strarray entry for it Case in hand: 9.682 ( 0.001 ms): Xorg/13079 setitimer(which: REAL, value: 0x7fffede42470) = 0 ITIMER_REAL is zero. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6hnoqsjh99t4hxi3xu2nlwep@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8855207..27b0ec8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -940,8 +940,15 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; - - if (args[arg.idx] == 0) + /* + * Suppress this argument if its value is zero and + * and we don't have a string associated in an + * strarray for it. + */ + if (args[arg.idx] == 0 && + !(sc->arg_scnprintf && + sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && + sc->arg_parm[arg.idx])) continue; printed += scnprintf(bf + printed, size - printed, -- cgit v0.10.2 From 07120aa5d5a241895bcca8b6e5b5b8d7b1df1f2f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2013 12:24:20 -0300 Subject: perf trace: Use socket's beautifiers in socketpair For the address family and socket type. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3a6cwwskobvan823pau76cm4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 27b0ec8..bf74217 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -652,6 +652,10 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, + { .name = "socketpair", .errmsg = true, + .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ + [1] = SCA_SK_TYPE, /* type */ }, + .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, -- cgit v0.10.2 From 46cce19ba8cc8231d3c949c128e4bdaf420228a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2013 12:52:04 -0300 Subject: perf trace: Beautify pipe2 'flags' arg 4.234 (0.005 ms): fetchmail/3224 pipe2(fildes: 0x7fffc72bcee0, flags: CLOEXEC) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9e1jz78i6q6e0xr9fsitqbpe@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bf74217..76d9427 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -517,6 +517,29 @@ static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags +static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & O_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~O_##n; \ + } + + P_FLAG(CLOEXEC); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags + static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) { int sig = arg->val; @@ -620,6 +643,8 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + { .name = "pipe2", .errmsg = true, + .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", }, -- cgit v0.10.2 From 4f8c1b74c5fdac35ee4480685d42030446724848 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 22 Sep 2013 19:45:00 -0600 Subject: perf trace: Add beautifier for clock_gettime's clk_id argument Before: 0.030 ( 0.002 ms): 2571 clock_gettime(which_clock: 1, tp: 0x7f3b45729cd0 ) = 0 After: 0.030 ( 0.002 ms): 2571 clock_gettime(which_clock: MONOTONIC, tp: 0x7f3b45729cd0 ) = 0 v2: Update to use the STRARRAY option Signed-off-by: David Ahern Cc: Adrian Hunter Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1379900700-5186-6-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 76d9427..39a947a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -297,6 +297,12 @@ static DEFINE_STRARRAY(rlimit_resources); static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; static DEFINE_STRARRAY(sighow); +static const char *clockid[] = { + "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", + "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", +}; +static DEFINE_STRARRAY(clockid); + static const char *socket_families[] = { "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", @@ -603,6 +609,7 @@ static struct syscall_fmt { { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, + { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "connect", .errmsg = true, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, -- cgit v0.10.2 From b6e8f8f4674be5a32f78027ec6e432f5ea33921e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 22 Sep 2013 19:44:56 -0600 Subject: perf trace: Handle MSG_WAITFORONE not defined Needed for compiles on Fedora 12 for example. Signed-off-by: David Ahern Link: http://lkml.kernel.org/r/1379900700-5186-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 39a947a..3ca6a85 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -363,6 +363,9 @@ static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, #ifndef MSG_PROBE #define MSG_PROBE 0x10 #endif +#ifndef MSG_WAITFORONE +#define MSG_WAITFORONE 0x10000 +#endif #ifndef MSG_SENDPAGE_NOTLAST #define MSG_SENDPAGE_NOTLAST 0x20000 #endif -- cgit v0.10.2 From 3d903aa74a98da2836249bce86aaf557781239c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2013 00:09:38 -0300 Subject: perf trace: Beautify mlock & friends 'addr' arg Printing it as an hex number. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-gd68zmnwbbofsv5m6w18intw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3ca6a85..12a275d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -635,6 +635,10 @@ static struct syscall_fmt { { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, + { .name = "mlock", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + { .name = "mlockall", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mmap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ @@ -645,6 +649,8 @@ static struct syscall_fmt { { .name = "mremap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [4] = SCA_HEX, /* new_addr */ }, }, + { .name = "munlock", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "open", .errmsg = true, -- cgit v0.10.2 From 75b757ca90469e990e6901f4a9497fe4161f7f5a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2013 11:04:32 -0300 Subject: perf trace: Show path associated with fd in live sessions For live sessions we can just access /proc to map an fd to its path, on a best effort way, i.e. sometimes the fd will have gone away when we try to do the mapping, as it is done in a lazy way, only when a reference to such fd is made then the path will be looked up in /proc. This is disabled when processing perf.data files, where we will have to have a way to get getname events, be it via an on-the-fly 'perf probe' event or after a vfs_getname tracepoint is added to the kernel. A first step will be to synthesize such event for the use cases where the threads in the monitored workload exist already. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-1r1ti33ye1666jezu2d8q1c3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 12a275d..fcc157f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -36,6 +36,8 @@ struct syscall_arg { unsigned long val; + struct thread *thread; + struct trace *trace; void *parm; u8 idx; u8 mask; @@ -65,6 +67,29 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, #define SCA_STRARRAY syscall_arg__scnprintf_strarray +static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_FD syscall_arg__scnprintf_fd + +static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, + struct syscall_arg *arg) +{ + int fd = arg->val; + + if (fd == AT_FDCWD) + return scnprintf(bf, size, "CWD"); + + return syscall_arg__scnprintf_fd(bf, size, arg); +} + +#define SCA_FDAT syscall_arg__scnprintf_fd_at + +static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd + static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) { @@ -613,28 +638,84 @@ static struct syscall_fmt { { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, + { .name = "close", .errmsg = true, + .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, + { .name = "dup", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "dup2", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "dup3", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, - { .name = "fcntl", .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), }, + { .name = "faccessat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "fadvise64", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fallocate", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fchdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fchmod", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fchmodat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "fchown", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fchownat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "fcntl", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [1] = SCA_STRARRAY, /* cmd */ }, + .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, + { .name = "fdatasync", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "flock", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, - { .name = "fstat", .errmsg = true, .alias = "newfstat", }, - { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [1] = SCA_FLOCK, /* cmd */ }, }, + { .name = "fsetxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "fstatfs", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fsync", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "ftruncate", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, + { .name = "futimesat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "getdents", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "getdents64", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_HEX, /* arg */ }, }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "lseek", .errmsg = true, STRARRAY(2, whence, whences), }, + { .name = "linkat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "lseek", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_STRARRAY, /* whence */ }, + .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, { .name = "lstat", .errmsg = true, .alias = "newlstat", }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, + { .name = "mkdirat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "mknodat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -653,26 +734,45 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + { .name = "name_to_handle_at", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "newfstatat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, - .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, - .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "pipe2", .errmsg = true, .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "pread", .errmsg = true, .alias = "pread64", }, + { .name = "pread", .errmsg = true, .alias = "pread64", + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "preadv", .errmsg = true, .alias = "pread", + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, - { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, - { .name = "read", .errmsg = true, }, + { .name = "pwrite", .errmsg = true, .alias = "pwrite64", + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "pwritev", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "read", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "readlinkat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "readv", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "renameat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -689,6 +789,8 @@ static struct syscall_fmt { .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "shutdown", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -698,11 +800,21 @@ static struct syscall_fmt { [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "symlinkat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, + { .name = "unlinkat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "utimensat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + { .name = "write", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "writev", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -747,11 +859,20 @@ struct thread_trace { unsigned long nr_events; char *entry_str; double runtime_ms; + struct { + int max; + char **table; + } paths; }; static struct thread_trace *thread_trace__new(void) { - return zalloc(sizeof(struct thread_trace)); + struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); + + if (ttrace) + ttrace->paths.max = -1; + + return ttrace; } static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) @@ -792,6 +913,7 @@ struct trace { unsigned long nr_events; struct strlist *ev_qualifier; bool not_ev_qualifier; + bool live; struct intlist *tid_list; struct intlist *pid_list; bool sched; @@ -801,6 +923,98 @@ struct trace { double runtime_ms; }; +static int thread__read_fd_path(struct thread *thread, int fd) +{ + struct thread_trace *ttrace = thread->priv; + char linkname[PATH_MAX], pathname[PATH_MAX]; + struct stat st; + int ret; + + if (thread->pid_ == thread->tid) { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/fd/%d", thread->pid_, fd); + } else { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); + } + + if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) + return -1; + + ret = readlink(linkname, pathname, sizeof(pathname)); + + if (ret < 0 || ret > st.st_size) + return -1; + + pathname[ret] = '\0'; + + if (fd > ttrace->paths.max) { + char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); + + if (npath == NULL) + return -1; + + if (ttrace->paths.max != -1) { + memset(npath + ttrace->paths.max + 1, 0, + (fd - ttrace->paths.max) * sizeof(char *)); + } else { + memset(npath, 0, (fd + 1) * sizeof(char *)); + } + + ttrace->paths.table = npath; + ttrace->paths.max = fd; + } + + ttrace->paths.table[fd] = strdup(pathname); + + return ttrace->paths.table[fd] != NULL ? 0 : -1; +} + +static const char *thread__fd_path(struct thread *thread, int fd, bool live) +{ + struct thread_trace *ttrace = thread->priv; + + if (ttrace == NULL) + return NULL; + + if (fd < 0) + return NULL; + + if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) && + (!live || thread__read_fd_path(thread, fd))) + return NULL; + + return ttrace->paths.table[fd]; +} + +static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, + struct syscall_arg *arg) +{ + int fd = arg->val; + size_t printed = scnprintf(bf, size, "%d", fd); + const char *path = thread__fd_path(arg->thread, fd, arg->trace->live); + + if (path) + printed += scnprintf(bf + printed, size - printed, "<%s>", path); + + return printed; +} + +static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, + struct syscall_arg *arg) +{ + int fd = arg->val; + size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); + struct thread_trace *ttrace = arg->thread->priv; + + if (ttrace && fd >= 0 && fd <= ttrace->paths.max) { + free(ttrace->paths.table[fd]); + ttrace->paths.table[fd] = NULL; + } + + return printed; +} + static bool trace__filter_duration(struct trace *trace, double t) { return t < (trace->duration_filter * NSEC_PER_MSEC); @@ -969,7 +1183,8 @@ static int trace__read_syscall_info(struct trace *trace, int id) } static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args) + unsigned long *args, struct trace *trace, + struct thread *thread) { size_t printed = 0; @@ -977,8 +1192,10 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, struct format_field *field; u8 bit = 1; struct syscall_arg arg = { - .idx = 0, - .mask = 0, + .idx = 0, + .mask = 0, + .trace = trace, + .thread = thread, }; for (field = sc->tp_format->format.fields->next; field; @@ -1111,7 +1328,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); - printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); + printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, + args, trace, thread); if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { if (!trace->duration_filter) { @@ -1292,6 +1510,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) unsigned long before; const bool forks = argc > 0; + trace->live = true; + if (evlist == NULL) { fprintf(trace->output, "Not enough memory to run!\n"); goto out; @@ -1423,6 +1643,7 @@ out_delete_maps: out_delete_evlist: perf_evlist__delete(evlist); out: + trace->live = false; return err; } -- cgit v0.10.2 From b52bc23414124b0d37fd9933b5a894d9b4a9720d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2013 11:56:36 -0300 Subject: perf trace: Add 'trace' alias to 'perf trace' Make 'perf trace' more accessible by aliasing it to just 'trace': [root@zoo linux]# trace --duration 15 -a -e futex sleep 1 110.092 (16.188 ms): libvirtd/1166 futex(uaddr: 0x185b344, op: WAIT|PRIV, val: 174293 ) = 0 110.101 (15.903 ms): libvirtd/1171 futex(uaddr: 0x185b3dc, op: WAIT|PRIV, val: 139265 ) = 0 111.594 (15.776 ms): libvirtd/1165 futex(uaddr: 0x185b344, op: WAIT|PRIV, val: 174295 ) = 0 111.610 (15.969 ms): libvirtd/1169 futex(uaddr: 0x185b3dc, op: WAIT|PRIV, val: 139267 ) = 0 113.556 (16.216 ms): libvirtd/1168 futex(uaddr: 0x185b3dc, op: WAIT|PRIV, val: 139269 ) = 0 291.265 (199.508 ms): chromium-brows/15830 futex(uaddr: 0x7fff2986bcb4, op: WAIT_BITSET|PRIV|CLKRT, val: 1, utime: 0x7fff2986bab0, val3: 4294967295) = -1 ETIMEDOUT Connection timed out 360.354 (69.053 ms): chromium-brows/15830 futex(uaddr: 0x7fff2986bcb4, op: WAIT_BITSET|PRIV|CLKRT, val: 1, utime: 0x7fff2986bab0, val3: 4294967295) = -1 ETIMEDOUT Connection timed out [root@zoo linux]# I.e. looking for futex calls that take at least 15ms, system wide, during a one second window. Now to get callchains into 'trace' to figure out what are those locks :-) Requested-by: Ingo Molnar Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ch4smqz8b5fmgrte7c5e4fuw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1f13615..2badb08 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -78,6 +78,7 @@ CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar RM = rm -f +LN = ln -f MKDIR = mkdir FIND = find INSTALL = install @@ -809,6 +810,7 @@ install-gtk: install-bin: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' + $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBPERL diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6265778..8b38b4e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -481,7 +481,14 @@ int main(int argc, const char **argv) fprintf(stderr, "cannot handle %s internally", cmd); goto out; } - +#ifdef HAVE_LIBAUDIT_SUPPORT + if (!prefixcmp(cmd, "trace")) { + set_buildid_dir(); + setup_path(); + argv[0] = "trace"; + return cmd_trace(argc, argv, NULL); + } +#endif /* Look for flags.. */ argv++; argc--; -- cgit v0.10.2 From a9faa0cab619fad380c2669825aa84212943d528 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Sep 2013 10:17:35 +0200 Subject: perf bench sched: Add --threaded option Allow the measurement of thread versus process context switch performance. The default stays at 'process' based measurement, like lmbench's lat_ctx benchmark. Sample output: comet:~/tip/tools/perf> taskset 1 ./perf bench sched pipe # Running sched/pipe benchmark... # Executed 1000000 pipe operations between two processes Total time: 4.138 [sec] 4.138729 usecs/op 241620 ops/sec comet:~/tip/tools/perf> taskset 1 ./perf bench sched pipe --threaded # Running sched/pipe benchmark... # Executed 1000000 pipe operations between two threads Total time: 3.667 [sec] 3.667667 usecs/op 272652 ops/sec Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Namhyung Kim Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20130917114256.GA31159@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 69cfba8..07a8d76 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -7,9 +7,7 @@ * Based on pipe-test-1m.c by Ingo Molnar * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c * Ported to perf by Hitoshi Mitake - * */ - #include "../perf.h" #include "../util/util.h" #include "../util/parse-options.h" @@ -28,12 +26,24 @@ #include #include +#include + +struct thread_data { + int nr; + int pipe_read; + int pipe_write; + pthread_t pthread; +}; + #define LOOPS_DEFAULT 1000000 -static int loops = LOOPS_DEFAULT; +static int loops = LOOPS_DEFAULT; + +/* Use processes by default: */ +static bool threaded; static const struct option options[] = { - OPT_INTEGER('l', "loop", &loops, - "Specify number of loops"), + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_END() }; @@ -42,13 +52,37 @@ static const char * const bench_sched_pipe_usage[] = { NULL }; -int bench_sched_pipe(int argc, const char **argv, - const char *prefix __maybe_unused) +static void *worker_thread(void *__tdata) { - int pipe_1[2], pipe_2[2]; + struct thread_data *td = __tdata; int m = 0, i; + int ret; + + for (i = 0; i < loops; i++) { + if (!td->nr) { + ret = read(td->pipe_read, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + } else { + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = read(td->pipe_read, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + } + } + + return NULL; +} + +int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct thread_data threads[2], *td; + int pipe_1[2], pipe_2[2]; struct timeval start, stop, diff; unsigned long long result_usec = 0; + int nr_threads = 2; + int t; /* * why does "ret" exist? @@ -58,43 +92,66 @@ int bench_sched_pipe(int argc, const char **argv, int __maybe_unused ret, wait_stat; pid_t pid, retpid __maybe_unused; - argc = parse_options(argc, argv, options, - bench_sched_pipe_usage, 0); + argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); BUG_ON(pipe(pipe_1)); BUG_ON(pipe(pipe_2)); - pid = fork(); - assert(pid >= 0); - gettimeofday(&start, NULL); - if (!pid) { - for (i = 0; i < loops; i++) { - ret = read(pipe_1[0], &m, sizeof(int)); - ret = write(pipe_2[1], &m, sizeof(int)); - } - } else { - for (i = 0; i < loops; i++) { - ret = write(pipe_1[1], &m, sizeof(int)); - ret = read(pipe_2[0], &m, sizeof(int)); + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + td->nr = t; + + if (t == 0) { + td->pipe_read = pipe_1[0]; + td->pipe_write = pipe_2[1]; + } else { + td->pipe_write = pipe_1[1]; + td->pipe_read = pipe_2[0]; } } - gettimeofday(&stop, NULL); - timersub(&stop, &start, &diff); - if (pid) { + if (threaded) { + + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + ret = pthread_create(&td->pthread, NULL, worker_thread, td); + BUG_ON(ret); + } + + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + ret = pthread_join(td->pthread, NULL); + BUG_ON(ret); + } + + } else { + pid = fork(); + assert(pid >= 0); + + if (!pid) { + worker_thread(threads + 0); + exit(0); + } else { + worker_thread(threads + 1); + } + retpid = waitpid(pid, &wait_stat, 0); assert((retpid == pid) && WIFEXITED(wait_stat)); - } else { - exit(0); } + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("# Executed %d pipe operations between two tasks\n\n", - loops); + printf("# Executed %d pipe operations between two %s\n\n", + loops, threaded ? "threads" : "processes"); result_usec = diff.tv_sec * 1000000; result_usec += diff.tv_usec; -- cgit v0.10.2 From f4be904d2fa7c65e230ed4b1008ebdf5f4053ac3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sun, 22 Sep 2013 13:22:09 +0300 Subject: perf machine: Use snprintf instead of sprintf To avoid buffer overruns. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1379845338-29637-2-git-send-email-adrian.hunter@intel.com [ Split from aa7fe3b ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6188d28..ddf917b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -785,10 +785,10 @@ static int machine__create_modules(struct machine *machine) const char *modules; char path[PATH_MAX]; - if (machine__is_default_guest(machine)) + if (machine__is_default_guest(machine)) { modules = symbol_conf.default_guest_modules; - else { - sprintf(path, "%s/proc/modules", machine->root_dir); + } else { + snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir); modules = path; } -- cgit v0.10.2 From e2137086be7bc52893a790292635cfafc475b693 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Sep 2013 20:55:54 +0200 Subject: perf tools: Add missing -ldl for gtk build If we build perf with NO_LIBPYTHON=1 NO_LIBPERL=1 the '-ldl' is not added to libs build fails if we have gtk2 code in, because it depends on it. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380221754-29865-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cf6ad5d..29ad7d6 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -382,6 +382,7 @@ ifndef NO_GTK2 CFLAGS += -DHAVE_GTK2_SUPPORT GTK_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) GTK_LIBS := $(shell pkg-config --libs gtk+-2.0 2>/dev/null) + EXTLIBS += -ldl endif endif -- cgit v0.10.2 From 820042233bd3c1b24b5ca4c75a88a4e0de39814a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 27 Sep 2013 18:29:58 +0200 Subject: perf tools: Move start conditions to start of the flex file Moving start conditions to start of the flex file so it's clear what the INITIAL condition rules are. Plus adding default rule for INITIAL condition. This prevents default space to be printed for events like: $ ./perf stat -e "cycles " kill 2>/dev/null $ ^^^^^^^^ Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380299398-10839-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 91346b7..3432995 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -126,6 +126,37 @@ modifier_bp [rwx]{1,3} } +{ +config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } +config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } +config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } +name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } +period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +, { return ','; } +"/" { BEGIN(INITIAL); return '/'; } +{name_minus} { return str(yyscanner, PE_NAME); } +} + +{ +{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } +: { return ':'; } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } + /* + * We need to separate 'mem:' scanner part, in order to get specific + * modifier bits parsed out. Otherwise we would need to handle PE_NAME + * and we'd need to parse it manually. During the escape from + * state we need to put the escaping char back, so we dont miss it. + */ +. { unput(*yytext); BEGIN(INITIAL); } + /* + * We destroy the scanner after reaching EOF, + * but anyway just to be sure get back to INIT state. + */ +<> { BEGIN(INITIAL); } +} + cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } @@ -162,18 +193,6 @@ speculative-read|speculative-load | refs|Reference|ops|access | misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } -{ -config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } -config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } -config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } -name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } -period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } -branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } -, { return ','; } -"/" { BEGIN(INITIAL); return '/'; } -{name_minus} { return str(yyscanner, PE_NAME); } -} - mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return raw(yyscanner); } {num_dec} { return value(yyscanner, 10); } @@ -189,25 +208,7 @@ r{num_raw_hex} { return raw(yyscanner); } "}" { return '}'; } = { return '='; } \n { } - -{ -{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } -: { return ':'; } -{num_dec} { return value(yyscanner, 10); } -{num_hex} { return value(yyscanner, 16); } - /* - * We need to separate 'mem:' scanner part, in order to get specific - * modifier bits parsed out. Otherwise we would need to handle PE_NAME - * and we'd need to parse it manually. During the escape from - * state we need to put the escaping char back, so we dont miss it. - */ -. { unput(*yytext); BEGIN(INITIAL); } - /* - * We destroy the scanner after reaching EOF, - * but anyway just to be sure get back to INIT state. - */ -<> { BEGIN(INITIAL); } -} +. { } %% -- cgit v0.10.2 From c458fe62ca31496664c1211a7906d261220b18f9 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 30 Sep 2013 16:43:05 +0530 Subject: perf stat: Don't print bogus data on -e cycles When only the cycles event is requested: $ perf stat -e cycles dd if=/dev/zero of=/dev/null count=1000000 1000000+0 records in 1000000+0 records out 512000000 bytes (512 MB) copied, 0.26123 s, 2.0 GB/s Performance counter stats for 'dd if=/dev/zero of=/dev/null count=1000000': 911,626,453 cycles # 0.000 GHz 0.262113350 seconds time elapsed The 0.000 GHz comment in the output is totally bogus and misleading. It happens because update_shadow_stats() doesn't touch runtime_nsecs_stats; it is only written when a requested counter matches a SW_TASK_CLOCK. In our case, since we have only requested HW_CPU_CYCLES, runtime_nsecs_stats is unavailable. So, omit printing the comment altogether. Signed-off-by: Ramkumar Ramachandra Acked-by: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380539585-23859-3-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 700b478..ce2266c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -997,10 +997,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); - if (total) - ratio = 1.0 * avg / total; - - fprintf(output, " # %8.3f GHz ", ratio); + if (total) { + ratio = avg / total; + fprintf(output, " # %8.3f GHz ", ratio); + } } else if (transaction_run && perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { total = avg_stats(&runtime_cycles_stats[cpu]); -- cgit v0.10.2 From 3e7a081796146f97f166d77a655c0eb585065077 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 1 Oct 2013 14:06:44 +0530 Subject: perf stat: Don't print bogus data on -e instructions When only the instructions event is requested: $ perf stat -e instructions git s M builtin-stat.c Performance counter stats for 'git s': 917,453,420 instructions # 0.00 insns per cycle 0.213002926 seconds time elapsed The 0.00 insns per cycle comment in the output is totally bogus and misleading. It happens because update_shadow_stats() doesn't touch runtime_cycles_stats when only the instructions event is requested. So, omit printing the bogus data altogether. Signed-off-by: Ramkumar Ramachandra Acked-by: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380616604-4077-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ce2266c..fb02b53 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -930,11 +930,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = avg_stats(&runtime_cycles_stats[cpu]); - if (total) + if (total) { ratio = avg / total; - - fprintf(output, " # %5.2f insns per cycle ", ratio); - + fprintf(output, " # %5.2f insns per cycle ", ratio); + } total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); -- cgit v0.10.2 From f3c236b0c7a84e5d59cc639a1673a20b0a59ecc0 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 3 Oct 2013 14:10:36 +0530 Subject: perf tools: Ignore 'perf timechart' output file The default output file produced by the 'perf timechart' tool is called output.svg, add it to .gitignore. Signed-off-by: Ramkumar Ramachandra Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1380789636-4512-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 8f8fbc2..782d86e 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -13,6 +13,7 @@ perf*.html common-cmds.h perf.data perf.data.old +output.svg perf-archive tags TAGS -- cgit v0.10.2 From b81a48ea877e1a104dace1392d92f708ff208f97 Mon Sep 17 00:00:00 2001 From: Petr Holasek Date: Thu, 3 Oct 2013 19:28:45 +0200 Subject: perf bench: Fix failing assertions in numa bench Patch adds more subtle handling of -C and -N parameters in parse_{cpu,node}_setup_list() functions when there isn't enough NUMA nodes or CPUs present. Instead of assertion and terminating benchmark, partial test is skipped with error message and perf will continue to the next one. Fixed problem can be easily reproduced on machine with only one NUMA node: # Running numa/mem benchmark... # Running main, "perf bench numa mem -a" ... # Running RAM-bw-remote, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 1 -s perf: bench/numa.c:622: parse_setup_node_list: Assertion `!(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes)' failed. Aborted Signed-off-by: Petr Holasek Acked-by: Ingo Molnar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Petr Benas Link: http://lkml.kernel.org/r/1380821325-4017-1-git-send-email-pholasek@redhat.com Signed-off-by: Petr Benas Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 30d1c32..64fa01c 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -429,14 +429,14 @@ static int parse_cpu_list(const char *arg) return 0; } -static void parse_setup_cpu_list(void) +static int parse_setup_cpu_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.cpu_list_str) - return; + return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); @@ -500,8 +500,12 @@ static void parse_setup_cpu_list(void) dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); - BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus); - BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus); + if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { + printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); + return -1; + } + + BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { @@ -541,6 +545,7 @@ out: printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); + return 0; } static int parse_cpus_opt(const struct option *opt __maybe_unused, @@ -561,14 +566,14 @@ static int parse_node_list(const char *arg) return 0; } -static void parse_setup_node_list(void) +static int parse_setup_node_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.node_list_str) - return; + return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); @@ -619,8 +624,12 @@ static void parse_setup_node_list(void) dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); - BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes); - BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes); + if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { + printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes); + return -1; + } + + BUG_ON(bind_node_0 < 0 || bind_node_1 < 0); BUG_ON(bind_node_0 > bind_node_1); for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { @@ -651,6 +660,7 @@ out: printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); + return 0; } static int parse_nodes_opt(const struct option *opt __maybe_unused, @@ -1356,8 +1366,8 @@ static int init(void) init_thread_data(); tprintf("#\n"); - parse_setup_cpu_list(); - parse_setup_node_list(); + if (parse_setup_cpu_list() || parse_setup_node_list()) + return -1; tprintf("#\n"); print_summary(); @@ -1600,7 +1610,6 @@ static int run_bench_numa(const char *name, const char **argv) return 0; err: - usage_with_options(numa_usage, options); return -1; } @@ -1701,8 +1710,7 @@ static int bench_all(void) BUG_ON(ret < 0); for (i = 0; i < nr; i++) { - if (run_bench_numa(tests[i][0], tests[i] + 1)) - return -1; + run_bench_numa(tests[i][0], tests[i] + 1); } printf("\n"); -- cgit v0.10.2 From a65cb4b9f8a777a715371c63c0525408048cea3e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Oct 2013 15:46:39 +0200 Subject: perf evlist: Fix perf_evlist__mmap_read event overflow The perf_evlist__mmap_read used 'union perf_event' as a placeholder for event crossing the mmap boundary. This is ok for sample shorter than ~PATH_MAX. However we could grow up to the maximum sample size which is 16 bits max. I hit this overflow issue when using 'perf top -G dwarf' which produces sample with the size around 8192 bytes. We could configure any valid sample size here using: '-G dwarf,size'. Using array with sample max size instead for the event placeholder. Also adding another safe check for the dynamic size of the user stack. TODO: The 'struct perf_mmap' is quite big now, maybe we could use some lazy allocation for event_copy size. Signed-off-by: Jiri Olsa Acked-by: David Ahern Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380721599-24285-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9b7d4d3..752709c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -75,6 +75,9 @@ struct throttle_event { PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | \ PERF_SAMPLE_IDENTIFIER) +/* perf sample has 16 bits size limit */ +#define PERF_SAMPLE_MAX_SIZE (1 << 16) + struct sample_event { struct perf_event_header header; u64 array[]; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f0d71a9..cb9523f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -540,7 +540,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) if ((old & md->mask) + size != ((old + size) & md->mask)) { unsigned int offset = old; unsigned int len = min(sizeof(*event), size), cpy; - void *dst = &md->event_copy; + void *dst = md->event_copy; do { cpy = min(md->mask + 1 - (offset & md->mask), len); @@ -550,7 +550,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) len -= cpy; } while (len); - event = &md->event_copy; + event = (union perf_event *) md->event_copy; } old += size; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 871b55ab..722618f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -21,7 +21,7 @@ struct perf_mmap { void *base; int mask; unsigned int prev; - union perf_event event_copy; + char event_copy[PERF_SAMPLE_MAX_SIZE]; }; struct perf_evlist { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index abe69af..bfebc1e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1456,6 +1456,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = (void *)array + sz; OVERFLOW_CHECK_u64(array); data->user_stack.size = *array++; + if (WARN_ONCE(data->user_stack.size > sz, + "user stack dump failure\n")) + return -EFAULT; } } -- cgit v0.10.2 From 62d3b617c02785a4f1fbde8d93ca77a0b33d8454 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 14:27:58 -0600 Subject: perf stat: Fix misleading message when specifying cpu list or system wide The "perf stat" tool displays the command run in its summary output which is misleading when using a cpu list or system wide collection. Before: perf stat -a -- sleep 1 Performance counter stats for 'sleep 1': 16152.670249 task-clock # 16.132 CPUs utilized 417 context-switches # 0.002 M/sec 7 cpu-migrations # 0.030 K/sec ... After: perf stat -a -- sleep 1 Performance counter stats for 'system wide': 16206.931120 task-clock # 16.144 CPUs utilized 395 context-switches # 0.002 M/sec 5 cpu-migrations # 0.030 K/sec ... or perf stat -C1 -- sleep 1 Performance counter stats for 'CPU(s) 1': 1001.669257 task-clock # 1.000 CPUs utilized 4,264 context-switches # 0.004 M/sec 3 cpu-migrations # 0.003 K/sec ... Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380400080-9211-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fb02b53..c8a2662 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1229,7 +1229,11 @@ static void print_stat(int argc, const char **argv) if (!csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); - if (!perf_target__has_task(&target)) { + if (target.system_wide) + fprintf(output, "\'system wide"); + else if (target.cpu_list) + fprintf(output, "\'CPU(s) %s", target.cpu_list); + else if (!perf_target__has_task(&target)) { fprintf(output, "\'%s", argv[0]); for (i = 1; i < argc; i++) fprintf(output, " %s", argv[i]); -- cgit v0.10.2 From ac3063bd4725689f39d7a23fdfca2e034c73dcac Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 30 Sep 2013 07:37:37 -0600 Subject: perf stat: Don't require a workload when using system wide or CPU options The "perf stat" command can do system wide counters or one or more cpus. For these options do not require a workload to be specified. v2: use perf_target__none per Namhyung's comment. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/52497F3C.9070908@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c8a2662..2178e66 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1659,8 +1659,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - if (!argc && !perf_target__has_task(&target)) + if (!argc && perf_target__none(&target)) usage_with_options(stat_usage, options); + if (run_count < 0) { usage_with_options(stat_usage, options); } else if (run_count == 0) { -- cgit v0.10.2 From 4bbe5a61f29b13437a6a16467328d3bae8fce9e7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 14:28:00 -0600 Subject: perf stat: Add units to nanosec-based counters Ingo pointed out that the task-clock counter should have the units explicitly stated since it is not a counter. Before: perf stat -a -- sleep 1 Performance counter stats for 'sleep 1': 16186.874834 task-clock # 16.154 CPUs utilized ... After: perf stat -a -- sleep 1 Performance counter stats for 'system wide': 16146.402138 task-clock (msec) # 16.125 CPUs utilized ... Reported-by: Ingo Molnar Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380400080-9211-4-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2178e66..1a9c95d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -706,10 +706,13 @@ static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; + char name[25]; aggr_printout(evsel, cpu, nr); - fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel)); + scnprintf(name, sizeof(name), "%s%s", + perf_evsel__name(evsel), csv_output ? "" : " (msec)"); + fprintf(output, fmt, msecs, csv_sep, name); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); -- cgit v0.10.2 From 8fb598e5a3b0ac213012e8461a309843ba0f2e74 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:13:00 -0600 Subject: perf trace: Fix comm resolution when reading events from file Task comm's are getting lost when processing events from a file. The problem is that the trace struct used by the live processing has its host machine and the perf-session used for file based processing has its host machine. Fix by having both references point to the same machine. Before: 0.030 ( 0.001 ms): :27743/27743 brk( ... 0.057 ( 0.004 ms): :27743/27743 mmap(len: 4096, prot: READ|WRITE, flags: ... 0.075 ( 0.006 ms): :27743/27743 access(filename: 0x7f3809fbce00, mode: R ... 0.091 ( 0.005 ms): :27743/27743 open(filename: 0x7f3809fba14c, flags: CLOEXEC ... ... After: 0.030 ( 0.001 ms): make/27743 brk( ... 0.057 ( 0.004 ms): make/27743 mmap(len: 4096, prot: READ|WRITE, flags: ... 0.075 ( 0.006 ms): make/27743 access(filename: 0x7f3809fbce00, mode: R ... 0.091 ( 0.005 ms): make/27743 open(filename: 0x7f3809fba14c, flags: CLOEXEC ... ... Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-4-git-send-email-dsahern@gmail.com [ Moved creation of new host machine to a separate constructor: machine__new_host() ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fcc157f..5776b5f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -906,7 +906,7 @@ struct trace { struct syscall *table; } syscalls; struct perf_record_opts opts; - struct machine host; + struct machine *host; u64 base_time; bool full_time; FILE *output; @@ -1083,16 +1083,17 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (err) return err; - machine__init(&trace->host, "", HOST_KERNEL_ID); - machine__create_kernel_maps(&trace->host); + trace->host = machine__new_host(); + if (trace->host == NULL) + return -ENOMEM; if (perf_target__has_task(&trace->opts.target)) { err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, trace__tool_process, - &trace->host); + trace->host); } else { err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, - &trace->host); + trace->host); } if (err) @@ -1304,8 +1305,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->filtered) return 0; - thread = machine__findnew_thread(&trace->host, sample->pid, - sample->tid); + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) return -1; @@ -1357,8 +1357,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (sc->filtered) return 0; - thread = machine__findnew_thread(&trace->host, sample->pid, - sample->tid); + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) return -1; @@ -1414,7 +1413,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs { u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); double runtime_ms = (double)runtime / NSEC_PER_MSEC; - struct thread *thread = machine__findnew_thread(&trace->host, + struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); struct thread_trace *ttrace = thread__trace(thread, trace->output); @@ -1597,7 +1596,7 @@ again: trace->base_time = sample.time; if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, &trace->host, event); + trace__process_event(trace, trace->host, event); continue; } @@ -1681,6 +1680,8 @@ static int trace__replay(struct trace *trace) if (session == NULL) return -ENOMEM; + trace->host = &session->machines.host; + err = perf_session__set_tracepoints_handlers(session, handlers); if (err) goto out; @@ -1728,7 +1729,7 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) size_t printed = trace__fprintf_threads_header(fp); struct rb_node *nd; - for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { + for (nd = rb_first(&trace->host->threads); nd; nd = rb_next(nd)) { struct thread *thread = rb_entry(nd, struct thread, rb_node); struct thread_trace *ttrace = thread->priv; const char *color; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ddf917b..fc14f9b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -46,6 +46,23 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) return 0; } +struct machine *machine__new_host(void) +{ + struct machine *machine = malloc(sizeof(*machine)); + + if (machine != NULL) { + machine__init(machine, "", HOST_KERNEL_ID); + + if (machine__create_kernel_maps(machine) < 0) + goto out_delete; + } + + return machine; +out_delete: + free(machine); + return NULL; +} + static void dsos__delete(struct list_head *dsos) { struct dso *pos, *n; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 58a6be1..5150d5e 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -74,6 +74,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size); void machines__set_symbol_filter(struct machines *machines, symbol_filter_t symbol_filter); +struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_dead_threads(struct machine *machine); -- cgit v0.10.2 From 5e2485b1a2813faa6b80007c653f8bbbed9457ee Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:13:01 -0600 Subject: perf trace: Add record option The record option is a convience alias to include the -e raw_syscalls:* argument to perf-record. All other options are passed to perf-record's handler. Resulting data file can be analyzed by perf-trace -i. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-5-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7f70d36..1a22486 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -9,6 +9,7 @@ SYNOPSIS -------- [verse] 'perf trace' +'perf trace record' DESCRIPTION ----------- @@ -16,9 +17,14 @@ This command will show the events associated with the target, initially syscalls, but other system events like pagefaults, task lifetime events, scheduling events, etc. -Initially this is a live mode only tool, but eventually will work with -perf.data files like the other tools, allowing a detached 'record' from -analysis phases. +This is a live mode tool in addition to working with perf.data files like +the other perf tools. Files can be generated using the 'perf record' command +but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*'). +Alernatively, the 'perf trace record' can be used as a shortcut to +automatically include the raw_syscalls events when writing events to a file. + +The following options apply to perf trace; options to perf trace record are +found in the perf record man page. OPTIONS ------- diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5776b5f..1e2368f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1501,6 +1501,33 @@ static int parse_target_str(struct trace *trace) return 0; } +static int trace__record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + const char * const record_args[] = { + "record", + "-R", + "-m", "1024", + "-c", "1", + "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit", + }; + + rec_argc = ARRAY_SIZE(record_args) + argc; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + if (rec_argv == NULL) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = record_args[i]; + + for (j = 0; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1788,6 +1815,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char * const trace_usage[] = { "perf trace [] []", "perf trace [] -- []", + "perf trace record [] []", + "perf trace record [] -- []", NULL }; struct trace trace = { @@ -1844,6 +1873,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; + if ((argc > 1) && (strcmp(argv[1], "record") == 0)) + return trace__record(argc-2, &argv[2]); + argc = parse_options(argc, argv, trace_options, trace_usage, 0); if (output_name != NULL) { -- cgit v0.10.2 From 35feee19f9fda7447f51073b5be3f6d082b508f5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:12:58 -0600 Subject: perf machine: Add method to loop over threads and invoke handler Loop over all threads within a machine - including threads moved to the dead threads list -- and invoked a function. This allows commands to run some specific function on each thread (eg., dump statistics) yet hides how the threads are maintained within the machine. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fc14f9b..901397a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1393,3 +1393,26 @@ int machine__resolve_callchain(struct machine *machine, sample); } + +int machine__for_each_thread(struct machine *machine, + int (*fn)(struct thread *thread, void *p), + void *priv) +{ + struct rb_node *nd; + struct thread *thread; + int rc = 0; + + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } + + list_for_each_entry(thread, &machine->dead_threads, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } + return rc; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 5150d5e..d44c09b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -166,4 +166,8 @@ void machines__destroy_kernel_maps(struct machines *machines); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); +int machine__for_each_thread(struct machine *machine, + int (*fn)(struct thread *thread, void *p), + void *priv); + #endif /* __PERF_MACHINE_H */ -- cgit v0.10.2 From 896cbb56bfee6ad99e0ee1b8209dc678f1a49f5a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:12:59 -0600 Subject: perf trace: Use new machine method to loop over threads Use the new machine method that loops over threads to dump summary data. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-3-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1e2368f..addc3e1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1751,37 +1751,57 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } +/* struct used to pass data to per-thread function */ +struct summary_data { + FILE *fp; + struct trace *trace; + size_t printed; +}; + +static int trace__fprintf_one_thread(struct thread *thread, void *priv) +{ + struct summary_data *data = priv; + FILE *fp = data->fp; + size_t printed = data->printed; + struct trace *trace = data->trace; + struct thread_trace *ttrace = thread->priv; + const char *color; + double ratio; + + if (ttrace == NULL) + return 0; + + ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; + + color = PERF_COLOR_NORMAL; + if (ratio > 50.0) + color = PERF_COLOR_RED; + else if (ratio > 25.0) + color = PERF_COLOR_GREEN; + else if (ratio > 5.0) + color = PERF_COLOR_YELLOW; + + printed += color_fprintf(fp, color, "%20s", thread->comm); + printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); + printed += color_fprintf(fp, color, "%5.1f%%", ratio); + printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); + + data->printed += printed; + + return 0; +} + static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - size_t printed = trace__fprintf_threads_header(fp); - struct rb_node *nd; - - for (nd = rb_first(&trace->host->threads); nd; nd = rb_next(nd)) { - struct thread *thread = rb_entry(nd, struct thread, rb_node); - struct thread_trace *ttrace = thread->priv; - const char *color; - double ratio; - - if (ttrace == NULL) - continue; - - ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 50.0) - color = PERF_COLOR_RED; - else if (ratio > 25.0) - color = PERF_COLOR_GREEN; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - printed += color_fprintf(fp, color, "%20s", thread->comm); - printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); - printed += color_fprintf(fp, color, "%5.1f%%", ratio); - printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); - } + struct summary_data data = { + .fp = fp, + .trace = trace + }; + data.printed = trace__fprintf_threads_header(fp); - return printed; + machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); + + return data.printed; } static int trace__set_duration(const struct option *opt, const char *str, -- cgit v0.10.2 From 2969b12993ca7a8b9692048431e075a67815002d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 28 Sep 2013 13:13:02 -0600 Subject: perf intlist: Add priv member Allows commands to leverage intlist infrastructure for opaque structures. For example an upcoming perf-trace change will use this as a means of tracking syscalls statistics by task. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1380395584-9025-6-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 11a8d86..826d7b3 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -20,6 +20,7 @@ static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused, if (node != NULL) { node->i = i; + node->priv = NULL; rc = &node->rb_node; } diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 62351da..0eb00ac 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -9,6 +9,7 @@ struct int_node { struct rb_node rb_node; int i; + void *priv; }; struct intlist { -- cgit v0.10.2 From 316d70d6dbde540b275289563cbddd9f0c903fc6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Oct 2013 11:45:48 +0300 Subject: perf symbols: Make a separate function to parse /proc/modules Make a separate function to parse /proc/modules so that it can be reused. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381221956-16699-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 901397a..6b861ae 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -793,12 +793,22 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); } -static int machine__create_modules(struct machine *machine) +static int machine__create_module(void *arg, const char *name, u64 start) { - char *line = NULL; - size_t n; - FILE *file; + struct machine *machine = arg; struct map *map; + + map = machine__new_module(machine, start, name); + if (map == NULL) + return -1; + + dso__kernel_module_get_build_id(map->dso, machine->root_dir); + + return 0; +} + +static int machine__create_modules(struct machine *machine) +{ const char *modules; char path[PATH_MAX]; @@ -812,56 +822,15 @@ static int machine__create_modules(struct machine *machine) if (symbol__restricted_filename(modules, "/proc/modules")) return -1; - file = fopen(modules, "r"); - if (file == NULL) + if (modules__parse(modules, machine, machine__create_module)) return -1; - while (!feof(file)) { - char name[PATH_MAX]; - u64 start; - char *sep; - int line_len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - goto out_failure; - - line[--line_len] = '\0'; /* \n */ - - sep = strrchr(line, 'x'); - if (sep == NULL) - continue; - - hex2u64(sep + 1, &start); - - sep = strchr(line, ' '); - if (sep == NULL) - continue; - - *sep = '\0'; - - snprintf(name, sizeof(name), "[%s]", line); - map = machine__new_module(machine, start, name); - if (map == NULL) - goto out_delete_line; - dso__kernel_module_get_build_id(map->dso, machine->root_dir); - } + if (!machine__set_modules_path(machine)) + return 0; - free(line); - fclose(file); + pr_debug("Problems setting modules path maps, continuing anyway...\n"); - if (machine__set_modules_path(machine) < 0) { - pr_debug("Problems setting modules path maps, continuing anyway...\n"); - } return 0; - -out_delete_line: - free(line); -out_failure: - return -1; } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48c3879..5fd9513 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -500,6 +500,64 @@ out_failure: return -1; } +int modules__parse(const char *filename, void *arg, + int (*process_module)(void *arg, const char *name, + u64 start)) +{ + char *line = NULL; + size_t n; + FILE *file; + int err = 0; + + file = fopen(filename, "r"); + if (file == NULL) + return -1; + + while (1) { + char name[PATH_MAX]; + u64 start; + char *sep; + ssize_t line_len; + + line_len = getline(&line, &n, file); + if (line_len < 0) { + if (feof(file)) + break; + err = -1; + goto out; + } + + if (!line) { + err = -1; + goto out; + } + + line[--line_len] = '\0'; /* \n */ + + sep = strrchr(line, 'x'); + if (sep == NULL) + continue; + + hex2u64(sep + 1, &start); + + sep = strchr(line, ' '); + if (sep == NULL) + continue; + + *sep = '\0'; + + scnprintf(name, sizeof(name), "[%s]", line); + + err = process_module(arg, name, start); + if (err) + break; + } +out: + free(line); + fclose(file); + return err; +} + struct process_kallsyms_args { struct map *map; struct dso *dso; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9b8b213..2d3eb43 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -223,6 +223,9 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)); +int modules__parse(const char *filename, void *arg, + int (*process_module)(void *arg, const char *name, + u64 start)); int filename__read_debuglink(const char *filename, char *debuglink, size_t size); -- cgit v0.10.2 From 03e3adc9f4d8b57dc83475c8c4c6e462a78ff709 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Oct 2013 16:00:21 -0300 Subject: perf trace: Allow specifying index offset in strarrays So that the index passed doesn't have to start at zero, being decremented from an offset specified when declaring the strarray before being used as the real array index. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-k1ce6uqyt4qar9edrj3mevod@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index addc3e1..7424298 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -44,6 +44,7 @@ struct syscall_arg { }; struct strarray { + int offset; int nr_entries; const char **entries; }; @@ -53,14 +54,20 @@ struct strarray { .entries = array, \ } +#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ + .offset = off, \ + .nr_entries = ARRAY_SIZE(array), \ + .entries = array, \ +} + static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, struct syscall_arg *arg) { - int idx = arg->val; struct strarray *sa = arg->parm; + int idx = arg->val - sa->offset; if (idx < 0 || idx >= sa->nr_entries) - return scnprintf(bf, size, "%d", idx); + return scnprintf(bf, size, "%d", arg->val); return scnprintf(bf, size, "%s", sa->entries[idx]); } @@ -288,8 +295,8 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct sysc #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op -static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", }; -static DEFINE_STRARRAY(epoll_ctl_ops); +static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; +static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; static DEFINE_STRARRAY(itimers); -- cgit v0.10.2 From 975b7c2f40d431da4fdb46dd8b9d90c129eecc12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Oct 2013 17:17:43 -0300 Subject: perf trace: Prepare the strarray scnprintf method for reuse Right now when an index passed to that method has no string associated it'll print the index as a decimal number, prepare it so that we can use it to print it in hex as well, for ioctls, for instance. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-nsvy06sqj64qvnkmzvwxsx2v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7424298..0d4af1d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -60,18 +60,25 @@ struct strarray { .entries = array, \ } -static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, - struct syscall_arg *arg) +static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, + const char *intfmt, + struct syscall_arg *arg) { struct strarray *sa = arg->parm; int idx = arg->val - sa->offset; if (idx < 0 || idx >= sa->nr_entries) - return scnprintf(bf, size, "%d", arg->val); + return scnprintf(bf, size, intfmt, arg->val); return scnprintf(bf, size, "%s", sa->entries[idx]); } +static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, + struct syscall_arg *arg) +{ + return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); +} + #define SCA_STRARRAY syscall_arg__scnprintf_strarray static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, -- cgit v0.10.2 From 78645cf3ed32860a3e83b8e35aa469f5b844a4ba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Oct 2013 17:43:20 -0300 Subject: perf trace: Initial beautifier for ioctl's 'cmd' arg [root@zoo linux]# trace -e ioctl | grep -v "cmd: 0x" | head -10 0.386 ( 0.001 ms): trace/1602 ioctl(fd: 1, cmd: TCGETS, arg: 0x7fff59fcb4d0 ) = -1 ENOTTY Inappropriate ioctl for device 1459.368 ( 0.002 ms): inotify_reader/10352 ioctl(fd: 18, cmd: FIONREAD, arg: 0x7fb835228bcc ) = 0 1463.586 ( 0.002 ms): inotify_reader/10352 ioctl(fd: 18, cmd: FIONREAD, arg: 0x7fb835228bcc ) = 0 1463.611 ( 0.002 ms): inotify_reader/10352 ioctl(fd: 18, cmd: FIONREAD, arg: 0x7fb835228bcc ) = 0 3740.526 ( 0.002 ms): awk/1612 ioctl(fd: 1, cmd: TCGETS, arg: 0x7fff4d166b90 ) = -1 ENOTTY Inappropriate ioctl for device 3740.704 ( 0.001 ms): awk/1612 ioctl(fd: 3, cmd: TCGETS, arg: 0x7fff4d1669a0 ) = -1 ENOTTY Inappropriate ioctl for device 3742.550 ( 0.002 ms): ps/1614 ioctl(fd: 1, cmd: TIOCGWINSZ, arg: 0x7fff591762b0 ) = -1 ENOTTY Inappropriate ioctl for device 3742.555 ( 0.003 ms): ps/1614 ioctl(fd: 2, cmd: TIOCGWINSZ, arg: 0x7fff591762b0 ) = -1 ENOTTY Inappropriate ioctl for device 3742.558 ( 0.002 ms): ps/1614 ioctl(cmd: TIOCGWINSZ, arg: 0x7fff591762b0 ) = -1 ENOTTY Inappropriate ioctl for device 3742.572 ( 0.002 ms): ps/1614 ioctl(fd: 1, cmd: TCGETS, arg: 0x7fff59176220 ) = -1 ENOTTY Inappropriate ioctl for device [root@zoo linux]# Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-afajwap3mr60dfl4qpdl1pxn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0d4af1d..19ddcab 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -81,6 +81,14 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, #define SCA_STRARRAY syscall_arg__scnprintf_strarray +static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, + struct syscall_arg *arg) +{ + return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); +} + +#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray + static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg); @@ -633,6 +641,28 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal #define SCA_SIGNUM syscall_arg__scnprintf_signum +#define TCGETS 0x5401 + +static const char *tioctls[] = { + "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", + "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", + "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", + "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", + "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", + "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", + "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", + "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", + "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", + "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", + "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", + [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", + "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", + "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", + "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", +}; + +static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); + #define STRARRAY(arg, name, array) \ .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ .arg_parm = { [arg] = &strarray__##array, } @@ -713,7 +743,9 @@ static struct syscall_fmt { { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_HEX, /* arg */ }, }, + [1] = SCA_STRHEXARRAY, /* cmd */ + [2] = SCA_HEX, /* arg */ }, + .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "linkat", .errmsg = true, -- cgit v0.10.2 From cee972c0e6940ec75bab2d02f37e96d06ce143eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 07:42:56 +0200 Subject: perf tools: Fix redirection printouts Fix the duplicate util/util printout Arnaldo reported: $ make V=1 O=/tmp/build/perf -C tools/perf/ util/srcline.o ... # Redirected target util/srcline.o => /tmp/build/perf/util/util/srcline.o Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131010054256.GA23716@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2badb08..8bc6d0c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -600,11 +600,11 @@ ifneq ($(OUTPUT),) %.o: $(OUTPUT)%.o @echo " # Redirected target $@ => $(OUTPUT)$@" util/%.o: $(OUTPUT)util/%.o - @echo " # Redirected target $@ => $(OUTPUT)util/$@" + @echo " # Redirected target $@ => $(OUTPUT)$@" bench/%.o: $(OUTPUT)bench/%.o - @echo " # Redirected target $@ => $(OUTPUT)bench/$@" + @echo " # Redirected target $@ => $(OUTPUT)$@" tests/%.o: $(OUTPUT)tests/%.o - @echo " # Redirected target $@ => $(OUTPUT)tests/$@" + @echo " # Redirected target $@ => $(OUTPUT)$@" endif # These two need to be here so that when O= is not used they take precedence -- cgit v0.10.2 From 8ec19c0eba73d7221e93e993c1c8bd62484354e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:26 +0200 Subject: perf tools: Implement summary output for 'make clean' 'make clean' used to show all the rm lines, which isn't really informative in any way and spams the console. Implement summary output: comet:~/tip/tools/perf> make clean CLEAN libtraceevent CLEAN liblk CLEAN config CLEAN core-objs CLEAN core-progs CLEAN core-gen CLEAN Documentation CLEAN python 'make clean V=1' will still show the old, detailed output. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381312169-17354-2-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index eaa477f..be5adb1 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -246,15 +246,17 @@ $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ date >$@ +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \ + $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \ + $(OUTPUT)perf.info $(OUTPUT)perfman.info \ + $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \ + $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \ + $(cmds_txt) $(OUTPUT)*.made clean: - $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) - $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) - $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) - $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ - $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info - $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep - $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt - $(RM) $(cmds_txt) $(OUTPUT)*.made + $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8bc6d0c..250b276 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -148,7 +148,7 @@ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP -python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so +python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK) @@ -708,7 +708,8 @@ $(LIBTRACEEVENT): $(TE_SOURCES) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a $(LIBTRACEEVENT)-clean: - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + $(call QUIET_CLEAN, libtraceevent) + @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) @@ -721,7 +722,8 @@ endif $(LIBLK)-clean: ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean + $(call QUIET_CLEAN, liblk) + @$(MAKE) -C $(LK_DIR) O=$(OUTPUT) clean >/dev/null endif help: @@ -850,17 +852,15 @@ $(INSTALL_DOC_TARGETS): # not get included for the clean target: # config-clean: - @$(MAKE) -C config/feature-checks clean + $(call QUIET_CLEAN, config) + @$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean - $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(GTK_OBJS) - $(RM) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) - $(RM) $(ALL_PROGRAMS) perf - $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean - $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS - $(RM) $(OUTPUT)util/*-bison* - $(RM) $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, Documentation) + @$(MAKE) -C Documentation O=$(OUTPUT) clean >/dev/null $(python-clean) # diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 4d985e0..94908a5 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -178,3 +178,9 @@ endef _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) + +ifneq ($(findstring $(MAKEFLAGS),s),s) + ifneq ($(V),1) + QUIET_CLEAN = @printf ' CLEAN %s\n' $(1); + endif +endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0d0506d..1853736 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -66,7 +66,7 @@ ifneq ($(V),1) QUIET_MKDIR = @echo ' ' MKDIR $@; QUIET_GEN = @echo ' ' GEN $@; QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir QUIET_FLEX = @echo ' ' FLEX $@; QUIET_BISON = @echo ' ' BISON $@; -- cgit v0.10.2 From 65fb09922d4ca5da54fe18d7b44e5961caf169ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:27 +0200 Subject: tools: Harmonize the various build messages in perf, lib-traceevent, lib-lk The various build lines from libtraceevent and perf mix up during a parallel build and produce unaligned output like: CC builtin-buildid-list.o CC builtin-buildid-cache.o CC builtin-list.o CC FPIC trace-seq.o CC builtin-record.o CC FPIC parse-filter.o CC builtin-report.o CC builtin-stat.o CC FPIC parse-utils.o CC FPIC kbuffer-parse.o CC builtin-timechart.o CC builtin-top.o CC builtin-script.o BUILD STATIC LIB libtraceevent.a CC builtin-probe.o CC builtin-kmem.o CC builtin-lock.o To solve this, harmonize all the build message alignments to be similar to the kernel's kbuild output: prefixed by two spaces and 11-char wide. After the patch the output looks pretty tidy, even if output lines get mixed up: CC builtin-annotate.o FLAGS: * new build flags or cross compiler CC builtin-bench.o AR liblk.a CC bench/sched-messaging.o CC FPIC event-parse.o CC bench/sched-pipe.o CC FPIC trace-seq.o CC bench/mem-memcpy.o CC bench/mem-memset.o CC FPIC parse-filter.o CC builtin-diff.o CC builtin-evlist.o CC builtin-help.o Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381312169-17354-3-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index ca6cb77..fc15020 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -134,14 +134,14 @@ ifeq ($(VERBOSE),1) print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_plugin_obj_compile = echo ' CC PLUGIN OBJ '$(OBJ); - print_plugin_build = echo ' CC PLUGI '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_compile = echo ' CC '$(OBJ); + print_app_build = echo ' BUILD '$(OBJ); + print_fpic_compile = echo ' CC FPIC '$(OBJ); + print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); + print_plugin_obj_compile = echo ' BUILD PLUGIN OBJ '$(OBJ); + print_plugin_build = echo ' BUILD PLUGIN '$(OBJ); + print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif do_fpic_compile = \ @@ -268,7 +268,7 @@ TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) TRACEEVENT-CFLAGS: force @FLAGS='$(TRACK_CFLAGS)'; \ if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or cross compiler"; \ + echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ fi diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index be5adb1..c4c300c 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -145,16 +145,17 @@ endif ifneq ($(findstring $(MAKEFLAGS),s),s) ifneq ($(V),1) - QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; - QUIET_XMLTO = @echo ' ' XMLTO $@; - QUIET_DB2TEXI = @echo ' ' DB2TEXI $@; - QUIET_MAKEINFO = @echo ' ' MAKEINFO $@; - QUIET_DBLATEX = @echo ' ' DBLATEX $@; - QUIET_XSLTPROC = @echo ' ' XSLTPROC $@; - QUIET_GEN = @echo ' ' GEN $@; + QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; + QUIET_XMLTO = @echo ' XMLTO '$@; + QUIET_DB2TEXI = @echo ' DB2TEXI '$@; + QUIET_MAKEINFO = @echo ' MAKEINFO '$@; + QUIET_DBLATEX = @echo ' DBLATEX '$@; + QUIET_XSLTPROC = @echo ' XSLTPROC '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_STDERR = 2> /dev/null QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR ' $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir export V endif diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9580ebe..5aa3d04 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -38,7 +38,7 @@ ifneq ($(O),) endif define print_msg - @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' + @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' endef define make diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 250b276..f91bd5a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -784,7 +784,7 @@ TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS @FLAGS='$(TRACK_CFLAGS)'; \ if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or prefix"; \ + echo 1>&2 " FLAGS: * new build flags or prefix"; \ echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ fi diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1853736..ee76544 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,21 +59,22 @@ QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir QUIET_SUBDIR1 = ifneq ($(findstring $(MAKEFLAGS),s),s) -ifneq ($(V),1) - QUIET_CC = @echo ' ' CC $@; - QUIET_AR = @echo ' ' AR $@; - QUIET_LINK = @echo ' ' LINK $@; - QUIET_MKDIR = @echo ' ' MKDIR $@; - QUIET_GEN = @echo ' ' GEN $@; + ifneq ($(V),1) + QUIET_CC = @echo ' CC '$@; + QUIET_AR = @echo ' AR '$@; + QUIET_LINK = @echo ' LINK '$@; + QUIET_MKDIR = @echo ' MKDIR '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR '$$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir - QUIET_FLEX = @echo ' ' FLEX $@; - QUIET_BISON = @echo ' ' BISON $@; + QUIET_FLEX = @echo ' FLEX '$@; + QUIET_BISON = @echo ' BISON '$@; descend = \ - +@echo ' ' DESCEND $(1); \ + +@echo ' DESCEND '$(1); \ mkdir -p $(OUTPUT)$(1) && \ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) -endif + endif endif -- cgit v0.10.2 From 3fae82db558468c01f36eb6398e9459ac240e697 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:28 +0200 Subject: perf tools: Align perf version output to other build messages Before: CC util/pmu.o CC util/parse-events.o PERF_VERSION = 3.12.rc4.g1b30c CC util/parse-events-flex.o GEN perf-archive After: CC util/pmu.o CC util/parse-events.o PERF_VERSION = 3.12.rc4.g1b30c CC util/parse-events-flex.o GEN perf-archive Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381312169-17354-4-git-send-email-mingo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 15a77b7..ce7a804 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -40,7 +40,7 @@ else VC=unset fi test "$VN" = "$VC" || { - echo >&2 "PERF_VERSION = $VN" + echo >&2 " PERF_VERSION = $VN" echo "#define PERF_VERSION \"$VN\"" >$GVF } -- cgit v0.10.2 From 8a5411e9a3e65ab70b094022e5aa6deaec82ae7c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 11:49:29 +0200 Subject: perf tools: Implement summary output for 'make install' 'make install' used to show all the install lines, which is way too verbose to be really informative to the user. Implement summary output instead: comet:~/tip/tools/perf> make install BUILD: Doing 'make -j12' parallel build SUBDIR Documentation INSTALL Documentation-man INSTALL binaries INSTALL libexec INSTALL perf-archive INSTALL perl-scripts INSTALL python-scripts INSTALL bash_completion-script INSTALL tests 'make install V=1' will still show the old, detailed output. Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Ingo Molnar Link: http://lkml.kernel.org/r/1381312169-17354-5-git-send-email-mingo@kernel.org [ Fixed conflict with libperf-gtk patches in acme/perf/core, cope with 'trace' alias ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index c4c300c..3ba1c0b 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -184,12 +184,13 @@ ifdef missing_tools endif do-install-man: man - $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir) -# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) -# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) - $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir) -# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir) -# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) + $(call QUIET_INSTALL, Documentation-man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \ +# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ + $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \ +# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \ +# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) install-man: check-man-tools man @@ -202,18 +203,20 @@ endif try-install-man: $(DO_INSTALL_MAN) install-info: info - $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) - $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) + $(call QUIET_INSTALL, Documentation-info) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \ + $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \ if test -r $(DESTDIR)$(infodir)/dir; then \ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ + $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ else \ echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ fi install-pdf: pdf - $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) - $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) + $(call QUIET_INSTALL, Documentation-pdf) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \ + $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f91bd5a..c873e03 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -504,8 +504,9 @@ ifndef NO_GTK2 GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o install-gtk: $(OUTPUT)libperf-gtk.so - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)' - $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' + $(call QUIET_INSTALL, 'GTK UI') \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ + $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif ifndef NO_LIBPERL @@ -810,31 +811,38 @@ check: $(OUTPUT)common-cmds.h install-gtk: install-bin: all install-gtk - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' - $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(call QUIET_INSTALL, binaries) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ + $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ + $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' + $(call QUIET_INSTALL, libexec) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(call QUIET_INSTALL, perf-archive) \ + $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBPERL - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' - $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' - $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(call QUIET_INSTALL, perl-scripts) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \ + $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' endif ifndef NO_LIBPYTHON - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' - $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' - $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' - $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(call QUIET_INSTALL, python-scripts) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ + $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ + $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' endif - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' - $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' - $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' + $(call QUIET_INSTALL, bash_completion-script) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(call QUIET_INSTALL, tests) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ + $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' install: install-bin try-install-man diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 94908a5..f168deb 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -181,6 +181,7 @@ _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) ifneq ($(findstring $(MAKEFLAGS),s),s) ifneq ($(V),1) - QUIET_CLEAN = @printf ' CLEAN %s\n' $(1); + QUIET_CLEAN = @printf ' CLEAN %s\n' $1; + QUIET_INSTALL = @printf ' INSTALL %s\n' $1; endif endif -- cgit v0.10.2 From d366c53e1d4fc9d7a5826fd82010b3cffaabe5f1 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 3 Oct 2013 14:45:16 +0530 Subject: perf timechart: Add example in the documentation While at it, update the synopsis to show both forms. Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1380791716-10325-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 1632b0e..3ff8bd4 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -8,7 +8,8 @@ perf-timechart - Tool to visualize total system behavior during a workload SYNOPSIS -------- [verse] -'perf timechart' {record} +'perf timechart' record +'perf timechart' [] DESCRIPTION ----------- @@ -41,6 +42,18 @@ OPTIONS --symfs=:: Look for files with symbols relative to this directory. +EXAMPLES +-------- + +$ perf timechart record git pull + + [ perf record: Woken up 13 times to write data ] + [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] + +$ perf timechart + + Written 10.2 seconds of trace to output.svg. + SEE ALSO -------- linkperf:perf-record[1] -- cgit v0.10.2 From 87f918685a452be514d060a09eeb4e0c91422e86 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 4 Oct 2013 10:47:31 +0530 Subject: perf trace: Improve the error messages Currently, execution of 'perf trace' reports the following cryptic message to the user: $ perf trace Couldn't read the raw_syscalls tracepoints information! Typically this happens because the user does not have permissions to read the debugfs filesystem. Also handle the case when the kernel was not compiled with debugfs support or when it isn't mounted. Now, the tool prints detailed error messages: $ perf trace Error: Unable to find debugfs Hint: Was your kernel was compiled with debugfs support? Hint: Is the debugfs filesystem mounted? Hint: Try 'sudo mount -t debugfs nodev /sys/kernel/debug' $ perf trace Error: No permissions to read /sys/kernel/debug//tracing/events/raw_syscalls Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/' Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1380863851-14460-1-git-send-email-artagnon@gmail.com [ Added ready to use commands to fix the issues as extra hints, use the current debugfs mount point when reporting permission error, use strerror_r instead of the deprecated sys_errlist, as reported by David Ahern ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 19ddcab..5496546 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1590,17 +1590,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || - perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { - fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n"); - goto out_delete_evlist; - } + perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) + goto out_error_tp; if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) { - fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n"); - goto out_delete_evlist; - } + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) + goto out_error_tp; err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { @@ -1717,6 +1713,29 @@ out_delete_evlist: out: trace->live = false; return err; +out_error_tp: + switch(errno) { + case ENOENT: + fputs("Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel was compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n", + trace->output); + break; + case EACCES: + fprintf(trace->output, + "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, debugfs_mountpoint); + break; + default: { + char bf[256]; + fprintf(trace->output, "Can't trace: %s\n", + strerror_r(errno, bf, sizeof(bf))); + } + break; + } + goto out_delete_evlist; } static int trace__replay(struct trace *trace) -- cgit v0.10.2 From 813335b8b27d9ceeb67a073f501ada8b8dde37a7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 8 Oct 2013 21:26:52 -0600 Subject: perf util: Add findnew method to intlist Similar to other findnew based methods if the requested object is not found, add it to the list. v2: followed format of other findnew methods per acme's request Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381289214-24885-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 826d7b3..89715b6 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -58,22 +58,36 @@ void intlist__remove(struct intlist *ilist, struct int_node *node) rblist__remove_node(&ilist->rblist, &node->rb_node); } -struct int_node *intlist__find(struct intlist *ilist, int i) +static struct int_node *__intlist__findnew(struct intlist *ilist, + int i, bool create) { - struct int_node *node; + struct int_node *node = NULL; struct rb_node *rb_node; if (ilist == NULL) return NULL; - node = NULL; - rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); + if (create) + rb_node = rblist__findnew(&ilist->rblist, (void *)((long)i)); + else + rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); + if (rb_node) node = container_of(rb_node, struct int_node, rb_node); return node; } +struct int_node *intlist__find(struct intlist *ilist, int i) +{ + return __intlist__findnew(ilist, i, false); +} + +struct int_node *intlist__findnew(struct intlist *ilist, int i) +{ + return __intlist__findnew(ilist, i, true); +} + static int intlist__parse_list(struct intlist *ilist, const char *s) { char *sep; diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 0eb00ac..aa6877d 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -24,6 +24,7 @@ int intlist__add(struct intlist *ilist, int i); struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx); struct int_node *intlist__find(struct intlist *ilist, int i); +struct int_node *intlist__findnew(struct intlist *ilist, int i); static inline bool intlist__has_entry(struct intlist *ilist, int i) { diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index a16cdd2..0dfe27d 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -48,10 +48,12 @@ void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) rblist->node_delete(rblist, rb_node); } -struct rb_node *rblist__find(struct rblist *rblist, const void *entry) +static struct rb_node *__rblist__findnew(struct rblist *rblist, + const void *entry, + bool create) { struct rb_node **p = &rblist->entries.rb_node; - struct rb_node *parent = NULL; + struct rb_node *parent = NULL, *new_node = NULL; while (*p != NULL) { int rc; @@ -67,7 +69,26 @@ struct rb_node *rblist__find(struct rblist *rblist, const void *entry) return parent; } - return NULL; + if (create) { + new_node = rblist->node_new(rblist, entry); + if (new_node) { + rb_link_node(new_node, parent, p); + rb_insert_color(new_node, &rblist->entries); + ++rblist->nr_entries; + } + } + + return new_node; +} + +struct rb_node *rblist__find(struct rblist *rblist, const void *entry) +{ + return __rblist__findnew(rblist, entry, false); +} + +struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry) +{ + return __rblist__findnew(rblist, entry, true); } void rblist__init(struct rblist *rblist) diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 6d0cae5..ff9913b 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -32,6 +32,7 @@ void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); struct rb_node *rblist__find(struct rblist *rblist, const void *entry); +struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry); struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx); static inline bool rblist__empty(const struct rblist *rblist) -- cgit v0.10.2 From bf2575c121ca11247ef07fd02b43f7430834f7b1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 8 Oct 2013 21:26:53 -0600 Subject: perf trace: Add summary option to dump syscall statistics When enabled dumps a summary of all syscalls by task with the usual statistics -- min, max, average and relative stddev. For example, make - 26341 : 3344 [ 17.4% ] 0.000 ms read : 52 0.000 4.802 0.644 30.08 write : 20 0.004 0.036 0.010 21.72 open : 24 0.003 0.046 0.014 23.68 close : 64 0.002 0.055 0.008 22.53 stat : 2714 0.002 0.222 0.004 4.47 fstat : 18 0.001 0.041 0.006 46.26 mmap : 30 0.003 0.009 0.006 5.71 mprotect : 8 0.006 0.039 0.016 32.16 munmap : 12 0.007 0.077 0.020 38.25 brk : 48 0.002 0.014 0.004 10.18 rt_sigaction : 18 0.002 0.002 0.002 2.11 rt_sigprocmask : 60 0.002 0.128 0.010 32.88 access : 2 0.006 0.006 0.006 0.00 pipe : 12 0.004 0.048 0.013 35.98 vfork : 34 0.448 0.980 0.692 3.04 execve : 20 0.000 0.387 0.046 56.66 wait4 : 34 0.017 9923.287 593.221 68.45 fcntl : 8 0.001 0.041 0.013 48.79 getdents : 48 0.002 0.079 0.013 19.62 getcwd : 2 0.005 0.005 0.005 0.00 chdir : 2 0.070 0.070 0.070 0.00 getrlimit : 2 0.045 0.045 0.045 0.00 arch_prctl : 2 0.002 0.002 0.002 0.00 setrlimit : 2 0.002 0.002 0.002 0.00 openat : 94 0.003 0.005 0.003 2.11 Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381289214-24885-3-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 1a22486..54139c6 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -93,6 +93,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --comm:: Show process COMM right beside its ID, on by default, disable with --no-comm. +--summary:: + Show a summary of syscalls by thread with min, max, and average times (in + msec) and relative stddev. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5496546..d0f91fe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -10,6 +10,7 @@ #include "util/strlist.h" #include "util/intlist.h" #include "util/thread_map.h" +#include "util/stat.h" #include #include @@ -909,6 +910,8 @@ struct thread_trace { int max; char **table; } paths; + + struct intlist *syscall_stats; }; static struct thread_trace *thread_trace__new(void) @@ -918,6 +921,8 @@ static struct thread_trace *thread_trace__new(void) if (ttrace) ttrace->paths.max = -1; + ttrace->syscall_stats = intlist__new(NULL); + return ttrace; } @@ -964,6 +969,7 @@ struct trace { struct intlist *pid_list; bool sched; bool multiple_threads; + bool summary; bool show_comm; double duration_filter; double runtime_ms; @@ -1291,10 +1297,8 @@ typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample); static struct syscall *trace__syscall_info(struct trace *trace, - struct perf_evsel *evsel, - struct perf_sample *sample) + struct perf_evsel *evsel, int id) { - int id = perf_evsel__intval(evsel, sample, "id"); if (id < 0) { @@ -1335,6 +1339,32 @@ out_cant_read: return NULL; } +static void thread__update_stats(struct thread_trace *ttrace, + int id, struct perf_sample *sample) +{ + struct int_node *inode; + struct stats *stats; + u64 duration = 0; + + inode = intlist__findnew(ttrace->syscall_stats, id); + if (inode == NULL) + return; + + stats = inode->priv; + if (stats == NULL) { + stats = malloc(sizeof(struct stats)); + if (stats == NULL) + return; + init_stats(stats); + inode->priv = stats; + } + + if (ttrace->entry_time && sample->time > ttrace->entry_time) + duration = sample->time - ttrace->entry_time; + + update_stats(stats, duration); +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample) { @@ -1342,7 +1372,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, void *args; size_t printed = 0; struct thread *thread; - struct syscall *sc = trace__syscall_info(trace, evsel, sample); + int id = perf_evsel__intval(evsel, sample, "id"); + struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; if (sc == NULL) @@ -1394,7 +1425,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, int ret; u64 duration = 0; struct thread *thread; - struct syscall *sc = trace__syscall_info(trace, evsel, sample); + int id = perf_evsel__intval(evsel, sample, "id"); + struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; if (sc == NULL) @@ -1408,6 +1440,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) return -1; + if (trace->summary) + thread__update_stats(ttrace, id, sample); + ret = perf_evsel__intval(evsel, sample, "ret"); ttrace = thread->priv; @@ -1574,6 +1609,8 @@ static int trace__record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } +static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1703,6 +1740,9 @@ again: goto again; out_unmap_evlist: + if (!err && trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + perf_evlist__munmap(evlist); out_close_evlist: perf_evlist__close(evlist); @@ -1798,6 +1838,9 @@ static int trace__replay(struct trace *trace) if (err) pr_err("Failed to process events, error %d", err); + else if (trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + out: perf_session__delete(session); @@ -1808,10 +1851,53 @@ static size_t trace__fprintf_threads_header(FILE *fp) { size_t printed; - printed = fprintf(fp, "\n _____________________________________________________________________\n"); - printed += fprintf(fp," __) Summary of events (__\n\n"); - printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); - printed += fprintf(fp," _____________________________________________________________________\n\n"); + printed = fprintf(fp, "\n _____________________________________________________________________________\n"); + printed += fprintf(fp, " __) Summary of events (__\n\n"); + printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); + printed += fprintf(fp, " syscall count min max avg stddev\n"); + printed += fprintf(fp, " msec msec msec %%\n"); + printed += fprintf(fp, " _____________________________________________________________________________\n\n"); + + return printed; +} + +static size_t thread__dump_stats(struct thread_trace *ttrace, + struct trace *trace, FILE *fp) +{ + struct stats *stats; + size_t printed = 0; + struct syscall *sc; + struct int_node *inode = intlist__first(ttrace->syscall_stats); + + if (inode == NULL) + return 0; + + printed += fprintf(fp, "\n"); + + /* each int_node is a syscall */ + while (inode) { + stats = inode->priv; + if (stats) { + double min = (double)(stats->min) / NSEC_PER_MSEC; + double max = (double)(stats->max) / NSEC_PER_MSEC; + double avg = avg_stats(stats); + double pct; + u64 n = (u64) stats->n; + + pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; + avg /= NSEC_PER_MSEC; + + sc = &trace->syscalls.table[inode->i]; + printed += fprintf(fp, "%24s %14s : ", "", sc->name); + printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f", + n, min, max); + printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct); + } + + inode = intlist__next(inode); + } + + printed += fprintf(fp, "\n\n"); return printed; } @@ -1850,6 +1936,7 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); printed += color_fprintf(fp, color, "%5.1f%%", ratio); printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); + printed += thread__dump_stats(ttrace, trace, fp); data->printed += printed; @@ -1953,6 +2040,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_BOOLEAN('T', "time", &trace.full_time, "Show full timestamp, not time relative to first start"), + OPT_BOOLEAN(0, "summary", &trace.summary, + "Show syscall summary with statistics"), OPT_END() }; int err; @@ -2008,9 +2097,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) else err = trace__run(&trace, argc, argv); - if (trace.sched && !err) - trace__fprintf_thread_summary(&trace, trace.output); - out_close: if (output_name != NULL) fclose(trace.output); -- cgit v0.10.2 From a949fffb84df6f9be136198a00f796a9dc696bd0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Oct 2013 21:51:31 -0600 Subject: perf tools: Fix old GCC build error in 'get_srcline' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trace-event-parse.c:parse_proc_kallsyms() Old GCC (4.4.2) does not see through the code flow of get_srcline() and gets confused about the status of 'file' and 'line': CC /tmp/build/perf/util/srcline.o cc1: warnings being treated as errors util/srcline.c: In function ¿get_srcline¿: util/srcline.c:226: error: ¿file¿ may be used uninitialized in this function util/srcline.c:227: error: ¿line¿ may be used uninitialized in this function make[1]: *** [/tmp/build/perf/util/srcline.o] Error 1 make: *** [install] Error 2 make: Leaving directory `/home/acme/git/linux/tools/perf' [acme@fedora12 linux]$ Help out GCC by initializing 'file' and 'line'. Signed-off-by: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-h8k7h49z3cndqgjdftkmm9f8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 3735319..d11aefb 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -223,8 +223,8 @@ out: char *get_srcline(struct dso *dso, unsigned long addr) { - char *file; - unsigned line; + char *file = NULL; + unsigned line = 0; char *srcline; char *dso_name = dso->long_name; size_t size; -- cgit v0.10.2 From 1df9297c8535a5bb2b776381e63d8334f87d4abe Mon Sep 17 00:00:00 2001 From: Felipe Pena Date: Wed, 9 Oct 2013 23:00:38 -0300 Subject: perf tests: Fix memory leak in dso-data.c Fix for a memory leak on test_file() function in dso-data.c. Signed-off-by: Felipe Pena Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381370438-4209-1-git-send-email-felipensp@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index dffe055..9cc81a3 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -35,6 +35,7 @@ static char *test_file(int size) if (size != write(fd, buf, size)) templ = NULL; + free(buf); close(fd); return templ; } -- cgit v0.10.2 From 3e6a147deef93ddbb899cb394d8d44118289e76a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Oct 2013 22:24:00 +0200 Subject: perf tools: Separate lbfd check out of NO_DEMANGLE condition We fail build with NO_DEMANGLE with missing -lbfd externals error. The reason is that we now use bfd code in srcline object: perf tools: Implement addr2line directly using libbfd So we need to check/add -lbfd always now. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 29ad7d6..9680424 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -470,6 +470,10 @@ else endif endif +ifeq ($(feature-libbfd), 1) + EXTLIBS += -lbfd +endif + ifdef NO_DEMANGLE CFLAGS += -DNO_DEMANGLE else @@ -477,9 +481,7 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd - else + ifneq ($(feature-libbfd), 1) $(feature_check,liberty) ifeq ($(feature-liberty), 1) EXTLIBS += -lbfd -liberty @@ -502,6 +504,10 @@ else endif endif +ifneq ($(filter -lbfd,$(EXTLIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT +endif + ifndef NO_ON_EXIT ifeq ($(feature-on-exit), 1) CFLAGS += -DHAVE_ON_EXIT_SUPPORT @@ -524,10 +530,6 @@ ifndef NO_LIBNUMA endif endif -ifndef ($(filter -lbfd,$(EXTLIBS)),) - CFLAGS += -DHAVE_LIBBFD_SUPPORT -endif - # Among the variables below, these: # perfexecdir # template_dir -- cgit v0.10.2 From 52afdaf9f0c6a35e154ba42ac9510044e16d75ec Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Oct 2013 15:01:11 +0300 Subject: perf symbols: Validate kcore module addresses Before using kcore we need to check that modules are in memory at the same addresses that they were when data was recorded. This is done because, while we could remap symbols to different addresses, the object code linkages would still be different which would provide an erroneous view of the object code. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381320078-16497-2-git-send-email-adrian.hunter@intel.com [ Rename basename to base_name to avoid shadowing libgen's basename in fedora 12 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5fd9513..b2f60dd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -798,51 +798,201 @@ bool symbol__restricted_filename(const char *filename, return restricted; } -struct kcore_mapfn_data { - struct dso *dso; - enum map_type type; - struct list_head maps; +struct module_info { + struct rb_node rb_node; + char *name; + u64 start; }; -static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) +static void add_module(struct module_info *mi, struct rb_root *modules) { - struct kcore_mapfn_data *md = data; - struct map *map; + struct rb_node **p = &modules->rb_node; + struct rb_node *parent = NULL; + struct module_info *m; - map = map__new2(start, md->dso, md->type); - if (map == NULL) + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct module_info, rb_node); + if (strcmp(mi->name, m->name) < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&mi->rb_node, parent, p); + rb_insert_color(&mi->rb_node, modules); +} + +static void delete_modules(struct rb_root *modules) +{ + struct module_info *mi; + struct rb_node *next = rb_first(modules); + + while (next) { + mi = rb_entry(next, struct module_info, rb_node); + next = rb_next(&mi->rb_node); + rb_erase(&mi->rb_node, modules); + free(mi->name); + free(mi); + } +} + +static struct module_info *find_module(const char *name, + struct rb_root *modules) +{ + struct rb_node *n = modules->rb_node; + + while (n) { + struct module_info *m; + int cmp; + + m = rb_entry(n, struct module_info, rb_node); + cmp = strcmp(name, m->name); + if (cmp < 0) + n = n->rb_left; + else if (cmp > 0) + n = n->rb_right; + else + return m; + } + + return NULL; +} + +static int __read_proc_modules(void *arg, const char *name, u64 start) +{ + struct rb_root *modules = arg; + struct module_info *mi; + + mi = zalloc(sizeof(struct module_info)); + if (!mi) return -ENOMEM; - map->end = map->start + len; - map->pgoff = pgoff; + mi->name = strdup(name); + mi->start = start; - list_add(&map->node, &md->maps); + if (!mi->name) { + free(mi); + return -ENOMEM; + } + + add_module(mi, modules); + + return 0; +} + +static int read_proc_modules(const char *filename, struct rb_root *modules) +{ + if (symbol__restricted_filename(filename, "/proc/modules")) + return -1; + + if (modules__parse(filename, modules, __read_proc_modules)) { + delete_modules(modules); + return -1; + } return 0; } +static int do_validate_kcore_modules(const char *filename, struct map *map, + struct map_groups *kmaps) +{ + struct rb_root modules = RB_ROOT; + struct map *old_map; + int err; + + err = read_proc_modules(filename, &modules); + if (err) + return err; + + old_map = map_groups__first(kmaps, map->type); + while (old_map) { + struct map *next = map_groups__next(old_map); + struct module_info *mi; + + if (old_map == map || old_map->start == map->start) { + /* The kernel map */ + old_map = next; + continue; + } + + /* Module must be in memory at the same address */ + mi = find_module(old_map->dso->short_name, &modules); + if (!mi || mi->start != old_map->start) { + err = -EINVAL; + goto out; + } + + old_map = next; + } +out: + delete_modules(&modules); + return err; +} + /* - * If kallsyms is referenced by name then we look for kcore in the same + * If kallsyms is referenced by name then we look for filename in the same * directory. */ -static bool kcore_filename_from_kallsyms_filename(char *kcore_filename, - const char *kallsyms_filename) +static bool filename_from_kallsyms_filename(char *filename, + const char *base_name, + const char *kallsyms_filename) { char *name; - strcpy(kcore_filename, kallsyms_filename); - name = strrchr(kcore_filename, '/'); + strcpy(filename, kallsyms_filename); + name = strrchr(filename, '/'); if (!name) return false; - if (!strcmp(name, "/kallsyms")) { - strcpy(name, "/kcore"); + name += 1; + + if (!strcmp(name, "kallsyms")) { + strcpy(name, base_name); return true; } return false; } +static int validate_kcore_modules(const char *kallsyms_filename, + struct map *map) +{ + struct map_groups *kmaps = map__kmap(map)->kmaps; + char modules_filename[PATH_MAX]; + + if (!filename_from_kallsyms_filename(modules_filename, "modules", + kallsyms_filename)) + return -EINVAL; + + if (do_validate_kcore_modules(modules_filename, map, kmaps)) + return -EINVAL; + + return 0; +} + +struct kcore_mapfn_data { + struct dso *dso; + enum map_type type; + struct list_head maps; +}; + +static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) +{ + struct kcore_mapfn_data *md = data; + struct map *map; + + map = map__new2(start, md->dso, md->type); + if (map == NULL) + return -ENOMEM; + + map->end = map->start + len; + map->pgoff = pgoff; + + list_add(&map->node, &md->maps); + + return 0; +} + static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { @@ -859,8 +1009,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map, if (map != machine->vmlinux_maps[map->type]) return -EINVAL; - if (!kcore_filename_from_kallsyms_filename(kcore_filename, - kallsyms_filename)) + if (!filename_from_kallsyms_filename(kcore_filename, "kcore", + kallsyms_filename)) + return -EINVAL; + + /* All modules must be present at their original addresses */ + if (validate_kcore_modules(kallsyms_filename, map)) return -EINVAL; md.dso = dso; -- cgit v0.10.2 From afba19d9dc8eba66ea26901708cf99354c637786 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 9 Oct 2013 15:01:12 +0300 Subject: perf symbols: Workaround objdump difficulties with kcore The objdump tool fails to annotate module symbols when looking at kcore. Workaround this by extracting object code from kcore and putting it in a temporary file for objdump to use instead. The temporary file is created to look like kcore but contains only the function being disassembled. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381320078-16497-3-git-send-email-adrian.hunter@intel.com [ Renamed 'index' to 'idx' to avoid shadowing string.h's 'index' in Fedora 12, Replace local with variable length with malloc/free to fix build in Fedora 12 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index d73e800..882bb86 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -879,6 +879,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) FILE *file; int err = 0; char symfs_filename[PATH_MAX]; + struct kcore_extract kce; + bool delete_extract = false; if (filename) { snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", @@ -940,6 +942,23 @@ fallback: pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso->long_name, sym, sym->name); + if (dso__is_kcore(dso)) { + kce.kcore_filename = symfs_filename; + kce.addr = map__rip_2objdump(map, sym->start); + kce.offs = sym->start; + kce.len = sym->end + 1 - sym->start; + if (!kcore_extract__create(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, + sizeof(symfs_filename)); + if (free_filename) { + free(filename); + free_filename = false; + } + filename = symfs_filename; + } + } + snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 @@ -972,6 +991,8 @@ fallback: pclose(file); out_free_filename: + if (delete_extract) + kcore_extract__delete(&kce); if (free_filename) free(filename); return err; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index c376930..499c71d 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1018,6 +1018,235 @@ int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, return err; } +static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len) +{ + ssize_t r; + size_t n; + int err = -1; + char *buf = malloc(page_size); + + if (buf == NULL) + return -1; + + if (lseek(to, to_offs, SEEK_SET) != to_offs) + goto out; + + if (lseek(from, from_offs, SEEK_SET) != from_offs) + goto out; + + while (len) { + n = page_size; + if (len < n) + n = len; + /* Use read because mmap won't work on proc files */ + r = read(from, buf, n); + if (r < 0) + goto out; + if (!r) + break; + n = r; + r = write(to, buf, n); + if (r < 0) + goto out; + if ((size_t)r != n) + goto out; + len -= n; + } + + err = 0; +out: + free(buf); + return err; +} + +struct kcore { + int fd; + int elfclass; + Elf *elf; + GElf_Ehdr ehdr; +}; + +static int kcore__open(struct kcore *kcore, const char *filename) +{ + GElf_Ehdr *ehdr; + + kcore->fd = open(filename, O_RDONLY); + if (kcore->fd == -1) + return -1; + + kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL); + if (!kcore->elf) + goto out_close; + + kcore->elfclass = gelf_getclass(kcore->elf); + if (kcore->elfclass == ELFCLASSNONE) + goto out_end; + + ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); + if (!ehdr) + goto out_end; + + return 0; + +out_end: + elf_end(kcore->elf); +out_close: + close(kcore->fd); + return -1; +} + +static int kcore__init(struct kcore *kcore, char *filename, int elfclass, + bool temp) +{ + GElf_Ehdr *ehdr; + + kcore->elfclass = elfclass; + + if (temp) + kcore->fd = mkstemp(filename); + else + kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400); + if (kcore->fd == -1) + return -1; + + kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL); + if (!kcore->elf) + goto out_close; + + if (!gelf_newehdr(kcore->elf, elfclass)) + goto out_end; + + ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); + if (!ehdr) + goto out_end; + + return 0; + +out_end: + elf_end(kcore->elf); +out_close: + close(kcore->fd); + unlink(filename); + return -1; +} + +static void kcore__close(struct kcore *kcore) +{ + elf_end(kcore->elf); + close(kcore->fd); +} + +static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) +{ + GElf_Ehdr *ehdr = &to->ehdr; + GElf_Ehdr *kehdr = &from->ehdr; + + memcpy(ehdr->e_ident, kehdr->e_ident, EI_NIDENT); + ehdr->e_type = kehdr->e_type; + ehdr->e_machine = kehdr->e_machine; + ehdr->e_version = kehdr->e_version; + ehdr->e_entry = 0; + ehdr->e_shoff = 0; + ehdr->e_flags = kehdr->e_flags; + ehdr->e_phnum = count; + ehdr->e_shentsize = 0; + ehdr->e_shnum = 0; + ehdr->e_shstrndx = 0; + + if (from->elfclass == ELFCLASS32) { + ehdr->e_phoff = sizeof(Elf32_Ehdr); + ehdr->e_ehsize = sizeof(Elf32_Ehdr); + ehdr->e_phentsize = sizeof(Elf32_Phdr); + } else { + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + } + + if (!gelf_update_ehdr(to->elf, ehdr)) + return -1; + + if (!gelf_newphdr(to->elf, count)) + return -1; + + return 0; +} + +static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, + u64 addr, u64 len) +{ + GElf_Phdr gphdr; + GElf_Phdr *phdr; + + phdr = gelf_getphdr(kcore->elf, idx, &gphdr); + if (!phdr) + return -1; + + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_offset = offset; + phdr->p_vaddr = addr; + phdr->p_paddr = 0; + phdr->p_filesz = len; + phdr->p_memsz = len; + phdr->p_align = page_size; + + if (!gelf_update_phdr(kcore->elf, idx, phdr)) + return -1; + + return 0; +} + +static off_t kcore__write(struct kcore *kcore) +{ + return elf_update(kcore->elf, ELF_C_WRITE); +} + +int kcore_extract__create(struct kcore_extract *kce) +{ + struct kcore kcore; + struct kcore extract; + size_t count = 1; + int idx = 0, err = -1; + off_t offset = page_size, sz; + + if (kcore__open(&kcore, kce->kcore_filename)) + return -1; + + strcpy(kce->extract_filename, PERF_KCORE_EXTRACT); + if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true)) + goto out_kcore_close; + + if (kcore__copy_hdr(&kcore, &extract, count)) + goto out_extract_close; + + if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len)) + goto out_extract_close; + + sz = kcore__write(&extract); + if (sz < 0 || sz > offset) + goto out_extract_close; + + if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len)) + goto out_extract_close; + + err = 0; + +out_extract_close: + kcore__close(&extract); + if (err) + unlink(kce->extract_filename); +out_kcore_close: + kcore__close(&kcore); + + return err; +} + +void kcore_extract__delete(struct kcore_extract *kce) +{ + unlink(kce->extract_filename); +} + void symbol__elf_init(void) { elf_version(EV_CURRENT); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 3a802c3..928556d 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -308,6 +308,15 @@ int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused, return -1; } +int kcore_extract__create(struct kcore_extract *kce __maybe_unused) +{ + return -1; +} + +void kcore_extract__delete(struct kcore_extract *kce __maybe_unused) +{ +} + void symbol__elf_init(void) { } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2d3eb43..fb107e1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -256,4 +256,18 @@ typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, bool *is_64_bit); +#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX" + +struct kcore_extract { + char *kcore_filename; + u64 addr; + u64 offs; + u64 len; + char extract_filename[sizeof(PERF_KCORE_EXTRACT)]; + int fd; +}; + +int kcore_extract__create(struct kcore_extract *kce); +void kcore_extract__delete(struct kcore_extract *kce); + #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From 4e987712740a3634c19a6fedaf12577b26775dc5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2013 13:43:38 +0300 Subject: perf symbols: Add map_groups__find_ams() Add a function to find a symbol using an ip that might be on a different map. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 17ee458..9dea404 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -371,6 +371,23 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, return NULL; } +int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) +{ + if (ams->addr < ams->map->start || ams->addr > ams->map->end) { + if (ams->map->groups == NULL) + return -1; + ams->map = map_groups__find(ams->map->groups, ams->map->type, + ams->addr); + if (ams->map == NULL) + return -1; + } + + ams->al_addr = ams->map->map_ip(ams->map, ams->addr); + ams->sym = map__find_symbol(ams->map, ams->al_addr, filter); + + return ams->sym ? 0 : -1; +} + size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, int verbose, FILE *fp) { diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 4886ca2..0359b4a 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -167,6 +167,10 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, struct map **mapp, symbol_filter_t filter); +struct addr_map_symbol; + +int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter); + static inline struct symbol *map_groups__find_function_by_name(struct map_groups *mg, const char *name, struct map **mapp, -- cgit v0.10.2 From 3fb66335e13ef7426affe9efa48c08857202c1cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Oct 2013 17:00:23 +0200 Subject: tools/perf/build: Fix non-existent build directory handling Arnaldo reported that non-existent build directories were not recognized properly. The reason is readlink failure causing 'O' to become empty. Solve it by passing through the 'O' variable unmodified if readlink fails. Reported-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131009150023.GA10167@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5aa3d04..9147044 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -34,7 +34,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O)) + FULL_O := $(shell readlink -f $(O) || echo $(O)) endif define print_msg -- cgit v0.10.2 From fcf92585014f0a0e390d2819de8278ae90da5842 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 08:05:25 +0200 Subject: tools/perf/build: Pass through DEBUG parameter Arnaldo reported that 'make DEBUG=1' does not work anymore. The reason is that 'Makefile' only passes it through to 'Makefile.perf' via the environment, but 'Makefile.perf' checks that it's a command line option: ifeq ("$(origin DEBUG)", "command line") PERF_DEBUG = $(DEBUG) endif So pass it through properly, and also clean up DEBUG parameter handling while at it and fix a couple of annoyances: - DEBUG=0 used to be interpreted as 'debugging on'. Turn it into 'debugging off' instead. - Same was the case for 'DEBUG=' - turn that into debug-off as well. - Pass in just a clean, sanitized 'DEBUG' value and get rid of the intermediate, unnecessary PERF_DEBUG variable. Reported-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9147044..4835618 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -37,12 +37,25 @@ ifneq ($(O),) FULL_O := $(shell readlink -f $(O) || echo $(O)) endif +# +# Only accept the 'DEBUG' variable from the command line: +# +ifeq ("$(origin DEBUG)", "command line") + ifeq ($(DEBUG),) + override DEBUG = 0 + else + SET_DEBUG = "DEBUG=$(DEBUG)" + endif +else + override DEBUG = 0 +endif + define print_msg @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n' endef define make - @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $@ + @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@ endef # diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 9680424..9524c0c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -66,10 +66,7 @@ ifneq ($(WERROR),0) CFLAGS += -Werror endif -ifeq ("$(origin DEBUG)", "command line") - PERF_DEBUG = $(DEBUG) -endif -ifndef PERF_DEBUG +ifeq ($(DEBUG),0) CFLAGS += -O6 endif @@ -210,7 +207,7 @@ ifeq ($(feature-volatile-register-var), 1) CFLAGS += -Wvolatile-register-var endif -ifndef PERF_DEBUG +ifeq ($(DEBUG),0) ifeq ($(feature-fortify-source), 1) CFLAGS += -D_FORTIFY_SOURCE=2 endif -- cgit v0.10.2 From 0dc097421974e076b47d19806966adf5ebc7fd6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 08:47:01 +0200 Subject: tools/perf/build: Fix DPACKAGE definitions for the libbfd et al testcases Namhyung Kim reported these duplicate DPACKAGE definitions: test-libbfd: $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl Fix all affected places and use Namhyung's suggestion that the definition should look like a normal C string: -DPACKAGE='"perf"'. Reported-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 8ecac19..cf33596 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -37,7 +37,7 @@ BUILD = $(CC) $(LDFLAGS) -o $(OUTPUT)$@ $@.c ############################### test-all: - $(BUILD) -Werror -fstack-protector -fstack-protector-all -Wvolatile-register-var -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl + $(BUILD) -Werror -fstack-protector -fstack-protector-all -Wvolatile-register-var -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl test-hello: $(BUILD) @@ -123,13 +123,13 @@ test-libpython-version: $(BUILD) $(FLAGS_PYTHON_EMBED) test-libbfd: - $(BUILD) -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl + $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl test-liberty: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl -liberty + $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty test-liberty-z: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='perf' -DPACKAGE=perf -lbfd -ldl -liberty -lz + $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz test-cplus-demangle: $(BUILD) -liberty -- cgit v0.10.2 From 5f36978ca5b12b1e35535dedb8c999694fc0dfcf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 08:53:17 +0200 Subject: tools/perf/build: Simplify the libelf logic Ulrich Drepper and Namhyung Kim reported that the libelf logic in config/Makefile is duplicated in part. Remove the duplication, and also remove the now unused FLAGS_LIBELF variable. Reported-by: Ulrich Drepper Reported-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 9524c0c..d207922 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -280,7 +280,6 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT - FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT @@ -305,13 +304,6 @@ ifndef NO_LIBELF endif # NO_DWARF endif # NO_LIBELF -ifndef NO_LIBELF - CFLAGS += -DHAVE_LIBELF_SUPPORT - ifeq ($(feature-libelf-mmap), 1) - CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT - endif -endif # NO_LIBELF - # There's only x86 (both 32 and 64) support for CFI unwind so far ifneq ($(ARCH),x86) NO_LIBUNWIND := 1 -- cgit v0.10.2 From 01287e2cb7ad17b3d77751888d458a6b6a2bba15 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 08:58:57 +0200 Subject: tools/perf/build: Remove the volatile-register-var feature check Namhyung Kim noticed that the volatile-register-var feature check is superfluous: > The gcc manpage says this warning is enabled by -Wall, and we add -Wall > to CFLAGS before doing feature checks. So all gcc versions that support > -Wvolatile-register-var enables it by default without this check and > older gcc versions will always fail the feature check. Remove it - this will further speed up feature checks. Reported-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d207922..c516d6ba 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -132,8 +132,7 @@ CORE_FEATURE_TESTS = \ libunwind \ on-exit \ stackprotector \ - stackprotector-all \ - volatile-register-var + stackprotector-all # # So here we detect whether test-all was rebuilt, to be able @@ -203,10 +202,6 @@ ifeq ($(feature-stackprotector), 1) CFLAGS += -Wstack-protector endif -ifeq ($(feature-volatile-register-var), 1) - CFLAGS += -Wvolatile-register-var -endif - ifeq ($(DEBUG),0) ifeq ($(feature-fortify-source), 1) CFLAGS += -D_FORTIFY_SOURCE=2 diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index cf33596..2eb8346 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -25,8 +25,7 @@ FILES= \ test-libunwind \ test-on-exit \ test-stackprotector-all \ - test-stackprotector \ - test-volatile-register-var + test-stackprotector CC := $(CC) -MD @@ -37,7 +36,7 @@ BUILD = $(CC) $(LDFLAGS) -o $(OUTPUT)$@ $@.c ############################### test-all: - $(BUILD) -Werror -fstack-protector -fstack-protector-all -Wvolatile-register-var -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -Werror -fstack-protector -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl test-hello: $(BUILD) @@ -48,9 +47,6 @@ test-stackprotector-all: test-stackprotector: $(BUILD) -Werror -fstack-protector -test-volatile-register-var: - $(BUILD) -Werror -Wvolatile-register-var - test-fortify-source: $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 -- cgit v0.10.2 From 046fa7ae20d2390c65be8eb406efecff15e8b71d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 09:06:21 +0200 Subject: tools/perf/build: Improve the 'stackprotector' feature test Namhyung Kim noticed that the stackprotector testcase was incomplete: > The flag being checked should be -"W"stack-protector instead of > -"f"stack-protector. And the gcc manpage says that -Wstack-protector is > only active when -fstack-protector is active. So the end result should > look like > > $(BUILD) -Werror -fstack-protector -Wstack-protector Add -Wstack-protector. Reported-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 2eb8346..c70d23e 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -45,7 +45,7 @@ test-stackprotector-all: $(BUILD) -Werror -fstack-protector-all test-stackprotector: - $(BUILD) -Werror -fstack-protector + $(BUILD) -Werror -fstack-protector -Wstack-protector test-fortify-source: $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 -- cgit v0.10.2 From 231486a5223b1023bcabf53d16d63a83b9f27bf7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 09:10:59 +0200 Subject: tools/perf/build: Simplify the autodep inclusion rule Namhyung Kim noticed that the autodep .d file inclusion rule was unnecessarily complicated: > > +-include *.d */*.d > > Hmm.. this */*.d part is really needed? Only include *.d files. Reported-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: David Ahern Cc: Jiri Olsa Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index c70d23e..452b67c 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -136,7 +136,7 @@ test-on-exit: test-backtrace: $(BUILD) --include *.d */*.d +-include *.d ############################### -- cgit v0.10.2 From 6e427ab02c8886ca6c9ecdbb318e68fe8f605469 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 13:43:40 +0300 Subject: perf annotate: Find kcore symbols on other maps Use the new map_groups__find_ams() method to find kcore symbols on other maps. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 882bb86..cf6242c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -825,20 +825,16 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); - /* - * kcore has no symbols, so add the call target name if it is on the - * same map. - */ + /* kcore has no symbols, so add the call target name */ if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) { - struct symbol *s; - u64 ip = dl->ops.target.addr; - - if (ip >= map->start && ip <= map->end) { - ip = map->map_ip(map, ip); - s = map__find_symbol(map, ip, NULL); - if (s && s->start == ip) - dl->ops.target.name = strdup(s->name); - } + struct addr_map_symbol target = { + .map = map, + .addr = dl->ops.target.addr, + }; + + if (!map_groups__find_ams(&target, NULL) && + target.sym->start == target.al_addr) + dl->ops.target.name = strdup(target.sym->name); } disasm__add(¬es->src->source, dl); -- cgit v0.10.2 From 9a17d7268d71674f0bbff6821f7d8e6dc0ece19a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 13:43:41 +0300 Subject: perf tools: Add copyfile_mode() Add a function to copy a file specifying the permissions to use for the created file. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ab71d62..8dc8cf3 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -55,17 +55,20 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } -static int slow_copyfile(const char *from, const char *to) +static int slow_copyfile(const char *from, const char *to, mode_t mode) { - int err = 0; + int err = -1; char *line = NULL; size_t n; FILE *from_fp = fopen(from, "r"), *to_fp; + mode_t old_umask; if (from_fp == NULL) goto out; + old_umask = umask(mode ^ 0777); to_fp = fopen(to, "w"); + umask(old_umask); if (to_fp == NULL) goto out_fclose_from; @@ -82,7 +85,7 @@ out: return err; } -int copyfile(const char *from, const char *to) +int copyfile_mode(const char *from, const char *to, mode_t mode) { int fromfd, tofd; struct stat st; @@ -93,13 +96,13 @@ int copyfile(const char *from, const char *to) goto out; if (st.st_size == 0) /* /proc? do it slowly... */ - return slow_copyfile(from, to); + return slow_copyfile(from, to, mode); fromfd = open(from, O_RDONLY); if (fromfd < 0) goto out; - tofd = creat(to, 0755); + tofd = creat(to, mode); if (tofd < 0) goto out_close_from; @@ -121,6 +124,11 @@ out: return err; } +int copyfile(const char *from, const char *to) +{ + return copyfile_mode(from, to, 0755); +} + unsigned long convert_unit(unsigned long value, char *unit) { *unit = ' '; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1f06ba4..42dfba7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -243,6 +243,7 @@ static inline int sane_case(int x, int high) int mkdir_p(char *path, mode_t mode); int copyfile(const char *from, const char *to); +int copyfile_mode(const char *from, const char *to, mode_t mode); s64 perf_atoll(const char *str); char **argv_split(const char *str, int *argcp); -- cgit v0.10.2 From 0544d4225c52ca31ab2a55b22ddce1392d8f45a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 13:43:43 +0300 Subject: perf symbols: Add ability to find kcore in build-id cache When no vmlinux is found, tools will use kallsyms and, if possible, kcore. Add the ability to find kcore in the build-id cache. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b2f60dd..76a9e93 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1401,6 +1401,105 @@ out: return err; } +static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) +{ + char kallsyms_filename[PATH_MAX]; + struct dirent *dent; + int ret = -1; + DIR *d; + + d = opendir(dir); + if (!d) + return -1; + + while (1) { + dent = readdir(d); + if (!dent) + break; + if (dent->d_type != DT_DIR) + continue; + scnprintf(kallsyms_filename, sizeof(kallsyms_filename), + "%s/%s/kallsyms", dir, dent->d_name); + if (!validate_kcore_modules(kallsyms_filename, map)) { + strlcpy(dir, kallsyms_filename, dir_sz); + ret = 0; + break; + } + } + + closedir(d); + + return ret; +} + +static char *dso__find_kallsyms(struct dso *dso, struct map *map) +{ + u8 host_build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + bool is_host = false; + char path[PATH_MAX]; + + if (!dso->has_build_id) { + /* + * Last resort, if we don't have a build-id and couldn't find + * any vmlinux file, try the running kernel kallsyms table. + */ + goto proc_kallsyms; + } + + if (sysfs__read_build_id("/sys/kernel/notes", host_build_id, + sizeof(host_build_id)) == 0) + is_host = dso__build_id_equal(dso, host_build_id); + + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + + /* Use /proc/kallsyms if possible */ + if (is_host) { + DIR *d; + int fd; + + /* If no cached kcore go with /proc/kallsyms */ + scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", + buildid_dir, sbuild_id); + d = opendir(path); + if (!d) + goto proc_kallsyms; + closedir(d); + + /* + * Do not check the build-id cache, until we know we cannot use + * /proc/kcore. + */ + fd = open("/proc/kcore", O_RDONLY); + if (fd != -1) { + close(fd); + /* If module maps match go with /proc/kallsyms */ + if (!validate_kcore_modules("/proc/kallsyms", map)) + goto proc_kallsyms; + } + + /* Find kallsyms in build-id cache with kcore */ + if (!find_matching_kcore(map, path, sizeof(path))) + return strdup(path); + + goto proc_kallsyms; + } + + scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", + buildid_dir, sbuild_id); + + if (access(path, F_OK)) { + pr_err("No kallsyms or vmlinux with build-id %s was found\n", + sbuild_id); + return NULL; + } + + return strdup(path); + +proc_kallsyms: + return strdup("/proc/kallsyms"); +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter) { @@ -1449,51 +1548,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, if (symbol_conf.symfs[0] != 0) return -1; - /* - * Say the kernel DSO was created when processing the build-id header table, - * we have a build-id, so check if it is the same as the running kernel, - * using it if it is. - */ - if (dso->has_build_id) { - u8 kallsyms_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - - if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, - sizeof(kallsyms_build_id)) == 0) { - if (dso__build_id_equal(dso, kallsyms_build_id)) { - kallsyms_filename = "/proc/kallsyms"; - goto do_kallsyms; - } - } - /* - * Now look if we have it on the build-id cache in - * $HOME/.debug/[kernel.kallsyms]. - */ - build_id__sprintf(dso->build_id, sizeof(dso->build_id), - sbuild_id); - - if (asprintf(&kallsyms_allocated_filename, - "%s/.debug/[kernel.kallsyms]/%s", - getenv("HOME"), sbuild_id) == -1) { - pr_err("Not enough memory for kallsyms file lookup\n"); - return -1; - } - - kallsyms_filename = kallsyms_allocated_filename; + kallsyms_allocated_filename = dso__find_kallsyms(dso, map); + if (!kallsyms_allocated_filename) + return -1; - if (access(kallsyms_filename, F_OK)) { - pr_err("No kallsyms or vmlinux with build-id %s " - "was found\n", sbuild_id); - free(kallsyms_allocated_filename); - return -1; - } - } else { - /* - * Last resort, if we don't have a build-id and couldn't find - * any vmlinux file, try the running kernel kallsyms table. - */ - kallsyms_filename = "/proc/kallsyms"; - } + kallsyms_filename = kallsyms_allocated_filename; do_kallsyms: err = dso__load_kallsyms(dso, kallsyms_filename, map, filter); -- cgit v0.10.2 From 1179e11bbb655162e83b33e17a97c45d3fe292da Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 13:43:39 +0300 Subject: perf annotate: Fix annotate_browser__callq() When following a call, annotate_browser__callq() uses the current symbol's map to look up the target ip. That will not work if the target ip is on a map with a different mapping (i.e. start - pgoff is different). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 08545ae..57d3a86 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -442,35 +442,34 @@ static bool annotate_browser__callq(struct annotate_browser *browser, { struct map_symbol *ms = browser->b.priv; struct disasm_line *dl = browser->selection; - struct symbol *sym = ms->sym; struct annotation *notes; - struct symbol *target; - u64 ip; + struct addr_map_symbol target = { + .map = ms->map, + .addr = dl->ops.target.addr, + }; char title[SYM_TITLE_MAX_SIZE]; if (!ins__is_call(dl->ins)) return false; - ip = ms->map->map_ip(ms->map, dl->ops.target.addr); - target = map__find_symbol(ms->map, ip, NULL); - if (target == NULL) { + if (map_groups__find_ams(&target, NULL)) { ui_helpline__puts("The called function was not found."); return true; } - notes = symbol__annotation(target); + notes = symbol__annotation(target.sym); pthread_mutex_lock(¬es->lock); - if (notes->src == NULL && symbol__alloc_hist(target) < 0) { + if (notes->src == NULL && symbol__alloc_hist(target.sym) < 0) { pthread_mutex_unlock(¬es->lock); ui__warning("Not enough memory for annotating '%s' symbol!\n", - target->name); + target.sym->name); return true; } pthread_mutex_unlock(¬es->lock); - symbol__tui_annotate(target, ms->map, evsel, hbt); - sym_title(sym, ms->map, title, sizeof(title)); + symbol__tui_annotate(target.sym, target.map, evsel, hbt); + sym_title(ms->sym, ms->map, title, sizeof(title)); ui_browser__show_title(&browser->b, title); return true; } -- cgit v0.10.2 From fc1b691d7651d9496e912de7e0fc73a5be3294af Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 16:57:29 +0300 Subject: perf buildid-cache: Add ability to add kcore to the cache kcore can be used to view the running kernel object code. However, kcore changes as modules are loaded and unloaded, and when the kernel decides to modify its own code. Consequently it is useful to create a copy of kcore at a particular time. Unlike vmlinux, kcore is not unique for a given build-id. And in addition, the kallsyms and modules files are also needed. The tool therefore creates a directory: ~/.debug/[kernel.kcore]// which contains: kcore, kallsyms and modules. Note that the copied kcore contains only code sections. See the kcore_copy() function for how that is determined. The tool will not make additional copies of kcore if there is already one with the same modules at the same addresses. Currently, perf tools will not look for kcore in the cache. That is addressed in another patch. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/525BF849.5030405@intel.com [ renamed 'index' to 'idx' to avoid shadowing string.h symbol in f12, use at least one member initializer when initializing a struct to zeros, also to fix the build on f12 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index e9a8349..fd77d81 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -21,6 +21,19 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-k:: +--kcore:: + Add specified kcore file to the cache. For the current host that is + /proc/kcore which requires root permissions to read. Be aware that + running 'perf buildid-cache' as root may update root's build-id cache + not the user's. Use the -v option to see where the file is created. + Note that the copied file contains only code sections not the whole core + image. Note also that files "kallsyms" and "modules" must also be in the + same directory and are also copied. All 3 files are created with read + permissions for root only. kcore will not be added if there is already a + kcore in the cache (with the same build-id) that has the same modules at + the same addresses. Use the -v option to see if a copy of kcore is + actually made. -r:: --remove=:: Remove specified file from the cache. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c96c8fa..8140b7b 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -6,6 +6,11 @@ * Copyright (C) 2010, Red Hat Inc. * Copyright (C) 2010, Arnaldo Carvalho de Melo */ +#include +#include +#include +#include +#include #include "builtin.h" #include "perf.h" #include "util/cache.h" @@ -17,6 +22,140 @@ #include "util/session.h" #include "util/symbol.h" +static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) +{ + char root_dir[PATH_MAX]; + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + char *p; + + strlcpy(root_dir, proc_dir, sizeof(root_dir)); + + p = strrchr(root_dir, '/'); + if (!p) + return -1; + *p = '\0'; + + scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) + return -1; + + build_id__sprintf(build_id, sizeof(build_id), sbuildid); + + return 0; +} + +static int build_id_cache__kcore_dir(char *dir, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} + +static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, + size_t to_dir_sz) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + struct dirent *dent; + int ret = -1; + DIR *d; + + d = opendir(to_dir); + if (!d) + return -1; + + scnprintf(from, sizeof(from), "%s/modules", from_dir); + + while (1) { + dent = readdir(d); + if (!dent) + break; + if (dent->d_type != DT_DIR) + continue; + scnprintf(to, sizeof(to), "%s/%s/modules", to_dir, + dent->d_name); + if (!compare_proc_modules(from, to)) { + scnprintf(to, sizeof(to), "%s/%s", to_dir, + dent->d_name); + strlcpy(to_dir, to, to_dir_sz); + ret = 0; + break; + } + } + + closedir(d); + + return ret; +} + +static int build_id_cache__add_kcore(const char *filename, const char *debugdir) +{ + char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; + char from_dir[PATH_MAX], to_dir[PATH_MAX]; + char *p; + + strlcpy(from_dir, filename, sizeof(from_dir)); + + p = strrchr(from_dir, '/'); + if (!p || strcmp(p + 1, "kcore")) + return -1; + *p = '\0'; + + if (build_id_cache__kcore_buildid(from_dir, sbuildid)) + return -1; + + scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", + debugdir, sbuildid); + + if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { + pr_debug("same kcore found in %s\n", to_dir); + return 0; + } + + if (build_id_cache__kcore_dir(dir, sizeof(dir))) + return -1; + + scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", + debugdir, sbuildid, dir); + + if (mkdir_p(to_dir, 0755)) + return -1; + + if (kcore_copy(from_dir, to_dir)) { + /* Remove YYYYmmddHHMMSShh directory */ + if (!rmdir(to_dir)) { + p = strrchr(to_dir, '/'); + if (p) + *p = '\0'; + /* Try to remove buildid directory */ + if (!rmdir(to_dir)) { + p = strrchr(to_dir, '/'); + if (p) + *p = '\0'; + /* Try to remove [kernel.kcore] directory */ + rmdir(to_dir); + } + } + return -1; + } + + pr_debug("kcore added to build-id cache directory %s\n", to_dir); + + return 0; +} + static int build_id_cache__add_file(const char *filename, const char *debugdir) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -130,11 +269,14 @@ int cmd_buildid_cache(int argc, const char **argv, char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *missing_filename = NULL, - *update_name_list_str = NULL; + *update_name_list_str = NULL, + *kcore_filename; const struct option buildid_cache_options[] = { OPT_STRING('a', "add", &add_name_list_str, "file list", "file(s) to add"), + OPT_STRING('k', "kcore", &kcore_filename, + "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), OPT_STRING('M', "missing", &missing_filename, "file", @@ -217,5 +359,9 @@ int cmd_buildid_cache(int argc, const char **argv, } } + if (kcore_filename && + build_id_cache__add_kcore(kcore_filename, debugdir)) + pr_warning("Couldn't add %s\n", kcore_filename); + return ret; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 499c71d..d6b8af3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1202,6 +1202,372 @@ static off_t kcore__write(struct kcore *kcore) return elf_update(kcore->elf, ELF_C_WRITE); } +struct phdr_data { + off_t offset; + u64 addr; + u64 len; +}; + +struct kcore_copy_info { + u64 stext; + u64 etext; + u64 first_symbol; + u64 last_symbol; + u64 first_module; + u64 last_module_symbol; + struct phdr_data kernel_map; + struct phdr_data modules_map; +}; + +static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, + u64 start) +{ + struct kcore_copy_info *kci = arg; + + if (!symbol_type__is_a(type, MAP__FUNCTION)) + return 0; + + if (strchr(name, '[')) { + if (start > kci->last_module_symbol) + kci->last_module_symbol = start; + return 0; + } + + if (!kci->first_symbol || start < kci->first_symbol) + kci->first_symbol = start; + + if (!kci->last_symbol || start > kci->last_symbol) + kci->last_symbol = start; + + if (!strcmp(name, "_stext")) { + kci->stext = start; + return 0; + } + + if (!strcmp(name, "_etext")) { + kci->etext = start; + return 0; + } + + return 0; +} + +static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci, + const char *dir) +{ + char kallsyms_filename[PATH_MAX]; + + scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir); + + if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms")) + return -1; + + if (kallsyms__parse(kallsyms_filename, kci, + kcore_copy__process_kallsyms) < 0) + return -1; + + return 0; +} + +static int kcore_copy__process_modules(void *arg, + const char *name __maybe_unused, + u64 start) +{ + struct kcore_copy_info *kci = arg; + + if (!kci->first_module || start < kci->first_module) + kci->first_module = start; + + return 0; +} + +static int kcore_copy__parse_modules(struct kcore_copy_info *kci, + const char *dir) +{ + char modules_filename[PATH_MAX]; + + scnprintf(modules_filename, PATH_MAX, "%s/modules", dir); + + if (symbol__restricted_filename(modules_filename, "/proc/modules")) + return -1; + + if (modules__parse(modules_filename, kci, + kcore_copy__process_modules) < 0) + return -1; + + return 0; +} + +static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff, + u64 s, u64 e) +{ + if (p->addr || s < start || s >= end) + return; + + p->addr = s; + p->offset = (s - start) + pgoff; + p->len = e < end ? e - s : end - s; +} + +static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data) +{ + struct kcore_copy_info *kci = data; + u64 end = start + len; + + kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext, + kci->etext); + + kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module, + kci->last_module_symbol); + + return 0; +} + +static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf) +{ + if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0) + return -1; + + return 0; +} + +static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, + Elf *elf) +{ + if (kcore_copy__parse_kallsyms(kci, dir)) + return -1; + + if (kcore_copy__parse_modules(kci, dir)) + return -1; + + if (kci->stext) + kci->stext = round_down(kci->stext, page_size); + else + kci->stext = round_down(kci->first_symbol, page_size); + + if (kci->etext) { + kci->etext = round_up(kci->etext, page_size); + } else if (kci->last_symbol) { + kci->etext = round_up(kci->last_symbol, page_size); + kci->etext += page_size; + } + + kci->first_module = round_down(kci->first_module, page_size); + + if (kci->last_module_symbol) { + kci->last_module_symbol = round_up(kci->last_module_symbol, + page_size); + kci->last_module_symbol += page_size; + } + + if (!kci->stext || !kci->etext) + return -1; + + if (kci->first_module && !kci->last_module_symbol) + return -1; + + return kcore_copy__read_maps(kci, elf); +} + +static int kcore_copy__copy_file(const char *from_dir, const char *to_dir, + const char *name) +{ + char from_filename[PATH_MAX]; + char to_filename[PATH_MAX]; + + scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name); + scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name); + + return copyfile_mode(from_filename, to_filename, 0400); +} + +static int kcore_copy__unlink(const char *dir, const char *name) +{ + char filename[PATH_MAX]; + + scnprintf(filename, PATH_MAX, "%s/%s", dir, name); + + return unlink(filename); +} + +static int kcore_copy__compare_fds(int from, int to) +{ + char *buf_from; + char *buf_to; + ssize_t ret; + size_t len; + int err = -1; + + buf_from = malloc(page_size); + buf_to = malloc(page_size); + if (!buf_from || !buf_to) + goto out; + + while (1) { + /* Use read because mmap won't work on proc files */ + ret = read(from, buf_from, page_size); + if (ret < 0) + goto out; + + if (!ret) + break; + + len = ret; + + if (readn(to, buf_to, len) != (int)len) + goto out; + + if (memcmp(buf_from, buf_to, len)) + goto out; + } + + err = 0; +out: + free(buf_to); + free(buf_from); + return err; +} + +static int kcore_copy__compare_files(const char *from_filename, + const char *to_filename) +{ + int from, to, err = -1; + + from = open(from_filename, O_RDONLY); + if (from < 0) + return -1; + + to = open(to_filename, O_RDONLY); + if (to < 0) + goto out_close_from; + + err = kcore_copy__compare_fds(from, to); + + close(to); +out_close_from: + close(from); + return err; +} + +static int kcore_copy__compare_file(const char *from_dir, const char *to_dir, + const char *name) +{ + char from_filename[PATH_MAX]; + char to_filename[PATH_MAX]; + + scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name); + scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name); + + return kcore_copy__compare_files(from_filename, to_filename); +} + +/** + * kcore_copy - copy kallsyms, modules and kcore from one directory to another. + * @from_dir: from directory + * @to_dir: to directory + * + * This function copies kallsyms, modules and kcore files from one directory to + * another. kallsyms and modules are copied entirely. Only code segments are + * copied from kcore. It is assumed that two segments suffice: one for the + * kernel proper and one for all the modules. The code segments are determined + * from kallsyms and modules files. The kernel map starts at _stext or the + * lowest function symbol, and ends at _etext or the highest function symbol. + * The module map starts at the lowest module address and ends at the highest + * module symbol. Start addresses are rounded down to the nearest page. End + * addresses are rounded up to the nearest page. An extra page is added to the + * highest kernel symbol and highest module symbol to, hopefully, encompass that + * symbol too. Because it contains only code sections, the resulting kcore is + * unusual. One significant peculiarity is that the mapping (start -> pgoff) + * is not the same for the kernel map and the modules map. That happens because + * the data is copied adjacently whereas the original kcore has gaps. Finally, + * kallsyms and modules files are compared with their copies to check that + * modules have not been loaded or unloaded while the copies were taking place. + * + * Return: %0 on success, %-1 on failure. + */ +int kcore_copy(const char *from_dir, const char *to_dir) +{ + struct kcore kcore; + struct kcore extract; + size_t count = 2; + int idx = 0, err = -1; + off_t offset = page_size, sz, modules_offset = 0; + struct kcore_copy_info kci = { .stext = 0, }; + char kcore_filename[PATH_MAX]; + char extract_filename[PATH_MAX]; + + if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms")) + return -1; + + if (kcore_copy__copy_file(from_dir, to_dir, "modules")) + goto out_unlink_kallsyms; + + scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir); + scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir); + + if (kcore__open(&kcore, kcore_filename)) + goto out_unlink_modules; + + if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf)) + goto out_kcore_close; + + if (kcore__init(&extract, extract_filename, kcore.elfclass, false)) + goto out_kcore_close; + + if (!kci.modules_map.addr) + count -= 1; + + if (kcore__copy_hdr(&kcore, &extract, count)) + goto out_extract_close; + + if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr, + kci.kernel_map.len)) + goto out_extract_close; + + if (kci.modules_map.addr) { + modules_offset = offset + kci.kernel_map.len; + if (kcore__add_phdr(&extract, idx, modules_offset, + kci.modules_map.addr, kci.modules_map.len)) + goto out_extract_close; + } + + sz = kcore__write(&extract); + if (sz < 0 || sz > offset) + goto out_extract_close; + + if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset, + kci.kernel_map.len)) + goto out_extract_close; + + if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset, + extract.fd, modules_offset, + kci.modules_map.len)) + goto out_extract_close; + + if (kcore_copy__compare_file(from_dir, to_dir, "modules")) + goto out_extract_close; + + if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms")) + goto out_extract_close; + + err = 0; + +out_extract_close: + kcore__close(&extract); + if (err) + unlink(extract_filename); +out_kcore_close: + kcore__close(&kcore); +out_unlink_modules: + if (err) + kcore_copy__unlink(to_dir, "modules"); +out_unlink_kallsyms: + if (err) + kcore_copy__unlink(to_dir, "kallsyms"); + + return err; +} + int kcore_extract__create(struct kcore_extract *kce) { struct kcore kcore; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 928556d..2d2dd05 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -317,6 +317,12 @@ void kcore_extract__delete(struct kcore_extract *kce __maybe_unused) { } +int kcore_copy(const char *from_dir __maybe_unused, + const char *to_dir __maybe_unused) +{ + return -1; +} + void symbol__elf_init(void) { } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 76a9e93..b66c1ee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -893,6 +893,47 @@ static int read_proc_modules(const char *filename, struct rb_root *modules) return 0; } +int compare_proc_modules(const char *from, const char *to) +{ + struct rb_root from_modules = RB_ROOT; + struct rb_root to_modules = RB_ROOT; + struct rb_node *from_node, *to_node; + struct module_info *from_m, *to_m; + int ret = -1; + + if (read_proc_modules(from, &from_modules)) + return -1; + + if (read_proc_modules(to, &to_modules)) + goto out_delete_from; + + from_node = rb_first(&from_modules); + to_node = rb_first(&to_modules); + while (from_node) { + if (!to_node) + break; + + from_m = rb_entry(from_node, struct module_info, rb_node); + to_m = rb_entry(to_node, struct module_info, rb_node); + + if (from_m->start != to_m->start || + strcmp(from_m->name, to_m->name)) + break; + + from_node = rb_next(from_node); + to_node = rb_next(to_node); + } + + if (!from_node && !to_node) + ret = 0; + + delete_modules(&to_modules); +out_delete_from: + delete_modules(&from_modules); + + return ret; +} + static int do_validate_kcore_modules(const char *filename, struct map *map, struct map_groups *kmaps) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fb107e1..07de8fe 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -270,4 +270,7 @@ struct kcore_extract { int kcore_extract__create(struct kcore_extract *kce); void kcore_extract__delete(struct kcore_extract *kce); +int kcore_copy(const char *from_dir, const char *to_dir); +int compare_proc_modules(const char *from, const char *to); + #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From 1d5077bdd9a10c4297cded139989bb9ee2998a6c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 14 Oct 2013 13:43:44 +0300 Subject: perf annotate: Another fix for annotate_browser__callq() The target address is provided by objdump and is not necessary a memory address. Add a helper to get the correct address. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381747424-3557-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 57d3a86..f0697a3 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -445,14 +445,17 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct annotation *notes; struct addr_map_symbol target = { .map = ms->map, - .addr = dl->ops.target.addr, + .addr = map__objdump_2mem(ms->map, dl->ops.target.addr), }; char title[SYM_TITLE_MAX_SIZE]; if (!ins__is_call(dl->ins)) return false; - if (map_groups__find_ams(&target, NULL)) { + if (map_groups__find_ams(&target, NULL) || + map__rip_2objdump(target.map, target.map->map_ip(target.map, + target.addr)) != + dl->ops.target.addr) { ui_helpline__puts("The called function was not found."); return true; } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 9dea404..ef5bc91 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -252,10 +252,16 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } -/* +/** + * map__rip_2objdump - convert symbol start address to objdump address. + * @map: memory map + * @rip: symbol start address + * * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is * relative to section start. + * + * Return: Address suitable for passing to "objdump --start-address=" */ u64 map__rip_2objdump(struct map *map, u64 rip) { @@ -268,6 +274,29 @@ u64 map__rip_2objdump(struct map *map, u64 rip) return map->unmap_ip(map, rip); } +/** + * map__objdump_2mem - convert objdump address to a memory address. + * @map: memory map + * @ip: objdump address + * + * Closely related to map__rip_2objdump(), this function takes an address from + * objdump and converts it to a memory address. Note this assumes that @map + * contains the address. To be sure the result is valid, check it forwards + * e.g. map__rip_2objdump(map->map_ip(map, map__objdump_2mem(map, ip))) == ip + * + * Return: Memory address. + */ +u64 map__objdump_2mem(struct map *map, u64 ip) +{ + if (!map->dso->adjust_symbols) + return map->unmap_ip(map, ip); + + if (map->dso->rel) + return map->unmap_ip(map, ip + map->pgoff); + + return ip; +} + void map_groups__init(struct map_groups *mg) { int i; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0359b4a..e4e259c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -84,6 +84,9 @@ static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip) /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); +/* objdump address -> memory address */ +u64 map__objdump_2mem(struct map *map, u64 ip); + struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); -- cgit v0.10.2 From d4f74eb89199dc7bde5579783e9188841e1271e3 Mon Sep 17 00:00:00 2001 From: Chenggang Qin Date: Fri, 11 Oct 2013 08:27:59 +0800 Subject: perf symbols: Fix a memory leak due to symbol__delete not being used In function symbols__fixup_duplicate(), while duplicated symbols are found, only the rb_node is removed from the tree. The symbol structures themself are ignored. Then, these memory areas are lost. Signed-off-by: Chenggang Qin Acked-by: Namhyung Kim Cc: Andrew Morton Cc: Arjan van de Ven Cc: David Ahern Cc: Ingo Molnar Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wu Fengguang Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/1381451279-4109-3-git-send-email-chenggang.qin@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b66c1ee..c0c3696 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -160,10 +160,12 @@ again: if (choose_best_symbol(curr, next) == SYMBOL_A) { rb_erase(&next->rb_node, symbols); + symbol__delete(next); goto again; } else { nd = rb_next(&curr->rb_node); rb_erase(&curr->rb_node, symbols); + symbol__delete(curr); } } } -- cgit v0.10.2 From 784f3390f9bd900adfb3b0373615e105a0d9749a Mon Sep 17 00:00:00 2001 From: Chenggang Qin Date: Fri, 11 Oct 2013 08:27:57 +0800 Subject: perf symbols: Fix a mmap and munmap mismatched bug In function filename__read_debuglink(), while the ELF file is opend and mmapped in elf_begin(), but if this file is considered to not be usable during the following code, we will goto the close(fd) directly. The elf_end() is skipped. So, the mmaped ELF file cannot be munmapped. The mmapped areas exist during the life of perf. This is a memory leak. This patch fixed this bug. Reviewed-by: Namhyung Kim Signed-off-by: Chenggang Qin Cc: Andrew Morton Cc: Arjan van de Ven Cc: Chenggang Qin Cc: David Ahern Cc: Ingo Molnar Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wu Fengguang Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/1381451279-4109-1-git-send-email-chenggang.qin@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index d6b8af3..eed0b96 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -487,27 +487,27 @@ int filename__read_debuglink(const char *filename, char *debuglink, ek = elf_kind(elf); if (ek != ELF_K_ELF) - goto out_close; + goto out_elf_end; if (gelf_getehdr(elf, &ehdr) == NULL) { pr_err("%s: cannot get elf header.\n", __func__); - goto out_close; + goto out_elf_end; } sec = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debuglink", NULL); if (sec == NULL) - goto out_close; + goto out_elf_end; data = elf_getdata(sec, NULL); if (data == NULL) - goto out_close; + goto out_elf_end; /* the start of this section is a zero-terminated string */ strncpy(debuglink, data->d_buf, size); +out_elf_end: elf_end(elf); - out_close: close(fd); out: -- cgit v0.10.2 From 6650b181cc0b0c4218ebff9b0c368a2e56287907 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2013 18:25:12 -0300 Subject: perf scripting perl: Fix build error on Fedora 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cast __u64 to u64 to silence this warning on older distros, such as Fedora 12: CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o cc1: warnings being treated as errors util/scripting-engines/trace-event-perl.c: In function ‘perl_process_tracepoint’: util/scripting-engines/trace-event-perl.c:285: error: format ‘%lu’ expects type ‘long unsigned int’, but argument 2 has type ‘__u64’ make[1]: *** [/tmp/build/perf/util/scripting-engines/trace-event-perl.o] Error 1 make: *** [install] Error 2 make: Leaving directory `/home/acme/git/linux/tools/perf' [acme@fedora12 linux]$ Reported-by: Waiman Long Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Waiman Long Link: http://lkml.kernel.org/n/tip-nlxofdqcdjfm0w9o6bgq4kqv@git.kernel.org Link: http://lkml.kernel.org/r/1381265120-58532-1-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a85e4ae..c0c9795 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %" PRIu64, evsel->attr.config); + die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); pid = raw_field_value(event, "common_pid", data); -- cgit v0.10.2 From 9536c8d2da8059b00775bd9c5a84816b608cf6f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Oct 2013 12:14:04 +0200 Subject: perf/x86: Optimize intel_pmu_pebs_fixup_ip() There's been reports of high NMI handler overhead, highlighted by such kernel messages: [ 3697.380195] perf samples too long (10009 > 10000), lowering kernel.perf_event_max_sample_rate to 13000 [ 3697.389509] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 9.331 msecs Don Zickus analyzed the source of the overhead and reported: > While there are a few places that are causing latencies, for now I focused on > the longest one first. It seems to be 'copy_user_from_nmi' > > intel_pmu_handle_irq -> > intel_pmu_drain_pebs_nhm -> > __intel_pmu_drain_pebs_nhm -> > __intel_pmu_pebs_event -> > intel_pmu_pebs_fixup_ip -> > copy_from_user_nmi > > In intel_pmu_pebs_fixup_ip(), if the while-loop goes over 50, the sum of > all the copy_from_user_nmi latencies seems to go over 1,000,000 cycles > (there are some cases where only 10 iterations are needed to go that high > too, but in generall over 50 or so). At this point copy_user_from_nmi > seems to account for over 90% of the nmi latency. The solution to that is to avoid having to call copy_from_user_nmi() for every instruction. Since we already limit the max basic block size, we can easily pre-allocate a piece of memory to copy the entire thing into in one go. Don reported this test result: > Your patch made a huge difference in improvement. The > copy_from_user_nmi() no longer hits the million of cycles. I still > have a batch of 100,000-300,000 cycles. My longest NMI paths used > to be dominated by copy_from_user_nmi, now it is not (I have to dig > up the new hot path). Reported-and-tested-by: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131016105755.GX10651@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 32e9ed8..c1760ff 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -12,6 +12,7 @@ #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_BUFFER_SIZE PAGE_SIZE +#define PEBS_FIXUP_SIZE PAGE_SIZE /* * pebs_record_32 for p4 and core not supported @@ -228,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu) wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); } +static DEFINE_PER_CPU(void *, insn_buffer); + static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; int node = cpu_to_node(cpu); int max, thresh = 1; /* always use a single PEBS record */ - void *buffer; + void *buffer, *ibuffer; if (!x86_pmu.pebs) return 0; @@ -242,6 +245,19 @@ static int alloc_pebs_buffer(int cpu) if (unlikely(!buffer)) return -ENOMEM; + /* + * HSW+ already provides us the eventing ip; no need to allocate this + * buffer then. + */ + if (x86_pmu.intel_cap.pebs_format < 2) { + ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!ibuffer) { + kfree(buffer); + return -ENOMEM; + } + per_cpu(insn_buffer, cpu) = ibuffer; + } + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; @@ -262,6 +278,9 @@ static void release_pebs_buffer(int cpu) if (!ds || !x86_pmu.pebs) return; + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; + kfree((void *)(unsigned long)ds->pebs_buffer_base); ds->pebs_buffer_base = 0; } @@ -729,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; int is_64bit = 0; + void *kaddr; /* * We don't need to fixup if the PEBS assist is fault like @@ -752,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) * unsigned math, either ip is before the start (impossible) or * the basic block is larger than 1 page (sanity) */ - if ((ip - to) > PAGE_SIZE) + if ((ip - to) > PEBS_FIXUP_SIZE) return 0; /* @@ -763,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 1; } + if (!kernel_ip(ip)) { + int size, bytes; + u8 *buf = this_cpu_read(insn_buffer); + + size = ip - to; /* Must fit our buffer, see above */ + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != size) + return 0; + + kaddr = buf; + } else { + kaddr = (void *)to; + } + do { struct insn insn; - u8 buf[MAX_INSN_SIZE]; - void *kaddr; old_to = to; - if (!kernel_ip(ip)) { - int bytes, size = MAX_INSN_SIZE; - - bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) - return 0; - - kaddr = buf; - } else - kaddr = (void *)to; #ifdef CONFIG_X86_64 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); #endif insn_init(&insn, kaddr, is_64bit); insn_get_length(&insn); + to += insn.length; + kaddr += insn.length; } while (to < ip); if (to == ip) { -- cgit v0.10.2 From 97119f37bbebbab852899bd37ed52b80396728f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2013 17:34:10 -0300 Subject: perf trace: Split fd -> pathname array handling So that the part that grows the array as needed is untied from the code that reads the /proc/pid/fd symlink and can be used for the vfs_getname hook that will set the fd -> path translation too, when available. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ydo5rumyv9hdc1vsfmqamugs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d0f91fe..763bef4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -975,30 +975,9 @@ struct trace { double runtime_ms; }; -static int thread__read_fd_path(struct thread *thread, int fd) +static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) { struct thread_trace *ttrace = thread->priv; - char linkname[PATH_MAX], pathname[PATH_MAX]; - struct stat st; - int ret; - - if (thread->pid_ == thread->tid) { - scnprintf(linkname, sizeof(linkname), - "/proc/%d/fd/%d", thread->pid_, fd); - } else { - scnprintf(linkname, sizeof(linkname), - "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); - } - - if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) - return -1; - - ret = readlink(linkname, pathname, sizeof(pathname)); - - if (ret < 0 || ret > st.st_size) - return -1; - - pathname[ret] = '\0'; if (fd > ttrace->paths.max) { char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); @@ -1022,6 +1001,32 @@ static int thread__read_fd_path(struct thread *thread, int fd) return ttrace->paths.table[fd] != NULL ? 0 : -1; } +static int thread__read_fd_path(struct thread *thread, int fd) +{ + char linkname[PATH_MAX], pathname[PATH_MAX]; + struct stat st; + int ret; + + if (thread->pid_ == thread->tid) { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/fd/%d", thread->pid_, fd); + } else { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); + } + + if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) + return -1; + + ret = readlink(linkname, pathname, sizeof(pathname)); + + if (ret < 0 || ret > st.st_size) + return -1; + + pathname[ret] = '\0'; + return trace__set_fd_pathname(thread, fd, pathname); +} + static const char *thread__fd_path(struct thread *thread, int fd, bool live) { struct thread_trace *ttrace = thread->priv; -- cgit v0.10.2 From c522739d72a341a3e74a369ce6298b9412813d3f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2013 18:06:19 -0300 Subject: perf trace: Use vfs_getname hook if available Initially it tries to find a probe:vfs_getname that should be setup with: perf probe 'vfs_getname=getname_flags:65 pathname=result->name:string' or with slight changes to cope with code flux in the getname_flags code. In the future, if a "vfs:getname" tracepoint becomes available, then it will be preferred. This is not strictly required and more expensive method of reading the /proc/pid/fd/ symlink will be used when the fd->path array entry is not populated by a previous vfs_getname + open syscall ret sequence. As with any other 'perf probe' probe the setup must be done just once and the probe will be left inactive, waiting for users, be it 'perf trace' of any other tool. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ujg8se8glq5izmu8cdkq15po@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 54139c6..7b0497f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,6 +97,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show a summary of syscalls by thread with min, max, and average times (in msec) and relative stddev. +--tool_stats:: + Show tool stats such as number of times fd->pathname was discovered thru + hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 763bef4..86d2b1f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -951,7 +951,10 @@ fail: struct trace { struct perf_tool tool; - int audit_machine; + struct { + int machine; + int open_id; + } audit; struct { int max; struct syscall *table; @@ -965,14 +968,19 @@ struct trace { struct strlist *ev_qualifier; bool not_ev_qualifier; bool live; + const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; bool sched; bool multiple_threads; bool summary; bool show_comm; + bool show_tool_stats; double duration_filter; double runtime_ms; + struct { + u64 vfs_getname, proc_getname; + } stats; }; static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) @@ -1027,7 +1035,8 @@ static int thread__read_fd_path(struct thread *thread, int fd) return trace__set_fd_pathname(thread, fd, pathname); } -static const char *thread__fd_path(struct thread *thread, int fd, bool live) +static const char *thread__fd_path(struct thread *thread, int fd, + struct trace *trace) { struct thread_trace *ttrace = thread->priv; @@ -1037,9 +1046,13 @@ static const char *thread__fd_path(struct thread *thread, int fd, bool live) if (fd < 0) return NULL; - if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) && - (!live || thread__read_fd_path(thread, fd))) - return NULL; + if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) + if (!trace->live) + return NULL; + ++trace->stats.proc_getname; + if (thread__read_fd_path(thread, fd)) { + return NULL; + } return ttrace->paths.table[fd]; } @@ -1049,7 +1062,7 @@ static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, { int fd = arg->val; size_t printed = scnprintf(bf, size, "%d", fd); - const char *path = thread__fd_path(arg->thread, fd, arg->trace->live); + const char *path = thread__fd_path(arg->thread, fd, arg->trace); if (path) printed += scnprintf(bf + printed, size - printed, "<%s>", path); @@ -1186,7 +1199,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; struct syscall *sc; - const char *name = audit_syscall_to_name(id, trace->audit_machine); + const char *name = audit_syscall_to_name(id, trace->audit.machine); if (name == NULL) return -1; @@ -1450,6 +1463,12 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__intval(evsel, sample, "ret"); + if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { + trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); + trace->last_vfs_getname = NULL; + ++trace->stats.vfs_getname; + } + ttrace = thread->priv; ttrace->exit_time = sample->time; @@ -1494,6 +1513,13 @@ out: return 0; } +static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + return 0; +} + static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample) { @@ -1616,6 +1642,22 @@ static int trace__record(int argc, const char **argv) static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); +static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname", + evlist->nr_entries); + if (evsel == NULL) + return; + + if (perf_evsel__field(evsel, "pathname") == NULL) { + perf_evsel__delete(evsel); + return; + } + + evsel->handler.func = trace__vfs_getname; + perf_evlist__add(evlist, evsel); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1635,6 +1677,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) goto out_error_tp; + perf_evlist__add_vfs_getname(evlist); + if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) @@ -1741,12 +1785,22 @@ again: if (done) perf_evlist__disable(evlist); - - goto again; + else + goto again; out_unmap_evlist: - if (!err && trace->summary) - trace__fprintf_thread_summary(trace, trace->output); + if (!err) { + if (trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + + if (trace->show_tool_stats) { + fprintf(trace->output, "Stats:\n " + " vfs_getname : %" PRIu64 "\n" + " proc_getname: %" PRIu64 "\n", + trace->stats.vfs_getname, + trace->stats.proc_getname); + } + } perf_evlist__munmap(evlist); out_close_evlist: @@ -1788,6 +1842,7 @@ static int trace__replay(struct trace *trace) const struct perf_evsel_str_handler handlers[] = { { "raw_syscalls:sys_enter", trace__sys_enter, }, { "raw_syscalls:sys_exit", trace__sys_exit, }, + { "probe:vfs_getname", trace__vfs_getname, }, }; struct perf_session *session; @@ -1997,7 +2052,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct trace trace = { - .audit_machine = audit_detect_machine(), + .audit = { + .machine = audit_detect_machine(), + .open_id = audit_name_to_syscall("open", trace.audit.machine), + }, .syscalls = { . max = -1, }, @@ -2019,6 +2077,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const struct option trace_options[] = { OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), + OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), -- cgit v0.10.2 From ba209f856380891a6ea6cdb07b2b068faccbff73 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Oct 2013 11:57:33 -0300 Subject: perf trace: Improve event processing exit We need to differentiate SIGCHLD from SIGINT, the later should cause as immediate as possible exit, while the former should wait to process the events that may be perceived in the ring buffer after the SIGCHLD is handled. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vf6n57ewm3mjy2sz6r491hus@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86d2b1f..ec82895 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1098,10 +1098,12 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) } static bool done = false; +static bool interrupted = false; -static void sig_handler(int sig __maybe_unused) +static void sig_handler(int sig) { done = true; + interrupted = sig == SIGINT; } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, @@ -1771,24 +1773,23 @@ again: handler = evsel->handler.func; handler(trace, evsel, &sample); - if (done) - goto out_unmap_evlist; + if (interrupted) + goto out_disable; } } if (trace->nr_events == before) { - if (done) - goto out_unmap_evlist; + int timeout = done ? 100 : -1; - poll(evlist->pollfd, evlist->nr_fds, -1); + if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) + goto again; + } else { + goto again; } - if (done) - perf_evlist__disable(evlist); - else - goto again; +out_disable: + perf_evlist__disable(evlist); -out_unmap_evlist: if (!err) { if (trace->summary) trace__fprintf_thread_summary(trace, trace->output); -- cgit v0.10.2 From 6ef068cb8e77784431b6c80adc49d0b0a6a5df66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 12:07:58 -0300 Subject: perf evlist: Introduce perf_evlist__strerror_tp method Out of 'perf trace', should be used by other tools that uses tracepoints. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ramkumar Ramachandra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-lyvtxhchz4ga8fwht15x8wou@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ec82895..78b0d6a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1814,27 +1814,11 @@ out: trace->live = false; return err; out_error_tp: - switch(errno) { - case ENOENT: - fputs("Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel was compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n", - trace->output); - break; - case EACCES: - fprintf(trace->output, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: { - char bf[256]; - fprintf(trace->output, "Can't trace: %s\n", - strerror_r(errno, bf, sizeof(bf))); - } - break; - } +{ + char errbuf[BUFSIZ]; + perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); + fprintf(trace->output, "%s\n", errbuf); +} goto out_delete_evlist; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cb9523f..6737420 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1126,3 +1126,30 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n");; } + +int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + scnprintf(buf, size, "%s", + "Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel was compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: + scnprintf(buf, size, + "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, debugfs_mountpoint); + break; + default: + scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 722618f..386de10 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -168,6 +168,8 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); +int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); + static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; -- cgit v0.10.2 From 97a07f10c38064dea492794c99445f6260afcdc1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 16:33:43 -0300 Subject: perf tools: Introduce filename__read_int helper Just opens a file and calls atoi() in at most its first 64 bytes. To read things like /proc/sys/kernel/perf_event_paranoid. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-669q04c5tou5pnt8jtiz6y2r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 8dc8cf3..c25e57b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -394,3 +394,20 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } + +int filename__read_int(const char *filename, int *value) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = atoi(line); + err = 0; + } + + close(fd); + return err; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 42dfba7..c8f362d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -305,4 +305,6 @@ struct dso; char *get_srcline(struct dso *dso, unsigned long addr); void free_srcline(char *srcline); + +int filename__read_int(const char *filename, int *value); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v0.10.2 From a8f23d8f8af43d49cd3331681913b76e4951e1a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 17:38:29 -0300 Subject: perf trace: Improve messages related to /proc/sys/kernel/perf_event_paranoid kernel/events/core.c has: /* * perf event paranoia level: * -1 - not paranoid at all * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv */ int sysctl_perf_event_paranoid __read_mostly = 1; So, with the default being 1, a non-root user can trace his stuff: [acme@zoo ~]$ cat /proc/sys/kernel/perf_event_paranoid 1 [acme@zoo ~]$ yes > /dev/null & [1] 15338 [acme@zoo ~]$ trace -p 15338 | head -5 0.005 ( 0.005 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.045 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.085 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.125 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.165 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 [acme@zoo ~]$ [acme@zoo ~]$ trace --duration 1 sleep 1 1002.148 (1001.218 ms): nanosleep(rqtp: 0x7fff46c79250 ) = 0 [acme@zoo ~]$ [acme@zoo ~]$ trace -- usleep 1 | tail -5 0.905 ( 0.002 ms): brk( ) = 0x1c82000 0.910 ( 0.003 ms): brk(brk: 0x1ca3000 ) = 0x1ca3000 0.913 ( 0.001 ms): brk( ) = 0x1ca3000 0.990 ( 0.059 ms): nanosleep(rqtp: 0x7fffe31a3280 ) = 0 0.995 ( 0.000 ms): exit_group( [acme@zoo ~]$ But can't do system wide tracing: [acme@zoo ~]$ trace Error: Operation not permitted. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 1. [acme@zoo ~]$ [acme@zoo ~]$ trace --cpu 0 Error: Operation not permitted. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 1. [acme@zoo ~]$ If the paranoid level is >= 2, i.e. turn this perf stuff off for !root users: [acme@zoo ~]$ sudo sh -c 'echo 2 > /proc/sys/kernel/perf_event_paranoid' [acme@zoo ~]$ cat /proc/sys/kernel/perf_event_paranoid 2 [acme@zoo ~]$ [acme@zoo ~]$ trace usleep 1 Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ [acme@zoo ~]$ trace Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ [acme@zoo ~]$ trace --cpu 1 Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ If the user manages to get what he/she wants, convincing root not to be paranoid at all... [root@zoo ~]# echo -1 > /proc/sys/kernel/perf_event_paranoid [root@zoo ~]# cat /proc/sys/kernel/perf_event_paranoid -1 [root@zoo ~]# [acme@zoo ~]$ ps -eo user,pid,comm | grep Xorg root 729 Xorg [acme@zoo ~]$ [acme@zoo ~]$ trace -a --duration 0.001 -e \!select,ioctl,writev | grep Xorg | head -5 23.143 ( 0.003 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 23.152 ( 0.004 ms): Xorg/729 read(fd: 31, buf: 0x2544af0, count: 4096 ) = 8 23.161 ( 0.002 ms): Xorg/729 read(fd: 31, buf: 0x2544af0, count: 4096 ) = -1 EAGAIN Resource temporarily unavailable 23.175 ( 0.002 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 23.235 ( 0.002 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 [acme@zoo ~]$ Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-di28olfwd28rvkox7v3hqhu1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 78b0d6a..db959ac 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1713,10 +1713,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } err = perf_evlist__open(evlist); - if (err < 0) { - fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); - goto out_delete_maps; - } + if (err < 0) + goto out_error_open; err = perf_evlist__mmap(evlist, UINT_MAX, false); if (err < 0) { @@ -1813,14 +1811,21 @@ out_delete_evlist: out: trace->live = false; return err; -out_error_tp: { char errbuf[BUFSIZ]; + +out_error_tp: perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); + goto out_error; + +out_error_open: + perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + +out_error: fprintf(trace->output, "%s\n", errbuf); -} goto out_delete_evlist; } +} static int trace__replay(struct trace *trace) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6737420..0b5425b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1153,3 +1153,39 @@ int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, return 0; } + +int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + int printed, value; + char sbuf[128], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + + switch (err) { + case EACCES: + case EPERM: + printed = scnprintf(buf, size, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); + + if (filename__read_int("/proc/sys/kernel/perf_event_paranoid", &value)) + break; + + printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); + + if (value >= 2) { + printed += scnprintf(buf + printed, size - printed, + "For your workloads it needs to be <= 1\nHint:\t"); + } + printed += scnprintf(buf + printed, size - printed, + "For system wide tracing it needs to be set to -1"); + + printed += scnprintf(buf + printed, size - printed, + ".\nHint:\tThe current value is %d.", value); + break; + default: + scnprintf(buf, size, "%s", emsg); + break; + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 386de10..7f8f1ae 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,6 +169,7 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); +int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { -- cgit v0.10.2 From 40d54ec2f77edc52340dcae236aaabe8c3cc3a07 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:28:58 +0300 Subject: perf evsel: Add missing 'mmap2' from debug print The struct perf_event_attr now has a 'mmap2' member. Add it to perf_event_attr__fprintf(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bfebc1e..291b18a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -986,6 +986,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) ret += PRINT_ATTR2(exclude_host, exclude_guest); ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, "excl.callchain_user", exclude_callchain_user); + ret += PRINT_ATTR_U32(mmap2); ret += PRINT_ATTR_U32(wakeup_events); ret += PRINT_ATTR_U32(wakeup_watermark); -- cgit v0.10.2 From dd44bc6be05e4a948124053c8105cfa581177554 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:01 +0300 Subject: perf evsel: Add missing decrement in id sample parsing The final array decrement in id sample parsing is missing, which may trip up the next person adding a sample format, so add it in. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 291b18a..ec0cc1e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1218,6 +1218,7 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, sample->pid = u.val32[0]; sample->tid = u.val32[1]; + array--; } return 0; -- cgit v0.10.2 From 4f624685f92719565981eb6f8d9195bb578522a3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:00 +0300 Subject: perf record: Improve write_output error message Improve the error message from write_output() to say what failed to write and give the error number. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 92ca541..d269dfa 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -88,7 +88,7 @@ static int write_output(struct perf_record *rec, void *buf, size_t size) int ret = write(rec->output, buf, size); if (ret < 0) { - pr_err("failed to write\n"); + pr_err("failed to write perf data, error: %m\n"); return -1; } -- cgit v0.10.2 From 8c16b649606ff9f6d742ad6f71c76fc0ee996c8e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:02 +0300 Subject: perf session: Add missing sample flush for piped events Piped events can be sorted so a final flush is needed. Add that and remove a redundant 'err = 0'. Signed-off-by: Adrian Hunter Reviewed-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d1e4495..d51e62d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1263,7 +1263,9 @@ more: if (!session_done()) goto more; done: - err = 0; + /* do the final flush for ordered samples */ + self->ordered_samples.next_flush = ULLONG_MAX; + err = flush_sample_queue(self, tool); out_err: free(buf); perf_session__warn_about_errors(self, tool); @@ -1392,13 +1394,13 @@ more: "Processing events..."); } - err = 0; if (session_done()) - goto out_err; + goto out; if (file_pos < file_size) goto more; +out: /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; err = flush_sample_queue(session, tool); -- cgit v0.10.2 From 7db5952846dfa015d74e64b5e8656acac979490b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:03 +0300 Subject: perf session: Add missing members to perf_event__attr_swap() The perf_event__attr_swap() method needs to swap all members of struct perf_event_attr. Add missing ones. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d51e62d..d4559ca 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -453,6 +453,9 @@ void perf_event__attr_swap(struct perf_event_attr *attr) attr->bp_type = bswap_32(attr->bp_type); attr->bp_addr = bswap_64(attr->bp_addr); attr->bp_len = bswap_64(attr->bp_len); + attr->branch_sample_type = bswap_64(attr->branch_sample_type); + attr->sample_regs_user = bswap_64(attr->sample_regs_user); + attr->sample_stack_user = bswap_32(attr->sample_stack_user); swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -- cgit v0.10.2 From 2af68ef50c6afc1632edc984e9c834545d90f597 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:07 +0300 Subject: perf evlist: Fix 32-bit build error util/evlist.c: In function 'perf_evlist__mmap': util/evlist.c:772:2: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t' [-Werror=format] cc1: all warnings being treated as errors Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0b5425b..07ba0a4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -769,7 +769,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); - pr_debug("mmap size %luB\n", evlist->mmap_len); + pr_debug("mmap size %zuB\n", evlist->mmap_len); mask = evlist->mmap_len - page_size - 1; list_for_each_entry(evsel, &evlist->entries, node) { -- cgit v0.10.2 From 9402802a416c96b48b2bd9331c070ba2d7550b36 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:08 +0300 Subject: perf tools: Fix test_on_exit for 32-bit build builtin-record.c:42:12: error: static declaration of 'on_exit' follows non-static declaration In file included from util/util.h:51:0, from builtin.h:4, from builtin-record.c:8: /usr/include/stdlib.h:536:12: note: previous declaration of 'on_exit' was here Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c index 8f64ed3..8e88b16 100644 --- a/tools/perf/config/feature-checks/test-on-exit.c +++ b/tools/perf/config/feature-checks/test-on-exit.c @@ -1,4 +1,5 @@ #include +#include static void exit_fn(int status, void *__data) { -- cgit v0.10.2 From 2100f778d44b9c25bd13d38c24a999e2caf1ae3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:09 +0300 Subject: perf tools: Fix bench/numa.c for 32-bit build bench/numa.c: In function 'worker_thread': bench/numa.c:1123:20: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] bench/numa.c:1171:6: error: format '%lx' expects argument of type 'long unsigned int', but argument 5 has type 'u64' [-Werror=format] cc1: all warnings being treated as errors Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 64fa01c..d4c83c6 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1120,7 +1120,7 @@ static void *worker_thread(void *__tdata) /* Check whether our max runtime timed out: */ if (g->p.nr_secs) { timersub(&stop, &start0, &diff); - if (diff.tv_sec >= g->p.nr_secs) { + if ((u32)diff.tv_sec >= g->p.nr_secs) { g->stop_work = true; break; } @@ -1167,7 +1167,7 @@ static void *worker_thread(void *__tdata) runtime_ns_max += diff.tv_usec * 1000; if (details >= 0) { - printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n", + printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", process_nr, thread_nr, runtime_ns_max / bytes_done, val); } fflush(stdout); -- cgit v0.10.2 From c83fa7f2549cb60bc4d429de31096546f659ce6d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:12 +0300 Subject: perf evlist: Fix perf_evlist__mmap comments Put the comments into the correct kernel-doc format and correct reference to perf_evlist__read_on_cpu() which should be perf_evlist__mmap_read(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 07ba0a4..acef94a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -738,20 +738,17 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, return 0; } -/** perf_evlist__mmap - Create per cpu maps to receive events - * - * @evlist - list of events - * @pages - map length in pages - * @overwrite - overwrite older events? - * - * If overwrite is false the user needs to signal event consuption using: - * - * struct perf_mmap *m = &evlist->mmap[cpu]; - * unsigned int head = perf_mmap__read_head(m); +/** + * perf_evlist__mmap - Create mmaps to receive events. + * @evlist: list of events + * @pages: map length in pages + * @overwrite: overwrite older events? * - * perf_mmap__write_tail(m, head) + * If @overwrite is %false the user needs to signal event consumption using + * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this + * automatically. * - * Using perf_evlist__read_on_cpu does this automatically. + * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite) -- cgit v0.10.2 From 04e213148c434544e3aa88d227b5375fd375738b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:13 +0300 Subject: perf evlist: Factor out duplicated mmap code The same code is used in perf_evlist__mmap_per_cpu() and perf_evlist__mmap_per_thread(). Factor it out into a separate function perf_evlist__mmap_per_evsel(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-17-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index acef94a..85c4c80 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -608,9 +608,36 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, return 0; } -static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask) +static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, + int prot, int mask, int cpu, int thread, + int *output) { struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + int fd = FD(evsel, cpu, thread); + + if (*output == -1) { + *output = fd; + if (__perf_evlist__mmap(evlist, idx, prot, mask, + *output) < 0) + return -1; + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) + return -1; + } + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) + return -1; + } + + return 0; +} + +static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, + int mask) +{ int cpu, thread; int nr_cpus = cpu_map__nr(evlist->cpus); int nr_threads = thread_map__nr(evlist->threads); @@ -620,23 +647,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m int output = -1; for (thread = 0; thread < nr_threads; thread++) { - list_for_each_entry(evsel, &evlist->entries, node) { - int fd = FD(evsel, cpu, thread); - - if (output == -1) { - output = fd; - if (__perf_evlist__mmap(evlist, cpu, - prot, mask, output) < 0) - goto out_unmap; - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) - goto out_unmap; - } - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) - goto out_unmap; - } + if (perf_evlist__mmap_per_evsel(evlist, cpu, prot, mask, + cpu, thread, &output)) + goto out_unmap; } } @@ -648,9 +661,9 @@ out_unmap: return -1; } -static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask) +static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, + int mask) { - struct perf_evsel *evsel; int thread; int nr_threads = thread_map__nr(evlist->threads); @@ -658,23 +671,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in for (thread = 0; thread < nr_threads; thread++) { int output = -1; - list_for_each_entry(evsel, &evlist->entries, node) { - int fd = FD(evsel, 0, thread); - - if (output == -1) { - output = fd; - if (__perf_evlist__mmap(evlist, thread, - prot, mask, output) < 0) - goto out_unmap; - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) - goto out_unmap; - } - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0) - goto out_unmap; - } + if (perf_evlist__mmap_per_evsel(evlist, thread, prot, mask, 0, + thread, &output)) + goto out_unmap; } return 0; -- cgit v0.10.2 From 243be3dd7c14454899e51334a6d66d29a41ae6ab Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:14 +0300 Subject: perf script: Print addr by default for BTS The addr field is not displayed by default for hardware events, however for branch events it is the target of the branch so for BTS display it by default if it was recorded. Signed-off-by: Adrian Hunter Acked-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-18-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9c333ff..ebb2b5f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -409,7 +409,9 @@ static void print_sample_bts(union perf_event *event, printf(" => "); /* print branch_to information */ - if (PRINT_FIELD(ADDR)) + if (PRINT_FIELD(ADDR) || + ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && + !output[attr->type].user_set)) print_sample_addr(event, sample, machine, thread, attr); printf("\n"); -- cgit v0.10.2 From f11cfc6f294dbd83b0d58037404df2bd16066238 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 15 Oct 2013 23:07:27 +0300 Subject: perf list: Show error if tracepoints not available Tracepoints are not visible in "perf list" on Fedora 19 because regular users have no permission to /sys/kernel/debug by default. Show an error message so that the user knows about it instead of assuming that tracepoints are not supported on the system. Signed-off-by: Pekka Enberg Acked-by: Ingo Molnar Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1381867647-8594-1-git-send-email-penberg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9812531..c90e55c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -998,8 +998,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) + if (debugfs_valid_mountpoint(tracing_events_path)) { + printf(" [ Tracepoints not available: %s ]\n", strerror(errno)); return; + } sys_dir = opendir(tracing_events_path); if (!sys_dir) -- cgit v0.10.2 From e369517ce5f796945c6af047b4e8b1d650e03458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Oct 2013 14:15:36 +0900 Subject: perf callchain: Convert children list to rbtree Current collapse stage has a scalability problem which can be reproduced easily with a parallel kernel build. This is because it needs to traverse every children of callchains linearly during the collapse/merge stage. Converting it to a rbtree reduced the overhead significantly. On my 400MB perf.data file which recorded with make -j32 kernel build: $ time perf --no-pager report --stdio > /dev/null before: real 6m22.073s user 6m18.683s sys 0m0.706s after: real 0m20.780s user 0m19.962s sys 0m0.689s During the perf report the overhead on append_chain_children went down from 96.69% to 18.16%: - 18.16% perf perf [.] append_chain_children - append_chain_children - 77.48% append_chain_children + 69.79% merge_chain_branch - 22.96% append_chain_children + 67.44% merge_chain_branch + 30.15% append_chain_children + 2.41% callchain_append + 7.25% callchain_append + 12.26% callchain_append + 10.22% merge_chain_branch + 11.58% perf perf [.] dso__find_symbol + 8.02% perf perf [.] sort__comm_cmp + 5.48% perf libc-2.17.so [.] malloc_consolidate Reported-by: Linus Torvalds Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 482f680..e3970e3 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -21,12 +21,6 @@ __thread struct callchain_cursor callchain_cursor; -#define chain_for_each_child(child, parent) \ - list_for_each_entry(child, &parent->children, siblings) - -#define chain_for_each_child_safe(child, next, parent) \ - list_for_each_entry_safe(child, next, &parent->children, siblings) - static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, enum chain_mode mode) @@ -71,10 +65,16 @@ static void __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, u64 min_hit) { + struct rb_node *n; struct callchain_node *child; - chain_for_each_child(child, node) + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + __sort_chain_flat(rb_root, child, min_hit); + } if (node->hit && node->hit >= min_hit) rb_insert_callchain(rb_root, node, CHAIN_FLAT); @@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, static void __sort_chain_graph_abs(struct callchain_node *node, u64 min_hit) { + struct rb_node *n; struct callchain_node *child; node->rb_root = RB_ROOT; + n = rb_first(&node->rb_root_in); + + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); - chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, @@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, static void __sort_chain_graph_rel(struct callchain_node *node, double min_percent) { + struct rb_node *n; struct callchain_node *child; u64 min_hit; node->rb_root = RB_ROOT; min_hit = ceil(node->children_hit * min_percent); - chain_for_each_child(child, node) { + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + __sort_chain_graph_rel(child, min_percent); if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, @@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children) return NULL; } new->parent = parent; - INIT_LIST_HEAD(&new->children); INIT_LIST_HEAD(&new->val); if (inherit_children) { - struct callchain_node *next; + struct rb_node *n; + struct callchain_node *child; + + new->rb_root_in = parent->rb_root_in; + parent->rb_root_in = RB_ROOT; - list_splice(&parent->children, &new->children); - INIT_LIST_HEAD(&parent->children); + n = rb_first(&new->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + child->parent = new; + n = rb_next(n); + } - chain_for_each_child(next, new) - next->parent = new; + /* make it the first child */ + rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node); + rb_insert_color(&new->rb_node_in, &parent->rb_root_in); } - list_add_tail(&new->siblings, &parent->children); return new; } @@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } } -static void +static struct callchain_node * add_child(struct callchain_node *parent, struct callchain_cursor *cursor, u64 period) @@ -235,6 +252,19 @@ add_child(struct callchain_node *parent, new->children_hit = 0; new->hit = period; + return new; +} + +static s64 match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) +{ + struct symbol *sym = node->sym; + + if (cnode->ms.sym && sym && + callchain_param.key == CCKEY_FUNCTION) + return cnode->ms.sym->start - sym->start; + else + return cnode->ip - node->ip; } /* @@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent, /* create a new child for the new branch if any */ if (idx_total < cursor->nr) { + struct callchain_node *first; + struct callchain_list *cnode; + struct callchain_cursor_node *node; + struct rb_node *p, **pp; + parent->hit = 0; - add_child(parent, cursor, period); parent->children_hit += period; + + node = callchain_cursor_current(cursor); + new = add_child(parent, cursor, period); + + /* + * This is second child since we moved parent's children + * to new (first) child above. + */ + p = parent->rb_root_in.rb_node; + first = rb_entry(p, struct callchain_node, rb_node_in); + cnode = list_first_entry(&first->val, struct callchain_list, + list); + + if (match_chain(node, cnode) < 0) + pp = &p->rb_left; + else + pp = &p->rb_right; + + rb_link_node(&new->rb_node_in, p, pp); + rb_insert_color(&new->rb_node_in, &parent->rb_root_in); } else { parent->hit = period; } @@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root, u64 period) { struct callchain_node *rnode; + struct callchain_cursor_node *node; + struct rb_node **p = &root->rb_root_in.rb_node; + struct rb_node *parent = NULL; + + node = callchain_cursor_current(cursor); + if (!node) + return; /* lookup in childrens */ - chain_for_each_child(rnode, root) { - unsigned int ret = append_chain(rnode, cursor, period); + while (*p) { + s64 ret; + struct callchain_list *cnode; - if (!ret) + parent = *p; + rnode = rb_entry(parent, struct callchain_node, rb_node_in); + cnode = list_first_entry(&rnode->val, struct callchain_list, + list); + + /* just check first entry */ + ret = match_chain(node, cnode); + if (ret == 0) { + append_chain(rnode, cursor, period); goto inc_children_hit; + } + + if (ret < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; } /* nothing in children, add to the current node */ - add_child(root, cursor, period); + rnode = add_child(root, cursor, period); + rb_link_node(&rnode->rb_node_in, parent, p); + rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); inc_children_hit: root->children_hit += period; @@ -325,28 +403,20 @@ append_chain(struct callchain_node *root, */ list_for_each_entry(cnode, &root->val, list) { struct callchain_cursor_node *node; - struct symbol *sym; node = callchain_cursor_current(cursor); if (!node) break; - sym = node->sym; - - if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) { - if (cnode->ms.sym->start != sym->start) - break; - } else if (cnode->ip != node->ip) + if (match_chain(node, cnode) != 0) break; - if (!found) - found = true; + found = true; callchain_cursor_advance(cursor); } - /* matches not, relay on the parent */ + /* matches not, relay no the parent */ if (!found) { cursor->curr = curr_snap; cursor->pos = start; @@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor, struct callchain_node *dst, struct callchain_node *src) { struct callchain_cursor_node **old_last = cursor->last; - struct callchain_node *child, *next_child; + struct callchain_node *child; struct callchain_list *list, *next_list; + struct rb_node *n; int old_pos = cursor->nr; int err = 0; @@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor, append_chain_children(dst, cursor, src->hit); } - chain_for_each_child_safe(child, next_child, src) { + n = rb_first(&src->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &src->rb_root_in); + err = merge_chain_branch(cursor, dst, child); if (err) break; - list_del(&child->siblings); free(child); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 2b585bc..7bb3602 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -21,11 +21,11 @@ enum chain_order { struct callchain_node { struct callchain_node *parent; - struct list_head siblings; - struct list_head children; struct list_head val; - struct rb_node rb_node; /* to sort nodes in an rbtree */ - struct rb_root rb_root; /* sorted tree of children */ + struct rb_node rb_node_in; /* to insert nodes in an rbtree */ + struct rb_node rb_node; /* to sort nodes in an output tree */ + struct rb_root rb_root_in; /* input tree of children */ + struct rb_root rb_root; /* sorted output tree of children */ unsigned int val_nr; u64 hit; u64 children_hit; @@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor; static inline void callchain_init(struct callchain_root *root) { - INIT_LIST_HEAD(&root->node.siblings); - INIT_LIST_HEAD(&root->node.children); INIT_LIST_HEAD(&root->node.val); root->node.parent = NULL; root->node.hit = 0; root->node.children_hit = 0; + root->node.rb_root_in = RB_ROOT; root->max_depth = 0; } -- cgit v0.10.2 From 09600e0f9ebb06235b852a646a3644b7d4a71aca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Oct 2013 11:01:56 +0900 Subject: perf tools: Compare dso's also when comparing symbols Linus reported that sometimes 'perf report -s symbol' exits without any message on TUI. David and Jiri found that it's because it failed to add a hist entry due to an invalid symbol length. It turns out that sorting by symbol (address) was broken since it only compares symbol addresses. The symbol address is a relative address within a dso thus just checking its address can result in merging unrelated symbols together. Fix it by checking dso before comparing symbol address. Reported-by: Linus Torvalds Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381802517-18812-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 32c5637..1f9821d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -182,9 +182,19 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { + int64_t ret; + if (!left->ms.sym && !right->ms.sym) return right->level - left->level; + /* + * comparing symbol address alone is not enough since it's a + * relative address within a dso. + */ + ret = sort__dso_cmp(left, right); + if (ret != 0) + return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym); } -- cgit v0.10.2 From f5fc14124c5cefdd052a2b2a6a3f0ed531540113 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:32 +0200 Subject: perf tools: Add data object to handle perf data file This patch is adding 'struct perf_data_file' object as a placeholder for all attributes regarding perf.data file handling. Changing perf_session__new to take it as an argument. The rest of the functionality will be added later to keep this change simple enough, because all the places using perf_session are changed now. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 94f9a8e..95df683 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -28,6 +28,7 @@ #include "util/hist.h" #include "util/session.h" #include "util/tool.h" +#include "util/data.h" #include "arch/common.h" #include @@ -199,9 +200,13 @@ static int __cmd_annotate(struct perf_annotate *ann) struct perf_session *session; struct perf_evsel *pos; u64 total_nr_samples; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = ann->force, + }; - session = perf_session__new(input_name, O_RDONLY, - ann->force, false, &ann->tool); + session = perf_session__new(&file, false, &ann->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 8140b7b..cfede86 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -221,8 +221,12 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp) { - struct perf_session *session = perf_session__new(filename, O_RDONLY, - force, false, NULL); + struct perf_data_file file = { + .path = filename, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; + struct perf_session *session = perf_session__new(&file, false, NULL); if (session == NULL) return -1; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e74366a..0164c1c 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -15,6 +15,7 @@ #include "util/parse-options.h" #include "util/session.h" #include "util/symbol.h" +#include "util/data.h" static int sysfs__fprintf_build_id(FILE *fp) { @@ -52,6 +53,11 @@ static bool dso__skip_buildid(struct dso *dso, int with_hits) static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; symbol__elf_init(); /* @@ -60,8 +66,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (filename__fprintf_build_id(input_name, stdout)) goto out; - session = perf_session__new(input_name, O_RDONLY, force, false, - &build_id__mark_dso_hit_ops); + session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); if (session == NULL) return -1; /* diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 2a78dc8..419d27d 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -16,6 +16,7 @@ #include "util/sort.h" #include "util/symbol.h" #include "util/util.h" +#include "util/data.h" #include #include @@ -42,7 +43,7 @@ struct diff_hpp_fmt { struct data__file { struct perf_session *session; - const char *file; + struct perf_data_file file; int idx; struct hists *hists; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; @@ -601,7 +602,7 @@ static void data__fprintf(void) data__for_each_file(i, d) fprintf(stdout, "# [%d] %s %s\n", - d->idx, d->file, + d->idx, d->file.path, !d->idx ? "(Baseline)" : ""); fprintf(stdout, "#\n"); @@ -663,17 +664,16 @@ static int __cmd_diff(void) int ret = -EINVAL, i; data__for_each_file(i, d) { - d->session = perf_session__new(d->file, O_RDONLY, force, - false, &tool); + d->session = perf_session__new(&d->file, false, &tool); if (!d->session) { - pr_err("Failed to open %s\n", d->file); + pr_err("Failed to open %s\n", d->file.path); ret = -ENOMEM; goto out_delete; } ret = perf_session__process_events(d->session, &tool); if (ret) { - pr_err("Failed to process %s\n", d->file); + pr_err("Failed to process %s\n", d->file.path); goto out_delete; } @@ -1016,7 +1016,12 @@ static int data_init(int argc, const char **argv) return -ENOMEM; data__for_each_file(i, d) { - d->file = use_default ? defaults[i] : argv[i]; + struct perf_data_file *file = &d->file; + + file->path = use_default ? defaults[i] : argv[i]; + file->mode = PERF_DATA_MODE_READ, + file->force = force, + d->idx = i; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 05bd9df..20b0f12 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -14,13 +14,18 @@ #include "util/parse-events.h" #include "util/parse-options.h" #include "util/session.h" +#include "util/data.h" static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { struct perf_session *session; struct perf_evsel *pos; + struct perf_data_file file = { + .path = file_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(file_name, O_RDONLY, 0, false, NULL); + session = perf_session__new(&file, 0, NULL); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f51a963..4aa6d78 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -15,6 +15,7 @@ #include "util/tool.h" #include "util/debug.h" #include "util/build-id.h" +#include "util/data.h" #include "util/parse-options.h" @@ -345,6 +346,10 @@ static int __cmd_inject(struct perf_inject *inject) { struct perf_session *session; int ret = -EINVAL; + struct perf_data_file file = { + .path = inject->input_name, + .mode = PERF_DATA_MODE_READ, + }; signal(SIGINT, sig_handler); @@ -355,7 +360,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.tracing_data = perf_event__repipe_tracing_data; } - session = perf_session__new(inject->input_name, O_RDONLY, false, true, &inject->tool); + session = perf_session__new(&file, true, &inject->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9b5f077..1126382 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -13,6 +13,7 @@ #include "util/parse-options.h" #include "util/trace-event.h" +#include "util/data.h" #include "util/debug.h" @@ -486,8 +487,12 @@ static int __cmd_kmem(void) { "kmem:kfree", perf_evsel__process_free_event, }, { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); + session = perf_session__new(&file, false, &perf_kmem); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cfa0b79..188bb29 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -17,6 +17,7 @@ #include "util/tool.h" #include "util/stat.h" #include "util/top.h" +#include "util/data.h" #include #include @@ -1215,10 +1216,13 @@ static int read_events(struct perf_kvm_stat *kvm) .comm = perf_event__process_comm, .ordered_samples = true, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; kvm->tool = eops; - kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false, - &kvm->tool); + kvm->session = perf_session__new(&file, false, &kvm->tool); if (!kvm->session) { pr_err("Initializing perf session failed\n"); return -EINVAL; @@ -1450,6 +1454,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, "perf kvm stat live []", NULL }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_WRITE, + }; /* event handling */ @@ -1514,7 +1521,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * perf session */ - kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool); + kvm->session = perf_session__new(&file, false, &kvm->tool); if (kvm->session == NULL) { err = -ENOMEM; goto out; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6a9076f..33c7253 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -15,6 +15,7 @@ #include "util/debug.h" #include "util/session.h" #include "util/tool.h" +#include "util/data.h" #include #include @@ -853,8 +854,12 @@ static int __cmd_report(bool display_info) .comm = perf_event__process_comm, .ordered_samples = true, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); + session = perf_session__new(&file, false, &eops); if (!session) { pr_err("Initializing perf session failed\n"); return -ENOMEM; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 253133a..31c00f1 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -5,6 +5,7 @@ #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" +#include "util/data.h" #define MEM_OPERATION_LOAD "load" #define MEM_OPERATION_STORE "store" @@ -119,10 +120,14 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; int err = -EINVAL; int ret; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &mem->tool); + struct perf_session *session = perf_session__new(&file, false, + &mem->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d269dfa..4ea46ff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -24,6 +24,7 @@ #include "util/symbol.h" #include "util/cpumap.h" #include "util/thread_map.h" +#include "util/data.h" #include #include @@ -65,11 +66,10 @@ struct perf_record { struct perf_tool tool; struct perf_record_opts opts; u64 bytes_written; - const char *output_name; + struct perf_data_file file; struct perf_evlist *evlist; struct perf_session *session; const char *progname; - int output; int realtime_prio; bool no_buildid; bool no_buildid_cache; @@ -84,8 +84,10 @@ static void advance_output(struct perf_record *rec, size_t size) static int write_output(struct perf_record *rec, void *buf, size_t size) { + struct perf_data_file *file = &rec->file; + while (size) { - int ret = write(rec->output, buf, size); + int ret = write(file->fd, buf, size); if (ret < 0) { pr_err("failed to write perf data, error: %m\n"); @@ -248,13 +250,15 @@ out: static int process_buildids(struct perf_record *rec) { - u64 size = lseek(rec->output, 0, SEEK_CUR); + struct perf_data_file *file = &rec->file; + struct perf_session *session = rec->session; + u64 size = lseek(file->fd, 0, SEEK_CUR); if (size == 0) return 0; - rec->session->fd = rec->output; - return __perf_session__process_events(rec->session, rec->post_processing_offset, + session->fd = file->fd; + return __perf_session__process_events(session, rec->post_processing_offset, size - rec->post_processing_offset, size, &build_id__mark_dso_hit_ops); } @@ -262,17 +266,18 @@ static int process_buildids(struct perf_record *rec) static void perf_record__exit(int status, void *arg) { struct perf_record *rec = arg; + struct perf_data_file *file = &rec->file; if (status != 0) return; - if (!rec->opts.pipe_output) { + if (!file->is_pipe) { rec->session->header.data_size += rec->bytes_written; if (!rec->no_buildid) process_buildids(rec); perf_session__write_header(rec->session, rec->evlist, - rec->output, true); + file->fd, true); perf_session__delete(rec->session); perf_evlist__delete(rec->evlist); symbol__exit(); @@ -342,14 +347,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { struct stat st; int flags; - int err, output, feat; + int err, feat; unsigned long waking = 0; const bool forks = argc > 0; struct machine *machine; struct perf_tool *tool = &rec->tool; struct perf_record_opts *opts = &rec->opts; struct perf_evlist *evsel_list = rec->evlist; - const char *output_name = rec->output_name; + struct perf_data_file *file = &rec->file; + const char *output_name = file->path; struct perf_session *session; bool disabled = false; @@ -363,13 +369,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!output_name) { if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - opts->pipe_output = true; + file->is_pipe = true; else - rec->output_name = output_name = "perf.data"; + file->path = output_name = "perf.data"; } if (output_name) { if (!strcmp(output_name, "-")) - opts->pipe_output = true; + file->is_pipe = true; else if (!stat(output_name, &st) && st.st_size) { char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", @@ -381,19 +387,16 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) flags = O_CREAT|O_RDWR|O_TRUNC; - if (opts->pipe_output) - output = STDOUT_FILENO; + if (file->is_pipe) + file->fd = STDOUT_FILENO; else - output = open(output_name, flags, S_IRUSR | S_IWUSR); - if (output < 0) { + file->fd = open(output_name, flags, S_IRUSR | S_IWUSR); + if (file->fd < 0) { perror("failed to create output file"); return -1; } - rec->output = output; - - session = perf_session__new(output_name, O_WRONLY, - true, false, NULL); + session = perf_session__new(file, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; @@ -415,7 +418,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (forks) { err = perf_evlist__prepare_workload(evsel_list, &opts->target, - argv, opts->pipe_output, + argv, file->is_pipe, true); if (err < 0) { pr_err("Couldn't run the workload!\n"); @@ -436,13 +439,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) */ on_exit(perf_record__exit, rec); - if (opts->pipe_output) { - err = perf_header__write_pipe(output); + if (file->is_pipe) { + err = perf_header__write_pipe(file->fd); if (err < 0) goto out_delete_session; } else { err = perf_session__write_header(session, evsel_list, - output, false); + file->fd, false); if (err < 0) goto out_delete_session; } @@ -455,11 +458,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) goto out_delete_session; } - rec->post_processing_offset = lseek(output, 0, SEEK_CUR); + rec->post_processing_offset = lseek(file->fd, 0, SEEK_CUR); machine = &session->machines.host; - if (opts->pipe_output) { + if (file->is_pipe) { err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { @@ -476,7 +479,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(tool, output, evsel_list, + err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list, process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); @@ -845,7 +848,7 @@ const struct option record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.output_name, "file", + OPT_STRING('o', "output", &record.file.path, "file", "output file name"), OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, "child tasks do not inherit counters"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 21b5c2f..60d7f8e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,6 +33,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/data.h" #include "arch/common.h" #include @@ -857,6 +858,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Don't show entries under that percent", parse_percent_limit), OPT_END() }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; perf_config(perf_report_config, &report); @@ -886,9 +890,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) perf_hpp__init(); } + file.path = input_name; + file.force = report.force; + repeat: - session = perf_session__new(input_name, O_RDONLY, - report.force, false, &report.tool); + session = perf_session__new(&file, false, &report.tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d8c51b2..5a46b10 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1446,8 +1446,12 @@ static int perf_sched__read_events(struct perf_sched *sched, { "sched:sched_migrate_task", process_sched_migrate_task_event, }, }; struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &sched->tool); + session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { pr_debug("No Memory for session\n"); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ebb2b5f..f0c77a1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,6 +15,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/sort.h" +#include "util/data.h" #include static char const *script_name; @@ -1115,10 +1116,14 @@ int find_scripts(char **scripts_array, char **scripts_path_array) char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; char *temp; int i = 0; - session = perf_session__new(input_name, O_RDONLY, 0, false, NULL); + session = perf_session__new(&file, false, NULL); if (!session) return -1; @@ -1319,12 +1324,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "perf script [] [script-args]", NULL }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; setup_scripting(); argc = parse_options(argc, argv, options, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); + file.path = input_name; + if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) @@ -1488,8 +1498,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (!script_name) setup_pager(); - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_script); + session = perf_session__new(&file, false, &perf_script); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c2e0231..e11c61d 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -36,6 +36,7 @@ #include "util/session.h" #include "util/svghelper.h" #include "util/tool.h" +#include "util/data.h" #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1 @@ -990,8 +991,13 @@ static int __cmd_timechart(const char *output_name) { "power:power_frequency", process_sample_power_frequency }, #endif }; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &perf_timechart); + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; + + struct perf_session *session = perf_session__new(&file, false, + &perf_timechart); int ret = -EINVAL; if (session == NULL) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 65c49b2..752bebe 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -929,11 +929,15 @@ static int __cmd_top(struct perf_top *top) struct perf_record_opts *opts = &top->record_opts; pthread_t thread; int ret; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_WRITE, + }; + /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. */ - top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL); + top->session = perf_session__new(&file, false, NULL); if (top->session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index db959ac..fa620bc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1834,7 +1834,10 @@ static int trace__replay(struct trace *trace) { "raw_syscalls:sys_exit", trace__sys_exit, }, { "probe:vfs_getname", trace__vfs_getname, }, }; - + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; struct perf_session *session; int err = -1; @@ -1857,8 +1860,7 @@ static int trace__replay(struct trace *trace) if (symbol__init() < 0) return -1; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &trace->tool); + session = perf_session__new(&file, false, &trace->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 84502e8..f61c230 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -220,7 +220,6 @@ struct perf_record_opts { bool no_delay; bool no_inherit; bool no_samples; - bool pipe_output; bool raw_samples; bool sample_address; bool sample_weight; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h new file mode 100644 index 0000000..ffa0186 --- /dev/null +++ b/tools/perf/util/data.h @@ -0,0 +1,29 @@ +#ifndef __PERF_DATA_H +#define __PERF_DATA_H + +#include + +enum perf_data_mode { + PERF_DATA_MODE_WRITE, + PERF_DATA_MODE_READ, +}; + +struct perf_data_file { + const char *path; + int fd; + bool is_pipe; + bool force; + enum perf_data_mode mode; +}; + +static inline bool perf_data_file__is_read(struct perf_data_file *file) +{ + return file->mode == PERF_DATA_MODE_READ; +} + +static inline bool perf_data_file__is_write(struct perf_data_file *file) +{ + return file->mode == PERF_DATA_MODE_WRITE; +} + +#endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d4559ca..e3f63df 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -106,11 +106,11 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self) machines__destroy_kernel_maps(&self->machines); } -struct perf_session *perf_session__new(const char *filename, int mode, - bool force, bool repipe, - struct perf_tool *tool) +struct perf_session *perf_session__new(struct perf_data_file *file, + bool repipe, struct perf_tool *tool) { struct perf_session *self; + const char *filename = file->path; struct stat st; size_t len; @@ -134,11 +134,11 @@ struct perf_session *perf_session__new(const char *filename, int mode, INIT_LIST_HEAD(&self->ordered_samples.to_free); machines__init(&self->machines); - if (mode == O_RDONLY) { - if (perf_session__open(self, force) < 0) + if (perf_data_file__is_read(file)) { + if (perf_session__open(self, file->force) < 0) goto out_delete; perf_session__set_id_hdr_size(self); - } else if (mode == O_WRONLY) { + } else if (perf_data_file__is_write(file)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 04bf737..f2f6251 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,6 +7,7 @@ #include "machine.h" #include "symbol.h" #include "thread.h" +#include "data.h" #include #include @@ -49,9 +50,8 @@ struct perf_session { struct perf_tool; -struct perf_session *perf_session__new(const char *filename, int mode, - bool force, bool repipe, - struct perf_tool *tool); +struct perf_session *perf_session__new(struct perf_data_file *file, + bool repipe, struct perf_tool *tool); void perf_session__delete(struct perf_session *session); void perf_event_header__bswap(struct perf_event_header *self); -- cgit v0.10.2 From 6a4d98d787b38a130a67e78b64182b419899623a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:33 +0200 Subject: perf tools: Add perf_data_file__open interface to data object Adding perf_data_file__open interface to data object to open the perf.data file for both read and write. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c873e03..326a26e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -365,6 +365,7 @@ LIB_OBJS += $(OUTPUT)util/vdso.o LIB_OBJS += $(OUTPUT)util/stat.o LIB_OBJS += $(OUTPUT)util/record.o LIB_OBJS += $(OUTPUT)util/srcline.o +LIB_OBJS += $(OUTPUT)util/data.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ea46ff..428e28f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -345,8 +345,6 @@ out: static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { - struct stat st; - int flags; int err, feat; unsigned long waking = 0; const bool forks = argc > 0; @@ -355,7 +353,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) struct perf_record_opts *opts = &rec->opts; struct perf_evlist *evsel_list = rec->evlist; struct perf_data_file *file = &rec->file; - const char *output_name = file->path; struct perf_session *session; bool disabled = false; @@ -367,35 +364,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) signal(SIGUSR1, sig_handler); signal(SIGTERM, sig_handler); - if (!output_name) { - if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - file->is_pipe = true; - else - file->path = output_name = "perf.data"; - } - if (output_name) { - if (!strcmp(output_name, "-")) - file->is_pipe = true; - else if (!stat(output_name, &st) && st.st_size) { - char oldname[PATH_MAX]; - snprintf(oldname, sizeof(oldname), "%s.old", - output_name); - unlink(oldname); - rename(output_name, oldname); - } - } - - flags = O_CREAT|O_RDWR|O_TRUNC; - - if (file->is_pipe) - file->fd = STDOUT_FILENO; - else - file->fd = open(output_name, flags, S_IRUSR | S_IWUSR); - if (file->fd < 0) { - perror("failed to create output file"); - return -1; - } - session = perf_session__new(file, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); @@ -586,7 +554,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", (double)rec->bytes_written / 1024.0 / 1024.0, - output_name, + file->path, rec->bytes_written / 24); return 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 752bebe..d934f70 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -929,15 +929,8 @@ static int __cmd_top(struct perf_top *top) struct perf_record_opts *opts = &top->record_opts; pthread_t thread; int ret; - struct perf_data_file file = { - .mode = PERF_DATA_MODE_WRITE, - }; - /* - * FIXME: perf_session__new should allow passing a O_MMAP, so that all this - * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. - */ - top->session = perf_session__new(&file, false, NULL); + top->session = perf_session__new(NULL, false, NULL); if (top->session == NULL) return -ENOMEM; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c new file mode 100644 index 0000000..7d09faf --- /dev/null +++ b/tools/perf/util/data.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include + +#include "data.h" +#include "util.h" + +static bool check_pipe(struct perf_data_file *file) +{ + struct stat st; + bool is_pipe = false; + int fd = perf_data_file__is_read(file) ? + STDIN_FILENO : STDOUT_FILENO; + + if (!file->path) { + if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) + is_pipe = true; + } else { + if (!strcmp(file->path, "-")) + is_pipe = true; + } + + if (is_pipe) + file->fd = fd; + + return file->is_pipe = is_pipe; +} + +static int check_backup(struct perf_data_file *file) +{ + struct stat st; + + if (!stat(file->path, &st) && st.st_size) { + /* TODO check errors properly */ + char oldname[PATH_MAX]; + snprintf(oldname, sizeof(oldname), "%s.old", + file->path); + unlink(oldname); + rename(file->path, oldname); + } + + return 0; +} + +static int open_file_read(struct perf_data_file *file) +{ + struct stat st; + int fd; + + fd = open(file->path, O_RDONLY); + if (fd < 0) { + int err = errno; + + pr_err("failed to open %s: %s", file->path, strerror(err)); + if (err == ENOENT && !strcmp(file->path, "perf.data")) + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + return -err; + } + + if (fstat(fd, &st) < 0) + goto out_close; + + if (!file->force && st.st_uid && (st.st_uid != geteuid())) { + pr_err("file %s not owned by current user or root\n", + file->path); + goto out_close; + } + + if (!st.st_size) { + pr_info("zero-sized file (%s), nothing to do!\n", + file->path); + goto out_close; + } + + file->size = st.st_size; + return fd; + + out_close: + close(fd); + return -1; +} + +static int open_file_write(struct perf_data_file *file) +{ + if (check_backup(file)) + return -1; + + return open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR); +} + +static int open_file(struct perf_data_file *file) +{ + int fd; + + fd = perf_data_file__is_read(file) ? + open_file_read(file) : open_file_write(file); + + file->fd = fd; + return fd < 0 ? -1 : 0; +} + +int perf_data_file__open(struct perf_data_file *file) +{ + if (check_pipe(file)) + return 0; + + if (!file->path) + file->path = "perf.data"; + + return open_file(file); +} + +void perf_data_file__close(struct perf_data_file *file) +{ + close(file->fd); +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index ffa0186..d6c262e 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -13,6 +13,7 @@ struct perf_data_file { int fd; bool is_pipe; bool force; + unsigned long size; enum perf_data_mode mode; }; @@ -26,4 +27,7 @@ static inline bool perf_data_file__is_write(struct perf_data_file *file) return file->mode == PERF_DATA_MODE_WRITE; } +int perf_data_file__open(struct perf_data_file *file); +void perf_data_file__close(struct perf_data_file *file); + #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e3f63df..d857c18 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,73 +16,35 @@ #include "perf_regs.h" #include "vdso.h" -static int perf_session__open(struct perf_session *self, bool force) +static int perf_session__open(struct perf_session *self) { - struct stat input_stat; - - if (!strcmp(self->filename, "-")) { - self->fd_pipe = true; - self->fd = STDIN_FILENO; - + if (self->fd_pipe) { if (perf_session__read_header(self) < 0) pr_err("incompatible file format (rerun with -v to learn more)"); - return 0; } - self->fd = open(self->filename, O_RDONLY); - if (self->fd < 0) { - int err = errno; - - pr_err("failed to open %s: %s", self->filename, strerror(err)); - if (err == ENOENT && !strcmp(self->filename, "perf.data")) - pr_err(" (try 'perf record' first)"); - pr_err("\n"); - return -errno; - } - - if (fstat(self->fd, &input_stat) < 0) - goto out_close; - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - pr_err("file %s not owned by current user or root\n", - self->filename); - goto out_close; - } - - if (!input_stat.st_size) { - pr_info("zero-sized file (%s), nothing to do!\n", - self->filename); - goto out_close; - } - if (perf_session__read_header(self) < 0) { pr_err("incompatible file format (rerun with -v to learn more)"); - goto out_close; + return -1; } if (!perf_evlist__valid_sample_type(self->evlist)) { pr_err("non matching sample_type"); - goto out_close; + return -1; } if (!perf_evlist__valid_sample_id_all(self->evlist)) { pr_err("non matching sample_id_all"); - goto out_close; + return -1; } if (!perf_evlist__valid_read_format(self->evlist)) { pr_err("non matching read_format"); - goto out_close; + return -1; } - self->size = input_stat.st_size; return 0; - -out_close: - close(self->fd); - self->fd = -1; - return -1; } void perf_session__set_id_hdr_size(struct perf_session *session) @@ -110,35 +72,35 @@ struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { struct perf_session *self; - const char *filename = file->path; - struct stat st; - size_t len; - - if (!filename || !strlen(filename)) { - if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) - filename = "-"; - else - filename = "perf.data"; - } - len = strlen(filename); - self = zalloc(sizeof(*self) + len); - - if (self == NULL) + self = zalloc(sizeof(*self)); + if (!self) goto out; - memcpy(self->filename, filename, len); self->repipe = repipe; INIT_LIST_HEAD(&self->ordered_samples.samples); INIT_LIST_HEAD(&self->ordered_samples.sample_cache); INIT_LIST_HEAD(&self->ordered_samples.to_free); machines__init(&self->machines); - if (perf_data_file__is_read(file)) { - if (perf_session__open(self, file->force) < 0) + if (file) { + if (perf_data_file__open(file)) goto out_delete; - perf_session__set_id_hdr_size(self); - } else if (perf_data_file__is_write(file)) { + + self->fd = file->fd; + self->fd_pipe = file->is_pipe; + self->filename = file->path; + self->size = file->size; + + if (perf_data_file__is_read(file)) { + if (perf_session__open(self) < 0) + goto out_close; + + perf_session__set_id_hdr_size(self); + } + } + + if (!file || perf_data_file__is_write(file)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). @@ -153,10 +115,13 @@ struct perf_session *perf_session__new(struct perf_data_file *file, tool->ordered_samples = false; } -out: return self; -out_delete: + + out_close: + perf_data_file__close(file); + out_delete: perf_session__delete(self); + out: return NULL; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index f2f6251..e1ca2d0 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -39,7 +39,7 @@ struct perf_session { bool fd_pipe; bool repipe; struct ordered_samples ordered_samples; - char filename[1]; + const char *filename; }; #define PRINT_IP_OPT_IP (1<<0) -- cgit v0.10.2 From cc9784bd9fa9d8e27fdea61142398cb85ce401a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:34 +0200 Subject: perf session: Separating data file properties from session Removing 'fd, fd_pipe, filename, size' from struct perf_session and replacing them with struct perf_data_file object. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 95df683..03cfa59 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -259,7 +259,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (total_nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->filename); + ui__error("The %s file has no samples!\n", file.path); goto out_delete; } diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 0164c1c..ed3873b 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -73,7 +73,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ - if (with_hits || session->fd_pipe) + if (with_hits || perf_data_file__is_pipe(&file)) perf_session__process_events(session, &build_id__mark_dso_hit_ops); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 428e28f..ab8d15e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -257,7 +257,6 @@ static int process_buildids(struct perf_record *rec) if (size == 0) return 0; - session->fd = file->fd; return __perf_session__process_events(session, rec->post_processing_offset, size - rec->post_processing_offset, size, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 60d7f8e..fa68a36 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -368,8 +368,9 @@ static int perf_report__setup_sample_type(struct perf_report *rep) { struct perf_session *self = rep->session; u64 sample_type = perf_evlist__combined_sample_type(self->evlist); + bool is_pipe = perf_data_file__is_pipe(self->file); - if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " "callchain data. Did you call " @@ -392,7 +393,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } if (sort__mode == SORT_MODE__BRANCH) { - if (!self->fd_pipe && + if (!is_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { ui__error("Selected -b but no branch data. " "Did you call perf record without -b?\n"); @@ -488,6 +489,7 @@ static int __cmd_report(struct perf_report *rep) struct map *kernel_map; struct kmap *kernel_kmap; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; + struct perf_data_file *file = session->file; signal(SIGINT, sig_handler); @@ -572,7 +574,7 @@ static int __cmd_report(struct perf_report *rep) return 0; if (nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->filename); + ui__error("The %s file has no samples!\n", file->path); return 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f0c77a1..27de606 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1525,7 +1525,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - input = open(session->filename, O_RDONLY); /* input_name */ + input = open(file.path, O_RDONLY); /* input_name */ if (input < 0) { perror("failed to open file"); return -1; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index d6c262e..8c2df80 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -27,6 +27,21 @@ static inline bool perf_data_file__is_write(struct perf_data_file *file) return file->mode == PERF_DATA_MODE_WRITE; } +static inline int perf_data_file__is_pipe(struct perf_data_file *file) +{ + return file->is_pipe; +} + +static inline int perf_data_file__fd(struct perf_data_file *file) +{ + return file->fd; +} + +static inline unsigned long perf_data_file__size(struct perf_data_file *file) +{ + return file->size; +} + int perf_data_file__open(struct perf_data_file *file); void perf_data_file__close(struct perf_data_file *file); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index c3e5a3b..26d9520 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -22,6 +22,7 @@ #include "vdso.h" #include "strbuf.h" #include "build-id.h" +#include "data.h" static bool no_buildid_cache = false; @@ -2189,7 +2190,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) { struct header_print_data hd; struct perf_header *header = &session->header; - int fd = session->fd; + int fd = perf_data_file__fd(session->file); hd.fp = fp; hd.full = full; @@ -2650,7 +2651,8 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, session->fd, + if (perf_file_header__read_pipe(&f_header, header, + perf_data_file__fd(session->file), session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; @@ -2751,18 +2753,19 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, int perf_session__read_header(struct perf_session *session) { + struct perf_data_file *file = session->file; struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - int fd = session->fd; + int fd = perf_data_file__fd(file); session->evlist = perf_evlist__new(); if (session->evlist == NULL) return -ENOMEM; - if (session->fd_pipe) + if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); if (perf_file_header__read(&f_header, header, fd) < 0) @@ -2777,7 +2780,7 @@ int perf_session__read_header(struct perf_session *session) if (f_header.data.size == 0) { pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" "Was the 'perf record' command properly terminated?\n", - session->filename); + file->path); } nr_attrs = f_header.attrs.size / f_header.attr_size; @@ -2990,18 +2993,19 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, struct perf_session *session) { ssize_t size_read, padding, size = event->tracing_data.size; - off_t offset = lseek(session->fd, 0, SEEK_CUR); + int fd = perf_data_file__fd(session->file); + off_t offset = lseek(fd, 0, SEEK_CUR); char buf[BUFSIZ]; /* setup for reading amidst mmap */ - lseek(session->fd, offset + sizeof(struct tracing_data_event), + lseek(fd, offset + sizeof(struct tracing_data_event), SEEK_SET); - size_read = trace_report(session->fd, &session->pevent, + size_read = trace_report(fd, &session->pevent, session->repipe); padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; - if (readn(session->fd, buf, padding) < 0) { + if (readn(fd, buf, padding) < 0) { pr_err("%s: reading input file", __func__); return -1; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d857c18..19fc716 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -18,17 +18,16 @@ static int perf_session__open(struct perf_session *self) { - if (self->fd_pipe) { - if (perf_session__read_header(self) < 0) - pr_err("incompatible file format (rerun with -v to learn more)"); - return 0; - } + struct perf_data_file *file = self->file; if (perf_session__read_header(self) < 0) { pr_err("incompatible file format (rerun with -v to learn more)"); return -1; } + if (perf_data_file__is_pipe(file)) + return 0; + if (!perf_evlist__valid_sample_type(self->evlist)) { pr_err("non matching sample_type"); return -1; @@ -87,10 +86,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, if (perf_data_file__open(file)) goto out_delete; - self->fd = file->fd; - self->fd_pipe = file->is_pipe; - self->filename = file->path; - self->size = file->size; + self->file = file; if (perf_data_file__is_read(file)) { if (perf_session__open(self) < 0) @@ -158,7 +154,8 @@ void perf_session__delete(struct perf_session *self) perf_session__delete_threads(self); perf_session_env__delete(&self->header.env); machines__exit(&self->machines); - close(self->fd); + if (self->file) + perf_data_file__close(self->file); free(self); vdso__exit(); } @@ -1015,6 +1012,7 @@ static int perf_session_deliver_event(struct perf_session *session, static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, u64 file_offset) { + int fd = perf_data_file__fd(session->file); int err; dump_event(session, event, file_offset, NULL); @@ -1028,7 +1026,7 @@ static int perf_session__process_user_event(struct perf_session *session, union return err; case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ - lseek(session->fd, file_offset, SEEK_SET); + lseek(fd, file_offset, SEEK_SET); return tool->tracing_data(tool, event, session); case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); @@ -1154,6 +1152,7 @@ volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *self, struct perf_tool *tool) { + int fd = perf_data_file__fd(self->file); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1172,7 +1171,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self, return -errno; more: event = buf; - err = readn(self->fd, event, sizeof(struct perf_event_header)); + err = readn(fd, event, sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -1204,7 +1203,7 @@ more: p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { - err = readn(self->fd, p, size - sizeof(struct perf_event_header)); + err = readn(fd, p, size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) { pr_err("unexpected end of event stream\n"); @@ -1285,6 +1284,7 @@ int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 file_size, struct perf_tool *tool) { + int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, progress_next; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; @@ -1317,7 +1317,7 @@ int __perf_session__process_events(struct perf_session *session, mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd, + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); @@ -1382,16 +1382,17 @@ out_err: int perf_session__process_events(struct perf_session *self, struct perf_tool *tool) { + u64 size = perf_data_file__size(self->file); int err; if (perf_session__register_idle_thread(self) == NULL) return -ENOMEM; - if (!self->fd_pipe) + if (!perf_data_file__is_pipe(self->file)) err = __perf_session__process_events(self, self->header.data_offset, self->header.data_size, - self->size, tool); + size, tool); else err = __perf_session__process_pipe_events(self, tool); @@ -1615,13 +1616,14 @@ int perf_session__cpu_bitmap(struct perf_session *session, void perf_session__fprintf_info(struct perf_session *session, FILE *fp, bool full) { + int fd = perf_data_file__fd(session->file); struct stat st; int ret; if (session == NULL || fp == NULL) return; - ret = fstat(session->fd, &st); + ret = fstat(fd, &st); if (ret == -1) return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index e1ca2d0..27c74d3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -30,16 +30,13 @@ struct ordered_samples { struct perf_session { struct perf_header header; - unsigned long size; struct machines machines; struct perf_evlist *evlist; struct pevent *pevent; struct events_stats stats; - int fd; - bool fd_pipe; bool repipe; struct ordered_samples ordered_samples; - const char *filename; + struct perf_data_file *file; }; #define PRINT_IP_OPT_IP (1<<0) -- cgit v0.10.2 From 91e95617429cb272fd908b1928a1915b37b9655f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:48 -0400 Subject: perf report: Add --max-stack option to limit callchain stack scan When callgraph data was included in the perf data file, it may take a long time to scan all those data and merge them together especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so. The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels, the rests are usually not looked at. This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information with faster speed. The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes. --max_stack Elapsed Time Output data size ----------- ------------ ---------------- not set 88.0s 124,422,651 64 87.5s 116,303,213 32 87.2s 112,023,804 16 86.6s 94,326,380 8 59.9s 33,697,248 4 40.7s 10,116,637 -g none 27.1s 2,555,810 Signed-off-by: Waiman Long Acked-by: David Ahern Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index be5ad87..10a2798 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -141,6 +141,14 @@ OPTIONS Default: fractal,0.5,callee,function. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + -G:: --inverted:: alias for inverted caller based call graph. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa68a36..81addca 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -49,6 +49,7 @@ struct perf_report { bool show_threads; bool inverted_callchain; bool mem_mode; + int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; const char *cpu_list; @@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -244,18 +247,21 @@ out: return err; } -static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, +static int perf_evsel__add_hist_entry(struct perf_tool *tool, + struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, struct machine *machine) { + struct perf_report *rep = container_of(tool, struct perf_report, tool); struct symbol *parent = NULL; int err = 0; struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); + ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample, + machine); if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); } @@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_samples = true, .ordering_requires_timestamps = true, }, + .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", }; const struct option options[] = { @@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), + OPT_INTEGER(0, "max-stack", &report.max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d934f70..112cb7d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool, sample->callchain) { err = machine__resolve_callchain(machine, evsel, al.thread, sample, - &parent, &al); + &parent, &al, + PERF_MAX_STACK_DEPTH); if (err) return; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6b861ae..ea93425 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine, struct thread *thread, struct ip_callchain *chain, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { u8 cpumode = PERF_RECORD_MISC_USER; - unsigned int i; + int chain_nr = min(max_stack, (int)chain->nr); + int i; int err; callchain_cursor_reset(&callchain_cursor); @@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, return 0; } - for (i = 0; i < chain->nr; i++) { + for (i = 0; i < chain_nr; i++) { u64 ip; struct addr_location al; @@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { int ret; ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent, root_al); + sample->callchain, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d44c09b..4c1f5d5 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al); + struct addr_location *root_al, + int max_stack); /* * Default guest kernel is defined by parameter --guestkallsyms diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 19fc716..854c5aa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, if (symbol_conf.use_callchain && sample->callchain) { if (machine__resolve_callchain(machine, evsel, al.thread, - sample, NULL, NULL) != 0) { + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; -- cgit v0.10.2 From 5dbb6e81d85e55ee2b4cf523c1738e16f63e5400 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:49 -0400 Subject: perf top: Add --max-stack option to limit callchain stack scan When the callgraph function is enabled (-G), it may take a long time to scan all the stack data and merge them accordingly. This patch adds a new --max-stack option to perf-top to limit the depth of callchain stack data to look at to reduce the time it takes for perf-top to finish its processing. It reduces the amount of information provided to the user in exchange for faster speed. Signed-off-by: Waiman Long Acked-by: David Ahern Tested-by: Davidlohr Bueso Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-5-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f65777c..c16a09e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,14 @@ Default is to monitor all CPUS. Default: fractal,0.5,callee. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + --ignore-callees=:: Ignore callees of the function(s) matching the given regex. This has the effect of collecting the callers of each such diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 112cb7d..386d833 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -771,7 +771,7 @@ static void perf_event__process_sample(struct perf_tool *tool, err = machine__resolve_callchain(machine, evsel, al.thread, sample, &parent, &al, - PERF_MAX_STACK_DEPTH); + top->max_stack); if (err) return; } @@ -1048,10 +1048,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .freq = 4000, /* 4 KHz */ - .target = { + .target = { .uses_mmap = true, }, }, + .max_stack = PERF_MAX_STACK_DEPTH, .sym_pcnt_filter = 5, }; struct perf_record_opts *opts = &top.record_opts; @@ -1110,6 +1111,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt, "fp"), + OPT_INTEGER(0, "max-stack", &top.max_stack, + "Set the maximum stack depth when parsing the callchain. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b554ffc..88cfeaf 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -24,6 +24,7 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; + int max_stack; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; bool kptr_restrict_warned; -- cgit v0.10.2 From 11a4d435a2d75918039540f08b259969c63b8635 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Oct 2013 15:24:58 -0300 Subject: perf test: Clarify the "sample parsing" test entry Before: [root@sandy ~]# perf test -v 22 22: Test sample parsing : --- start --- sample format has changed - test needs updating ---- end ---- Test sample parsing: FAILED! [root@sandy ~]# After: [root@sandy ~]# perf test -v 22 22: Test sample parsing : --- start --- sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating ---- end ---- Test sample parsing: FAILED! [root@sandy ~]# Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8cazc2fpmk70jcbww8c0cobx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 77f598d..17d000c 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -276,7 +276,7 @@ int test__sample_parsing(void) * were added. */ if (PERF_SAMPLE_MAX > PERF_SAMPLE_IDENTIFIER << 1) { - pr_debug("sample format has changed - test needs updating\n"); + pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } -- cgit v0.10.2 From 4ac2f1c1014a121f1493a9d5207258793c576438 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Oct 2013 15:28:11 -0300 Subject: perf test: Consider PERF_SAMPLE_TRANSACTION in the "sample parsing" test [root@sandy ~]# perf test -v 22 22: Test sample parsing : --- start --- sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating ---- end ---- Test sample parsing: FAILED! [root@sandy ~]# Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cx83wuzz30m10m4s1xt0ocyq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 17d000c..61c9da2 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -275,7 +275,7 @@ int test__sample_parsing(void) * Fail the test if it has not been updated when new sample format bits * were added. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_IDENTIFIER << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_TRANSACTION << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } -- cgit v0.10.2 From c824c4338ac47979c69ba6f8faab33670ae179df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Oct 2013 19:01:31 -0300 Subject: perf tools: Stop using 'self' in some more places As suggested by tglx, 'self' should be replaced by something that is more useful. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fmblhc6tbb99tk1q8vowtsbj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 03cfa59..17c6b49 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -118,11 +118,11 @@ static int hist_entry__tty_annotate(struct hist_entry *he, ann->print_line, ann->full_paths, 0, 0); } -static void hists__find_annotations(struct hists *self, +static void hists__find_annotations(struct hists *hists, struct perf_evsel *evsel, struct perf_annotate *ann) { - struct rb_node *nd = rb_first(&self->entries), *next; + struct rb_node *nd = rb_first(&hists->entries), *next; int key = K_RIGHT; while (nd) { diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 419d27d..9c82888 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -303,12 +303,11 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, return -1; } -static int hists__add_entry(struct hists *self, +static int hists__add_entry(struct hists *hists, struct addr_location *al, u64 period, u64 weight, u64 transaction) { - if (__hists__add_entry(self, al, NULL, period, weight, transaction) - != NULL) + if (__hists__add_entry(hists, al, NULL, period, weight, transaction) != NULL) return 0; return -ENOMEM; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 4aa6d78..eb1a594 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -162,38 +162,38 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool, return err; } -static int dso__read_build_id(struct dso *self) +static int dso__read_build_id(struct dso *dso) { - if (self->has_build_id) + if (dso->has_build_id) return 0; - if (filename__read_build_id(self->long_name, self->build_id, - sizeof(self->build_id)) > 0) { - self->has_build_id = true; + if (filename__read_build_id(dso->long_name, dso->build_id, + sizeof(dso->build_id)) > 0) { + dso->has_build_id = true; return 0; } return -1; } -static int dso__inject_build_id(struct dso *self, struct perf_tool *tool, +static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, struct machine *machine) { u16 misc = PERF_RECORD_MISC_USER; int err; - if (dso__read_build_id(self) < 0) { - pr_debug("no build_id found for %s\n", self->long_name); + if (dso__read_build_id(dso) < 0) { + pr_debug("no build_id found for %s\n", dso->long_name); return -1; } - if (self->kernel) + if (dso->kernel) misc = PERF_RECORD_MISC_KERNEL; - err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe, + err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe, machine); if (err) { - pr_err("Can't synthesize build_id event for %s\n", self->long_name); + pr_err("Can't synthesize build_id event for %s\n", dso->long_name); return -1; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 81addca..e3598a4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -373,9 +373,9 @@ static int process_read_event(struct perf_tool *tool, /* For pipe mode, sample_type is not currently set */ static int perf_report__setup_sample_type(struct perf_report *rep) { - struct perf_session *self = rep->session; - u64 sample_type = perf_evlist__combined_sample_type(self->evlist); - bool is_pipe = perf_data_file__is_pipe(self->file); + struct perf_session *session = rep->session; + u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + bool is_pipe = perf_data_file__is_pipe(session->file); if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { @@ -417,14 +417,14 @@ static void sig_handler(int sig __maybe_unused) } static size_t hists__fprintf_nr_sample_events(struct perf_report *rep, - struct hists *self, + struct hists *hists, const char *evname, FILE *fp) { size_t ret; char unit; - unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE]; - u64 nr_events = self->stats.total_period; - struct perf_evsel *evsel = hists_to_evsel(self); + unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + u64 nr_events = hists->stats.total_period; + struct perf_evsel *evsel = hists_to_evsel(hists); char buf[512]; size_t size = sizeof(buf); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7ded71d..a92770c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -89,14 +89,14 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } -char *dso__build_id_filename(struct dso *self, char *bf, size_t size) +char *dso__build_id_filename(struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; - if (!self->has_build_id) + if (!dso->has_build_id) return NULL; - build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); if (bf == NULL) { if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, build_id_hex, build_id_hex + 2) < 0) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cca03831..f0b863e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -406,7 +406,7 @@ out: return he; } -struct hist_entry *__hists__add_mem_entry(struct hists *self, +struct hist_entry *__hists__add_mem_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, struct mem_info *mi, @@ -429,14 +429,14 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self, .level = al->level, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent), - .hists = self, + .hists = hists, .mem_info = mi, .branch_info = NULL, }; - return add_hist_entry(self, &entry, al, period, weight); + return add_hist_entry(hists, &entry, al, period, weight); } -struct hist_entry *__hists__add_branch_entry(struct hists *self, +struct hist_entry *__hists__add_branch_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, struct branch_info *bi, @@ -460,14 +460,14 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent), .branch_info = bi, - .hists = self, + .hists = hists, .mem_info = NULL, }; - return add_hist_entry(self, &entry, al, period, weight); + return add_hist_entry(hists, &entry, al, period, weight); } -struct hist_entry *__hists__add_entry(struct hists *self, +struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, u64 period, u64 weight, u64 transaction) @@ -488,13 +488,13 @@ struct hist_entry *__hists__add_entry(struct hists *self, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent), - .hists = self, + .hists = hists, .branch_info = NULL, .mem_info = NULL, .transaction = transaction, }; - return add_hist_entry(self, &entry, al, period, weight); + return add_hist_entry(hists, &entry, al, period, weight); } int64_t diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1f9821d..19b4aa2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -60,11 +60,11 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) return right->thread->tid - left->thread->tid; } -static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%*s:%5d", width - 6, - self->thread->comm ?: "", self->thread->tid); + he->thread->comm ?: "", he->thread->tid); } struct sort_entry sort_thread = { @@ -94,10 +94,10 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) return strcmp(comm_l, comm_r); } -static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); + return repsep_snprintf(bf, size, "%*s", width, he->thread->comm); } struct sort_entry sort_comm = { @@ -148,10 +148,10 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf, return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } -static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); + return _hist_entry__dso_snprintf(he->ms.map, bf, size, width); } struct sort_entry sort_dso = { @@ -234,11 +234,11 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, return ret; } -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, - self->level, bf, size, width); + return _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip, + he->level, bf, size, width); } struct sort_entry sort_sym = { @@ -274,11 +274,11 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) return strcmp(left->srcline, right->srcline); } -static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width __maybe_unused) { - return repsep_snprintf(bf, size, "%s", self->srcline); + return repsep_snprintf(bf, size, "%s", he->srcline); } struct sort_entry sort_srcline = { @@ -302,11 +302,11 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) return strcmp(sym_l->name, sym_r->name); } -static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%-*s", width, - self->parent ? self->parent->name : "[other]"); + he->parent ? he->parent->name : "[other]"); } struct sort_entry sort_parent = { @@ -324,10 +324,10 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right) return right->cpu - left->cpu; } -static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width) +static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*d", width, self->cpu); + return repsep_snprintf(bf, size, "%*d", width, he->cpu); } struct sort_entry sort_cpu = { @@ -346,10 +346,10 @@ sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->from.map); } -static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return _hist_entry__dso_snprintf(self->branch_info->from.map, + return _hist_entry__dso_snprintf(he->branch_info->from.map, bf, size, width); } @@ -360,10 +360,10 @@ sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->to.map); } -static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return _hist_entry__dso_snprintf(self->branch_info->to.map, + return _hist_entry__dso_snprintf(he->branch_info->to.map, bf, size, width); } @@ -391,21 +391,21 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) return _sort__sym_cmp(to_l->sym, to_r->sym); } -static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - struct addr_map_symbol *from = &self->branch_info->from; + struct addr_map_symbol *from = &he->branch_info->from; return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, - self->level, bf, size, width); + he->level, bf, size, width); } -static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - struct addr_map_symbol *to = &self->branch_info->to; + struct addr_map_symbol *to = &he->branch_info->to; return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, - self->level, bf, size, width); + he->level, bf, size, width); } @@ -448,13 +448,13 @@ sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) return mp || p; } -static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width){ static const char *out = "N/A"; - if (self->branch_info->flags.predicted) + if (he->branch_info->flags.predicted) out = "N"; - else if (self->branch_info->flags.mispred) + else if (he->branch_info->flags.mispred) out = "Y"; return repsep_snprintf(bf, size, "%-*s", width, out); @@ -474,19 +474,19 @@ sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(r - l); } -static int hist_entry__daddr_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { uint64_t addr = 0; struct map *map = NULL; struct symbol *sym = NULL; - if (self->mem_info) { - addr = self->mem_info->daddr.addr; - map = self->mem_info->daddr.map; - sym = self->mem_info->daddr.sym; + if (he->mem_info) { + addr = he->mem_info->daddr.addr; + map = he->mem_info->daddr.map; + sym = he->mem_info->daddr.sym; } - return _hist_entry__sym_snprintf(map, sym, addr, self->level, bf, size, + return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size, width); } @@ -504,13 +504,13 @@ sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right) return _sort__dso_cmp(map_l, map_r); } -static int hist_entry__dso_daddr_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { struct map *map = NULL; - if (self->mem_info) - map = self->mem_info->daddr.map; + if (he->mem_info) + map = he->mem_info->daddr.map; return _hist_entry__dso_snprintf(map, bf, size, width); } @@ -534,14 +534,14 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock); } -static int hist_entry__locked_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { const char *out; u64 mask = PERF_MEM_LOCK_NA; - if (self->mem_info) - mask = self->mem_info->data_src.mem_lock; + if (he->mem_info) + mask = he->mem_info->data_src.mem_lock; if (mask & PERF_MEM_LOCK_NA) out = "N/A"; @@ -583,7 +583,7 @@ static const char * const tlb_access[] = { }; #define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *)) -static int hist_entry__tlb_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; @@ -594,8 +594,8 @@ static int hist_entry__tlb_snprintf(struct hist_entry *self, char *bf, out[0] = '\0'; - if (self->mem_info) - m = self->mem_info->data_src.mem_dtlb; + if (he->mem_info) + m = he->mem_info->data_src.mem_dtlb; hit = m & PERF_MEM_TLB_HIT; miss = m & PERF_MEM_TLB_MISS; @@ -660,7 +660,7 @@ static const char * const mem_lvl[] = { }; #define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *)) -static int hist_entry__lvl_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; @@ -669,8 +669,8 @@ static int hist_entry__lvl_snprintf(struct hist_entry *self, char *bf, u64 m = PERF_MEM_LVL_NA; u64 hit, miss; - if (self->mem_info) - m = self->mem_info->data_src.mem_lvl; + if (he->mem_info) + m = he->mem_info->data_src.mem_lvl; out[0] = '\0'; @@ -728,7 +728,7 @@ static const char * const snoop_access[] = { }; #define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *)) -static int hist_entry__snoop_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; @@ -738,8 +738,8 @@ static int hist_entry__snoop_snprintf(struct hist_entry *self, char *bf, out[0] = '\0'; - if (self->mem_info) - m = self->mem_info->data_src.mem_snoop; + if (he->mem_info) + m = he->mem_info->data_src.mem_snoop; for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) { if (!(m & 0x1)) @@ -776,10 +776,10 @@ sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) return he_weight(left) - he_weight(right); } -static int hist_entry__local_weight_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(self)); + return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); } struct sort_entry sort_local_weight = { @@ -795,10 +795,10 @@ sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) return left->stat.weight - right->stat.weight; } -static int hist_entry__global_weight_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, self->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); } struct sort_entry sort_global_weight = { @@ -857,12 +857,12 @@ sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->flags.abort; } -static int hist_entry__abort_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { static const char *out = "."; - if (self->branch_info->flags.abort) + if (he->branch_info->flags.abort) out = "A"; return repsep_snprintf(bf, size, "%-*s", width, out); } @@ -881,12 +881,12 @@ sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->flags.in_tx; } -static int hist_entry__in_tx_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { static const char *out = "."; - if (self->branch_info->flags.in_tx) + if (he->branch_info->flags.in_tx) out = "T"; return repsep_snprintf(bf, size, "%-*s", width, out); @@ -940,10 +940,10 @@ int hist_entry__transaction_len(void) return len; } -static int hist_entry__transaction_snprintf(struct hist_entry *self, char *bf, +static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - u64 t = self->transaction; + u64 t = he->transaction; char buf[128]; char *p = buf; int i; @@ -1125,7 +1125,7 @@ int setup_sorting(void) return ret; } -static void sort_entry__setup_elide(struct sort_entry *self, +static void sort_entry__setup_elide(struct sort_entry *se, struct strlist *list, const char *list_name, FILE *fp) { @@ -1133,7 +1133,7 @@ static void sort_entry__setup_elide(struct sort_entry *self, if (fp != NULL) fprintf(fp, "# %s: %s\n", list_name, strlist__entry(list, 0)->s); - self->elide = true; + se->elide = true; } } diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 834c8eb..67e4a00 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -10,22 +10,22 @@ static const char *OP_not = "!"; /* Logical NOT */ #define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!') #define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')') -static void strfilter_node__delete(struct strfilter_node *self) +static void strfilter_node__delete(struct strfilter_node *node) { - if (self) { - if (self->p && !is_operator(*self->p)) - free((char *)self->p); - strfilter_node__delete(self->l); - strfilter_node__delete(self->r); - free(self); + if (node) { + if (node->p && !is_operator(*node->p)) + free((char *)node->p); + strfilter_node__delete(node->l); + strfilter_node__delete(node->r); + free(node); } } -void strfilter__delete(struct strfilter *self) +void strfilter__delete(struct strfilter *filter) { - if (self) { - strfilter_node__delete(self->root); - free(self); + if (filter) { + strfilter_node__delete(filter->root); + free(filter); } } @@ -170,30 +170,30 @@ struct strfilter *strfilter__new(const char *rules, const char **err) return ret; } -static bool strfilter_node__compare(struct strfilter_node *self, +static bool strfilter_node__compare(struct strfilter_node *node, const char *str) { - if (!self || !self->p) + if (!node || !node->p) return false; - switch (*self->p) { + switch (*node->p) { case '|': /* OR */ - return strfilter_node__compare(self->l, str) || - strfilter_node__compare(self->r, str); + return strfilter_node__compare(node->l, str) || + strfilter_node__compare(node->r, str); case '&': /* AND */ - return strfilter_node__compare(self->l, str) && - strfilter_node__compare(self->r, str); + return strfilter_node__compare(node->l, str) && + strfilter_node__compare(node->r, str); case '!': /* NOT */ - return !strfilter_node__compare(self->r, str); + return !strfilter_node__compare(node->r, str); default: - return strglobmatch(str, self->p); + return strglobmatch(str, node->p); } } /* Return true if STR matches the filter rules */ -bool strfilter__compare(struct strfilter *self, const char *str) +bool strfilter__compare(struct strfilter *node, const char *str) { - if (!self) + if (!node) return false; - return strfilter_node__compare(self->root, str); + return strfilter_node__compare(node->root, str); } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index e3d4a55..80d19a0 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -9,51 +9,51 @@ struct thread *thread__new(pid_t pid, pid_t tid) { - struct thread *self = zalloc(sizeof(*self)); + struct thread *thread = zalloc(sizeof(*thread)); - if (self != NULL) { - map_groups__init(&self->mg); - self->pid_ = pid; - self->tid = tid; - self->ppid = -1; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->tid); + if (thread != NULL) { + map_groups__init(&thread->mg); + thread->pid_ = pid; + thread->tid = tid; + thread->ppid = -1; + thread->comm = malloc(32); + if (thread->comm) + snprintf(thread->comm, 32, ":%d", thread->tid); } - return self; + return thread; } -void thread__delete(struct thread *self) +void thread__delete(struct thread *thread) { - map_groups__exit(&self->mg); - free(self->comm); - free(self); + map_groups__exit(&thread->mg); + free(thread->comm); + free(thread); } -int thread__set_comm(struct thread *self, const char *comm) +int thread__set_comm(struct thread *thread, const char *comm) { int err; - if (self->comm) - free(self->comm); - self->comm = strdup(comm); - err = self->comm == NULL ? -ENOMEM : 0; + if (thread->comm) + free(thread->comm); + thread->comm = strdup(comm); + err = thread->comm == NULL ? -ENOMEM : 0; if (!err) { - self->comm_set = true; + thread->comm_set = true; } return err; } -int thread__comm_len(struct thread *self) +int thread__comm_len(struct thread *thread) { - if (!self->comm_len) { - if (!self->comm) + if (!thread->comm_len) { + if (!thread->comm) return 0; - self->comm_len = strlen(self->comm); + thread->comm_len = strlen(thread->comm); } - return self->comm_len; + return thread->comm_len; } size_t thread__fprintf(struct thread *thread, FILE *fp) @@ -62,30 +62,30 @@ size_t thread__fprintf(struct thread *thread, FILE *fp) map_groups__fprintf(&thread->mg, verbose, fp); } -void thread__insert_map(struct thread *self, struct map *map) +void thread__insert_map(struct thread *thread, struct map *map) { - map_groups__fixup_overlappings(&self->mg, map, verbose, stderr); - map_groups__insert(&self->mg, map); + map_groups__fixup_overlappings(&thread->mg, map, verbose, stderr); + map_groups__insert(&thread->mg, map); } -int thread__fork(struct thread *self, struct thread *parent) +int thread__fork(struct thread *thread, struct thread *parent) { int i; if (parent->comm_set) { - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) + if (thread->comm) + free(thread->comm); + thread->comm = strdup(parent->comm); + if (!thread->comm) return -ENOMEM; - self->comm_set = true; + thread->comm_set = true; } for (i = 0; i < MAP__NR_TYPES; ++i) - if (map_groups__clone(&self->mg, &parent->mg, i) < 0) + if (map_groups__clone(&thread->mg, &parent->mg, i) < 0) return -ENOMEM; - self->ppid = parent->tid; + thread->ppid = parent->tid; return 0; } -- cgit v0.10.2 From 7969ec7728ba6340de8000ddb0a8833273629d6a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 11 Oct 2013 16:10:23 +0900 Subject: perf probe: Support "$vars" meta argument syntax for local variables Support "$vars" meta argument syntax for tracing all local variables at probe point. Now you can trace all available local variables (including function parameters) at the probe point by passing $vars. # perf probe --add foo $vars This automatically finds all local variables at foo() and adds it as probe arguments. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131011071023.15557.51770.stgit@udc4-manage.rcp.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 779b2da..9c6989c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -47,7 +47,6 @@ #include "session.h" #define MAX_CMDLEN 256 -#define MAX_PROBE_ARGS 128 #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c09e0a9..5a46be9 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1136,12 +1136,78 @@ found: return ret; } +struct local_vars_finder { + struct probe_finder *pf; + struct perf_probe_arg *args; + int max_args; + int nargs; + int ret; +}; + +/* Collect available variables in this scope */ +static int copy_variables_cb(Dwarf_Die *die_mem, void *data) +{ + struct local_vars_finder *vf = data; + int tag; + + tag = dwarf_tag(die_mem); + if (tag == DW_TAG_formal_parameter || + tag == DW_TAG_variable) { + if (convert_variable_location(die_mem, vf->pf->addr, + vf->pf->fb_ops, NULL) == 0) { + vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); + if (vf->args[vf->nargs].var == NULL) { + vf->ret = -ENOMEM; + return DIE_FIND_CB_END; + } + pr_debug(" %s", vf->args[vf->nargs].var); + vf->nargs++; + } + } + + if (dwarf_haspc(die_mem, vf->pf->addr)) + return DIE_FIND_CB_CONTINUE; + else + return DIE_FIND_CB_SIBLING; +} + +static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, + struct perf_probe_arg *args) +{ + Dwarf_Die die_mem; + int i; + int n = 0; + struct local_vars_finder vf = {.pf = pf, .args = args, + .max_args = MAX_PROBE_ARGS, .ret = 0}; + + for (i = 0; i < pf->pev->nargs; i++) { + /* var never be NULL */ + if (strcmp(pf->pev->args[i].var, "$vars") == 0) { + pr_debug("Expanding $vars into:"); + vf.nargs = n; + /* Special local variables */ + die_find_child(sc_die, copy_variables_cb, (void *)&vf, + &die_mem); + pr_debug(" (%d)\n", vf.nargs - n); + if (vf.ret < 0) + return vf.ret; + n = vf.nargs; + } else { + /* Copy normal argument */ + args[n] = pf->pev->args[i]; + n++; + } + } + return n; +} + /* Add a found probe point into trace event list */ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { struct trace_event_finder *tf = container_of(pf, struct trace_event_finder, pf); struct probe_trace_event *tev; + struct perf_probe_arg *args; int ret, i; /* Check number of tevs */ @@ -1161,21 +1227,35 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); - /* Find each argument */ - tev->nargs = pf->pev->nargs; - tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); - if (tev->args == NULL) + /* Expand special probe argument if exist */ + args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS); + if (args == NULL) return -ENOMEM; - for (i = 0; i < pf->pev->nargs; i++) { - pf->pvar = &pf->pev->args[i]; + + ret = expand_probe_args(sc_die, pf, args); + if (ret < 0) + goto end; + + tev->nargs = ret; + tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); + if (tev->args == NULL) { + ret = -ENOMEM; + goto end; + } + + /* Find each argument */ + for (i = 0; i < tev->nargs; i++) { + pf->pvar = &args[i]; pf->tvar = &tev->args[i]; /* Variable should be found from scope DIE */ ret = find_variable(sc_die, pf); if (ret != 0) - return ret; + break; } - return 0; +end: + free(args); + return ret; } /* Find probe_trace_events specified by perf_probe_event from debuginfo */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3f0c29d..d6dab0e 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -7,6 +7,7 @@ #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 +#define MAX_PROBE_ARGS 128 static inline int is_c_varname(const char *name) { -- cgit v0.10.2 From 3d918a12a1b3088ac16ff37fa52760639d6e2403 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 11 Oct 2013 16:10:26 +0900 Subject: perf probe: Find fentry mcount fuzzed parameter location At this point, --fentry (mcount function entry) option for gcc fuzzes the debuginfo variable locations by skipping the mcount instruction offset (on x86, this is a 5 byte call instruction). This makes variable searching fail at the entry of functions which are mcount'ed. e.g.) Available variables at vfs_read @ (No matched variables) This patch adds additional location search at the function entry point to solve this issue, which tries to find the earliest address for the variable location. Note that this only works with function parameters (formal parameters) because any local variables should not exist on the function entry address (those are not initialized yet). With this patch, perf probe shows correct parameters if possible; # perf probe --vars vfs_read Available variables at vfs_read @ char* buf loff_t* pos size_t count struct file* file Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131011071025.15557.13275.stgit@udc4-manage.rcp.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5a46be9..c044052 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -273,12 +273,15 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) /* * Convert a location into trace_arg. * If tvar == NULL, this just checks variable can be converted. + * If fentry == true and vr_die is a parameter, do huristic search + * for the location fuzzed by function entry mcount. */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, - Dwarf_Op *fb_ops, + Dwarf_Op *fb_ops, Dwarf_Die *sp_die, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; + Dwarf_Addr tmp = 0; Dwarf_Op *op; size_t nops; unsigned int regn; @@ -291,12 +294,29 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, goto static_var; /* TODO: handle more than 1 exprs */ - if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL || - dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 || - nops == 0) { - /* TODO: Support const_value */ + if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) + return -EINVAL; /* Broken DIE ? */ + if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) { + ret = dwarf_entrypc(sp_die, &tmp); + if (ret || addr != tmp || + dwarf_tag(vr_die) != DW_TAG_formal_parameter || + dwarf_highpc(sp_die, &tmp)) + return -ENOENT; + /* + * This is fuzzed by fentry mcount. We try to find the + * parameter location at the earliest address. + */ + for (addr += 1; addr <= tmp; addr++) { + if (dwarf_getlocation_addr(&attr, addr, &op, + &nops, 1) > 0) + goto found; + } return -ENOENT; } +found: + if (nops == 0) + /* TODO: Support const_value */ + return -ENOENT; if (op->atom == DW_OP_addr) { static_var: @@ -600,7 +620,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - pf->tvar); + &pf->sp_die, pf->tvar); if (ret == -ENOENT) pr_err("Failed to find the location of %s at this address.\n" " Perhaps, it has been optimized out.\n", pf->pvar->var); @@ -1148,13 +1168,15 @@ struct local_vars_finder { static int copy_variables_cb(Dwarf_Die *die_mem, void *data) { struct local_vars_finder *vf = data; + struct probe_finder *pf = vf->pf; int tag; tag = dwarf_tag(die_mem); if (tag == DW_TAG_formal_parameter || tag == DW_TAG_variable) { if (convert_variable_location(die_mem, vf->pf->addr, - vf->pf->fb_ops, NULL) == 0) { + vf->pf->fb_ops, &pf->sp_die, + NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1302,7 +1324,8 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) if (tag == DW_TAG_formal_parameter || tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, - af->pf.fb_ops, NULL); + af->pf.fb_ops, &af->pf.sp_die, + NULL); if (ret == 0) { ret = die_get_varname(die_mem, buf, MAX_VAR_LEN); pr_debug2("Add new var: %s\n", buf); -- cgit v0.10.2 From 4157922a9070aef6a516573111fb1c0c67b891ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Oct 2013 14:37:56 +0200 Subject: perf bench: Change the procps visible command-name of invididual benchmark tests plus cleanups Before this patch, looking at 'perf bench sched pipe' behavior over 'top' only told us that something related to perf is running: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 19934 mingo 20 0 54836 1296 952 R 18.6 0.0 0:00.56 perf 19935 mingo 20 0 54836 384 36 S 18.6 0.0 0:00.56 perf After the patch it's clearly visible what's going on: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 19744 mingo 20 0 125m 3536 2644 R 68.2 0.0 0:01.12 sched-pipe 19745 mingo 20 0 125m 1172 276 R 68.2 0.0 0:01.12 sched-pipe The benchmark-subsystem name is concatenated with the individual testcase name. Unfortunately 'perf top' does not show the reconfigured name, possibly because it caches ->comm[] values and does not recognize changes to them? Also clean up a few bits in builtin-bench.c while at it and reorganize the code and the output strings to be consistent. Use iterators to access the various arrays. Rename 'suites' concept to 'benchmark collection' and the 'bench_suite' to 'benchmark/bench'. The many repetitions of 'suite' made the code harder to read and understand. The new output is: comet:~/tip/tools/perf> ./perf bench Usage: perf bench [] [] # List of all available benchmark collections: sched: Scheduler and IPC benchmarks mem: Memory access benchmarks numa: NUMA scheduling and MM benchmarks all: All benchmarks comet:~/tip/tools/perf> ./perf bench sched # List of available benchmarks for collection 'sched': messaging: Benchmark for scheduling and IPC pipe: Benchmark for pipe() between two processes all: Test all scheduler benchmarks comet:~/tip/tools/perf> ./perf bench mem # List of available benchmarks for collection 'mem': memcpy: Benchmark for memcpy() memset: Benchmark for memset() tests all: Test all memory benchmarks comet:~/tip/tools/perf> ./perf bench numa # List of available benchmarks for collection 'numa': mem: Benchmark for NUMA workloads all: Test all NUMA benchmarks Individual benchmark modules were not touched. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131023123756.GA17871@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 33af80f..e47f90c 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -1,21 +1,18 @@ /* - * * builtin-bench.c * - * General benchmarking subsystem provided by perf + * General benchmarking collections provided by perf * * Copyright (C) 2009, Hitoshi Mitake - * */ /* + * Available benchmark collection list: * - * Available subsystem list: - * sched ... scheduler and IPC mechanism + * sched ... scheduler and IPC performance * mem ... memory access performance - * + * numa ... NUMA scheduling and MM performance */ - #include "perf.h" #include "util/util.h" #include "util/parse-options.h" @@ -25,112 +22,92 @@ #include #include #include +#include -struct bench_suite { - const char *name; - const char *summary; - int (*fn)(int, const char **, const char *); +typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix); + +struct bench { + const char *name; + const char *summary; + bench_fn_t fn; }; - \ -/* sentinel: easy for help */ -#define suite_all { "all", "Test all benchmark suites", NULL } #ifdef HAVE_LIBNUMA_SUPPORT -static struct bench_suite numa_suites[] = { - { "mem", - "Benchmark for NUMA workloads", - bench_numa }, - suite_all, - { NULL, - NULL, - NULL } +static struct bench numa_benchmarks[] = { + { "mem", "Benchmark for NUMA workloads", bench_numa }, + { "all", "Test all NUMA benchmarks", NULL }, + { NULL, NULL, NULL } }; #endif -static struct bench_suite sched_suites[] = { - { "messaging", - "Benchmark for scheduler and IPC mechanisms", - bench_sched_messaging }, - { "pipe", - "Flood of communication over pipe() between two processes", - bench_sched_pipe }, - suite_all, - { NULL, - NULL, - NULL } +static struct bench sched_benchmarks[] = { + { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging }, + { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe }, + { "all", "Test all scheduler benchmarks", NULL }, + { NULL, NULL, NULL } }; -static struct bench_suite mem_suites[] = { - { "memcpy", - "Simple memory copy in various ways", - bench_mem_memcpy }, - { "memset", - "Simple memory set in various ways", - bench_mem_memset }, - suite_all, - { NULL, - NULL, - NULL } +static struct bench mem_benchmarks[] = { + { "memcpy", "Benchmark for memcpy()", bench_mem_memcpy }, + { "memset", "Benchmark for memset() tests", bench_mem_memset }, + { "all", "Test all memory benchmarks", NULL }, + { NULL, NULL, NULL } }; -struct bench_subsys { - const char *name; - const char *summary; - struct bench_suite *suites; +struct collection { + const char *name; + const char *summary; + struct bench *benchmarks; }; -static struct bench_subsys subsystems[] = { +static struct collection collections[] = { + { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, + { "mem", "Memory access benchmarks", mem_benchmarks }, #ifdef HAVE_LIBNUMA_SUPPORT - { "numa", - "NUMA scheduling and MM behavior", - numa_suites }, + { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, #endif - { "sched", - "scheduler and IPC mechanism", - sched_suites }, - { "mem", - "memory access performance", - mem_suites }, - { "all", /* sentinel: easy for help */ - "all benchmark subsystem", - NULL }, - { NULL, - NULL, - NULL } + { "all", "All benchmarks", NULL }, + { NULL, NULL, NULL } }; -static void dump_suites(int subsys_index) +/* Iterate over all benchmark collections: */ +#define for_each_collection(coll) \ + for (coll = collections; coll->name; coll++) + +/* Iterate over all benchmarks within a collection: */ +#define for_each_bench(coll, bench) \ + for (bench = coll->benchmarks; bench->name; bench++) + +static void dump_benchmarks(struct collection *coll) { - int i; + struct bench *bench; - printf("# List of available suites for %s...\n\n", - subsystems[subsys_index].name); + printf("\n # List of available benchmarks for collection '%s':\n\n", coll->name); - for (i = 0; subsystems[subsys_index].suites[i].name; i++) - printf("%14s: %s\n", - subsystems[subsys_index].suites[i].name, - subsystems[subsys_index].suites[i].summary); + for_each_bench(coll, bench) + printf("%14s: %s\n", bench->name, bench->summary); printf("\n"); - return; } static const char *bench_format_str; + +/* Output/formatting style, exported to benchmark modules: */ int bench_format = BENCH_FORMAT_DEFAULT; static const struct option bench_options[] = { - OPT_STRING('f', "format", &bench_format_str, "default", - "Specify format style"), + OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"), OPT_END() }; static const char * const bench_usage[] = { - "perf bench [] []", + "perf bench [] []", NULL }; static void print_usage(void) { + struct collection *coll; int i; printf("Usage: \n"); @@ -138,11 +115,10 @@ static void print_usage(void) printf("\t%s\n", bench_usage[i]); printf("\n"); - printf("# List of available subsystems...\n\n"); + printf(" # List of all available benchmark collections:\n\n"); - for (i = 0; subsystems[i].name; i++) - printf("%14s: %s\n", - subsystems[i].name, subsystems[i].summary); + for_each_collection(coll) + printf("%14s: %s\n", coll->name, coll->summary); printf("\n"); } @@ -159,44 +135,74 @@ static int bench_str2int(const char *str) return BENCH_FORMAT_UNKNOWN; } -static void all_suite(struct bench_subsys *subsys) /* FROM HERE */ +/* + * Run a specific benchmark but first rename the running task's ->comm[] + * to something meaningful: + */ +static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn, + int argc, const char **argv, const char *prefix) { - int i; + int size; + char *name; + int ret; + + size = strlen(coll_name) + 1 + strlen(bench_name) + 1; + + name = zalloc(size); + BUG_ON(!name); + + scnprintf(name, size, "%s-%s", coll_name, bench_name); + + prctl(PR_SET_NAME, name); + argv[0] = name; + + ret = fn(argc, argv, prefix); + + free(name); + + return ret; +} + +static void run_collection(struct collection *coll) +{ + struct bench *bench; const char *argv[2]; - struct bench_suite *suites = subsys->suites; argv[1] = NULL; /* * TODO: - * preparing preset parameters for + * + * Preparing preset parameters for * embedded, ordinary PC, HPC, etc... - * will be helpful + * would be helpful. */ - for (i = 0; suites[i].fn; i++) { - printf("# Running %s/%s benchmark...\n", - subsys->name, - suites[i].name); + for_each_bench(coll, bench) { + if (!bench->fn) + break; + printf("# Running %s/%s benchmark...\n", coll->name, bench->name); fflush(stdout); - argv[1] = suites[i].name; - suites[i].fn(1, argv, NULL); + argv[1] = bench->name; + run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL); printf("\n"); } } -static void all_subsystem(void) +static void run_all_collections(void) { - int i; - for (i = 0; subsystems[i].suites; i++) - all_suite(&subsystems[i]); + struct collection *coll; + + for_each_collection(coll) + run_collection(coll); } int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) { - int i, j, status = 0; + struct collection *coll; + int ret = 0; if (argc < 2) { - /* No subsystem specified. */ + /* No collection specified. */ print_usage(); goto end; } @@ -206,7 +212,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) bench_format = bench_str2int(bench_format_str); if (bench_format == BENCH_FORMAT_UNKNOWN) { - printf("Unknown format descriptor:%s\n", bench_format_str); + printf("Unknown format descriptor: '%s'\n", bench_format_str); goto end; } @@ -216,52 +222,51 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) } if (!strcmp(argv[0], "all")) { - all_subsystem(); + run_all_collections(); goto end; } - for (i = 0; subsystems[i].name; i++) { - if (strcmp(subsystems[i].name, argv[0])) + for_each_collection(coll) { + struct bench *bench; + + if (strcmp(coll->name, argv[0])) continue; if (argc < 2) { - /* No suite specified. */ - dump_suites(i); + /* No bench specified. */ + dump_benchmarks(coll); goto end; } if (!strcmp(argv[1], "all")) { - all_suite(&subsystems[i]); + run_collection(coll); goto end; } - for (j = 0; subsystems[i].suites[j].name; j++) { - if (strcmp(subsystems[i].suites[j].name, argv[1])) + for_each_bench(coll, bench) { + if (strcmp(bench->name, argv[1])) continue; if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Running %s/%s benchmark...\n", - subsystems[i].name, - subsystems[i].suites[j].name); + printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); fflush(stdout); - status = subsystems[i].suites[j].fn(argc - 1, - argv + 1, prefix); + ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix); goto end; } if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { - dump_suites(i); + dump_benchmarks(coll); goto end; } - printf("Unknown suite:%s for %s\n", argv[1], argv[0]); - status = 1; + printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]); + ret = 1; goto end; } - printf("Unknown subsystem:%s\n", argv[0]); - status = 1; + printf("Unknown collection: '%s'\n", argv[0]); + ret = 1; end: - return status; + return ret; } -- cgit v0.10.2 From 8a39df8faa1cb130f136d5e404332c16fbb936c0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:15 +0300 Subject: perf sched: Make struct perf_sched sched a local variable Change "struct perf_sched sched" from being global to being local. The build slowdown cured by f36f83f947ed is dealt with in the following patch, by programatically setting perf_sched.curr_pid. Signed-off-by: Adrian Hunter Acked-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5a46b10..5a33856 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1655,29 +1655,28 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static const char default_sort_order[] = "avg, max, switch, runtime"; -static struct perf_sched sched = { - .tool = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_sched__process_fork_event, - .ordered_samples = true, - }, - .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), - .sort_list = LIST_HEAD_INIT(sched.sort_list), - .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, - .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, - .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, - .sort_order = default_sort_order, - .replay_repeat = 10, - .profile_cpu = -1, - .next_shortname1 = 'A', - .next_shortname2 = '0', -}; - int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) { + const char default_sort_order[] = "avg, max, switch, runtime"; + struct perf_sched sched = { + .tool = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_sched__process_fork_event, + .ordered_samples = true, + }, + .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), + .sort_list = LIST_HEAD_INIT(sched.sort_list), + .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, + .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, + .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, + .sort_order = default_sort_order, + .replay_repeat = 10, + .profile_cpu = -1, + .next_shortname1 = 'A', + .next_shortname2 = '0', + }; const struct option latency_options[] = { OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", "sort by key(s): runtime, switch, avg, max"), -- cgit v0.10.2 From 156a2b022907687f28c72d1ba601015f295cd99e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:16 +0300 Subject: perf sched: Optimize build time builtin-sched.c took a log time to build with -O6 optimization. This turned out to be caused by: .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, Fix by initializing curr_pid programmatically. This addresses the problem cured in f36f83f947ed using a smaller hammer. Signed-off-by: Adrian Hunter Acked-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5a33856..ddb5dc1 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1670,7 +1670,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .sort_list = LIST_HEAD_INIT(sched.sort_list), .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, - .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, .sort_order = default_sort_order, .replay_repeat = 10, .profile_cpu = -1, @@ -1732,6 +1731,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .switch_event = replay_switch_event, .fork_event = replay_fork_event, }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) + sched.curr_pid[i] = -1; argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); -- cgit v0.10.2 From 6f3e5eda9d6cc74538430d8f9e8e4baa01249160 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:07 +0300 Subject: perf script: Make perf_script a local variable Change perf_script from being global to being local. Signed-off-by: Adrian Hunter Acked-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-4-git-send-email-adrian.hunter@intel.com [ Made the minor consistency changes suggested by David Ahern ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 27de606..0ae88c2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -542,18 +542,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return 0; } -static struct perf_tool perf_script = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .comm = perf_event__process_comm, - .exit = perf_event__process_exit, - .fork = perf_event__process_fork, - .attr = perf_event__process_attr, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, +struct perf_script { + struct perf_tool tool; + struct perf_session *session; }; static void sig_handler(int sig __maybe_unused) @@ -561,13 +552,13 @@ static void sig_handler(int sig __maybe_unused) session_done = 1; } -static int __cmd_script(struct perf_session *session) +static int __cmd_script(struct perf_script *script) { int ret; signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &perf_script); + ret = perf_session__process_events(script->session, &script->tool); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1273,6 +1264,21 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) char *script_path = NULL; const char **__argv; int i, j, err; + struct perf_script script = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .attr = perf_event__process_attr, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, + }; const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), @@ -1498,10 +1504,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (!script_name) setup_pager(); - session = perf_session__new(&file, false, &perf_script); + session = perf_session__new(&file, false, &script.tool); if (session == NULL) return -ENOMEM; + script.session = session; + if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; @@ -1565,7 +1573,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (err < 0) goto out; - err = __cmd_script(session); + err = __cmd_script(&script); perf_session__delete(session); cleanup_scripting(); -- cgit v0.10.2 From 89c97d936e76b064a52ee056602b2a62b3f1ef70 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:09 +0300 Subject: perf inject: Do not repipe attributes to a perf.data file perf.data files contain the attributes separately, do not put them in the event stream as well. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index eb1a594..409ceaf 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -72,12 +72,17 @@ static int perf_event__repipe_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist) { + struct perf_inject *inject = container_of(tool, struct perf_inject, + tool); int ret; ret = perf_event__process_attr(tool, event, pevlist); if (ret) return ret; + if (!inject->pipe_output) + return 0; + return perf_event__repipe_synth(tool, event); } -- cgit v0.10.2 From 56921becdd1eb0720603fc2e6e4c7f518196d917 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:17 +0300 Subject: perf tools: Do not accept parse_tag_value() overflow parse_tag_value() accepts an "unsigned long" and multiplies it according to a tag character. Do not accept the value if the multiplication overflows. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index c25e57b..28a0a89 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -386,6 +386,8 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) if (s != endptr) break; + if (value > ULONG_MAX / i->mult) + break; value *= i->mult; return value; } -- cgit v0.10.2 From 2fbe4abe944868aafdde233557ac85379b60ce46 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:18 +0300 Subject: perf evlist: Validate that mmap_pages is not too big Amend perf_evlist__parse_mmap_pages() to check that the mmap_pages entered via the --mmap_pages/-m option is not too big. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 85c4c80..2ce92ec 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -698,7 +698,8 @@ static size_t perf_evlist__mmap_size(unsigned long pages) int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) { - unsigned int pages, val, *mmap_pages = opt->value; + unsigned int *mmap_pages = opt->value; + unsigned long pages, val; size_t size; static struct parse_tag tags[] = { { .tag = 'B', .mult = 1 }, @@ -709,12 +710,12 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, }; val = parse_tag_value(str, tags); - if (val != (unsigned int) -1) { + if (val != (unsigned long) -1) { /* we got file size value */ pages = PERF_ALIGN(val, page_size) / page_size; - if (!is_power_of_2(pages)) { + if (pages < (1UL << 31) && !is_power_of_2(pages)) { pages = next_pow2(pages); - pr_info("rounding mmap pages size to %u (%u pages)\n", + pr_info("rounding mmap pages size to %lu (%lu pages)\n", pages * page_size, pages); } } else { @@ -727,6 +728,11 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, } } + if (pages > UINT_MAX || pages > SIZE_MAX / page_size) { + pr_err("--mmap_pages/-m value too big\n"); + return -1; + } + size = perf_evlist__mmap_size(pages); if (!size) { pr_err("--mmap_pages/-m value must be a power of two."); -- cgit v0.10.2 From 74af377bc25dd9ebcb0be12836abb6b401b5dd08 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2013 10:34:05 +0300 Subject: perf tools: Fix non-debug build In the absence of s DEBUG variable definition on the command line perf tools was building without optimization. Fix by assigning DEBUG if it is not defined. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382427258-17495-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c516d6ba..543aa95 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -66,6 +66,10 @@ ifneq ($(WERROR),0) CFLAGS += -Werror endif +ifndef DEBUG + DEBUG := 0 +endif + ifeq ($(DEBUG),0) CFLAGS += -O6 endif -- cgit v0.10.2 From 4779a2e99af80e133ee1c70c7093dc6cc13429a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Oct 2013 14:08:48 -0300 Subject: perf ui: Rename ui_progress to ui_progress_ops Reserving 'struct ui_progress' to the per progress instances, not to the particular set of operations used to implmenet a progress bar in the current UI (GTK, TUI, etc). Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-zjqbfp9gx3yo45s0rp9uv42n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 326a26e..8a9ca38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -487,6 +487,7 @@ ifndef NO_SLANG LIB_OBJS += $(OUTPUT)ui/tui/util.o LIB_OBJS += $(OUTPUT)ui/tui/helpline.o LIB_OBJS += $(OUTPUT)ui/tui/progress.o + LIB_H += ui/tui/tui.h LIB_H += ui/browser.h LIB_H += ui/browsers/map.h LIB_H += ui/keysyms.h diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 8576cf1..0a9173f 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -34,7 +34,7 @@ struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); void perf_gtk__init_helpline(void); -void perf_gtk__init_progress(void); +void gtk_ui_progress__init(void); void perf_gtk__init_hpp(void); void perf_gtk__signal(int sig); diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c index 482bcf3..195c7f9 100644 --- a/tools/perf/ui/gtk/progress.c +++ b/tools/perf/ui/gtk/progress.c @@ -7,7 +7,7 @@ static GtkWidget *dialog; static GtkWidget *progress; -static void gtk_progress_update(u64 curr, u64 total, const char *title) +static void gtk_ui_progress__update(u64 curr, u64 total, const char *title) { double fraction = total ? 1.0 * curr / total : 0.0; char buf[1024]; @@ -40,7 +40,7 @@ static void gtk_progress_update(u64 curr, u64 total, const char *title) gtk_main_iteration(); } -static void gtk_progress_finish(void) +static void gtk_ui_progress__finish(void) { /* this will also destroy all of its children */ gtk_widget_destroy(dialog); @@ -48,12 +48,12 @@ static void gtk_progress_finish(void) dialog = NULL; } -static struct ui_progress gtk_progress_fns = { - .update = gtk_progress_update, - .finish = gtk_progress_finish, +static struct ui_progress_ops gtk_ui_progress__ops = { + .update = gtk_ui_progress__update, + .finish = gtk_ui_progress__finish, }; -void perf_gtk__init_progress(void) +void gtk_ui_progress__init(void) { - progress_fns = >k_progress_fns; + ui_progress__ops = >k_ui_progress__ops; } diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 6c2dd2e..1d57676 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -8,7 +8,7 @@ int perf_gtk__init(void) { perf_error__register(&perf_gtk_eops); perf_gtk__init_helpline(); - perf_gtk__init_progress(); + gtk_ui_progress__init(); perf_gtk__init_hpp(); return gtk_init_check(NULL, NULL) ? 0 : -1; diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index 3ec69560..d753821 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -1,26 +1,26 @@ #include "../cache.h" #include "progress.h" -static void nop_progress_update(u64 curr __maybe_unused, - u64 total __maybe_unused, - const char *title __maybe_unused) +static void null_progress__update(u64 curr __maybe_unused, + u64 total __maybe_unused, + const char *title __maybe_unused) { } -static struct ui_progress default_progress_fns = +static struct ui_progress_ops null_progress__ops = { - .update = nop_progress_update, + .update = null_progress__update, }; -struct ui_progress *progress_fns = &default_progress_fns; +struct ui_progress_ops *ui_progress__ops = &null_progress__ops; void ui_progress__update(u64 curr, u64 total, const char *title) { - return progress_fns->update(curr, total, title); + return ui_progress__ops->update(curr, total, title); } void ui_progress__finish(void) { - if (progress_fns->finish) - progress_fns->finish(); + if (ui_progress__ops->finish) + ui_progress__ops->finish(); } diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index 257cc22..d41bde5 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -3,14 +3,12 @@ #include <../types.h> -struct ui_progress { +struct ui_progress_ops { void (*update)(u64, u64, const char *); void (*finish)(void); }; -extern struct ui_progress *progress_fns; - -void ui_progress__init(void); +extern struct ui_progress_ops *ui_progress__ops; void ui_progress__update(u64 curr, u64 total, const char *title); void ui_progress__finish(void); diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index 6c2184d..0fcc5a1 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -2,6 +2,7 @@ #include "../progress.h" #include "../libslang.h" #include "../ui.h" +#include "tui.h" #include "../browser.h" static void tui_progress__update(u64 curr, u64 total, const char *title) @@ -31,12 +32,12 @@ static void tui_progress__update(u64 curr, u64 total, const char *title) pthread_mutex_unlock(&ui__lock); } -static struct ui_progress tui_progress_fns = +static struct ui_progress_ops tui_progress__ops = { .update = tui_progress__update, }; -void ui_progress__init(void) +void tui_progress__init(void) { - progress_fns = &tui_progress_fns; + ui_progress__ops = &tui_progress__ops; } diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index b940148..2f61256 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -9,6 +9,7 @@ #include "../util.h" #include "../libslang.h" #include "../keysyms.h" +#include "tui.h" static volatile int ui__need_resize; @@ -119,7 +120,7 @@ int ui__init(void) ui_helpline__init(); ui_browser__init(); - ui_progress__init(); + tui_progress__init(); signal(SIGSEGV, ui__signal); signal(SIGFPE, ui__signal); diff --git a/tools/perf/ui/tui/tui.h b/tools/perf/ui/tui/tui.h new file mode 100644 index 0000000..18961c7 --- /dev/null +++ b/tools/perf/ui/tui/tui.h @@ -0,0 +1,6 @@ +#ifndef _PERF_TUI_H_ +#define _PERF_TUI_H_ 1 + +void tui_progress__init(void); + +#endif /* _PERF_TUI_H_ */ -- cgit v0.10.2 From 4d3001fdfdfacd2b35ee74ff0f037274eeebd3f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Oct 2013 15:40:38 -0300 Subject: perf ui progress: Per progress bar state That will ease using a progress bar across multiple functions, like in the upcoming patches that will present a progress bar when collapsing histograms. Based on a previous patch by Namhyung Kim. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cr7lq7ud9fj21bg7wvq27w1u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c index 195c7f9..b656655 100644 --- a/tools/perf/ui/gtk/progress.c +++ b/tools/perf/ui/gtk/progress.c @@ -7,14 +7,14 @@ static GtkWidget *dialog; static GtkWidget *progress; -static void gtk_ui_progress__update(u64 curr, u64 total, const char *title) +static void gtk_ui_progress__update(struct ui_progress *p) { - double fraction = total ? 1.0 * curr / total : 0.0; + double fraction = p->total ? 1.0 * p->curr / p->total : 0.0; char buf[1024]; if (dialog == NULL) { GtkWidget *vbox = gtk_vbox_new(TRUE, 5); - GtkWidget *label = gtk_label_new(title); + GtkWidget *label = gtk_label_new(p->title); dialog = gtk_window_new(GTK_WINDOW_TOPLEVEL); progress = gtk_progress_bar_new(); @@ -32,7 +32,7 @@ static void gtk_ui_progress__update(u64 curr, u64 total, const char *title) } gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction); - snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, curr, total); + snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, p->curr, p->total); gtk_progress_bar_set_text(GTK_PROGRESS_BAR(progress), buf); /* we didn't call gtk_main yet, so do it manually */ diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index d753821..a0f24c7 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -1,9 +1,7 @@ #include "../cache.h" #include "progress.h" -static void null_progress__update(u64 curr __maybe_unused, - u64 total __maybe_unused, - const char *title __maybe_unused) +static void null_progress__update(struct ui_progress *p __maybe_unused) { } @@ -14,9 +12,23 @@ static struct ui_progress_ops null_progress__ops = struct ui_progress_ops *ui_progress__ops = &null_progress__ops; -void ui_progress__update(u64 curr, u64 total, const char *title) +void ui_progress__update(struct ui_progress *p, u64 adv) { - return ui_progress__ops->update(curr, total, title); + p->curr += adv; + + if (p->curr >= p->next) { + p->next += p->step; + ui_progress__ops->update(p); + } +} + +void ui_progress__init(struct ui_progress *p, u64 total, const char *title) +{ + p->curr = 0; + p->next = p->step = total / 16; + p->total = total; + p->title = title; + } void ui_progress__finish(void) diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index d41bde5..29ec8ef 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -3,14 +3,21 @@ #include <../types.h> +void ui_progress__finish(void); + +struct ui_progress { + const char *title; + u64 curr, next, step, total; +}; + +void ui_progress__init(struct ui_progress *p, u64 total, const char *title); +void ui_progress__update(struct ui_progress *p, u64 adv); + struct ui_progress_ops { - void (*update)(u64, u64, const char *); + void (*update)(struct ui_progress *p); void (*finish)(void); }; extern struct ui_progress_ops *ui_progress__ops; -void ui_progress__update(u64 curr, u64 total, const char *title); -void ui_progress__finish(void); - #endif diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index 0fcc5a1..3e2d936 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -5,7 +5,7 @@ #include "tui.h" #include "../browser.h" -static void tui_progress__update(u64 curr, u64 total, const char *title) +static void tui_progress__update(struct ui_progress *p) { int bar, y; /* @@ -15,7 +15,7 @@ static void tui_progress__update(u64 curr, u64 total, const char *title) if (use_browser <= 0) return; - if (total == 0) + if (p->total == 0) return; ui__refresh_dimensions(true); @@ -24,9 +24,9 @@ static void tui_progress__update(u64 curr, u64 total, const char *title) SLsmg_set_color(0); SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols); SLsmg_gotorc(y++, 1); - SLsmg_write_string((char *)title); + SLsmg_write_string((char *)p->title); SLsmg_set_color(HE_COLORSET_SELECTED); - bar = ((SLtt_Screen_Cols - 2) * curr) / total; + bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total; SLsmg_fill_region(y, 1, 1, bar, ' '); SLsmg_refresh(); pthread_mutex_unlock(&ui__lock); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 854c5aa..4ba7b54 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -503,13 +503,16 @@ static int flush_sample_queue(struct perf_session *s, struct perf_sample sample; u64 limit = os->next_flush; u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; - unsigned idx = 0, progress_next = os->nr_samples / 16; bool show_progress = limit == ULLONG_MAX; + struct ui_progress prog; int ret; if (!tool->ordered_samples || !limit) return 0; + if (show_progress) + ui_progress__init(&prog, os->nr_samples, "Processing time ordered events..."); + list_for_each_entry_safe(iter, tmp, head, list) { if (session_done()) return 0; @@ -530,11 +533,9 @@ static int flush_sample_queue(struct perf_session *s, os->last_flush = iter->timestamp; list_del(&iter->list); list_add(&iter->list, &os->sample_cache); - if (show_progress && (++idx >= progress_next)) { - progress_next += os->nr_samples / 16; - ui_progress__update(idx, os->nr_samples, - "Processing time ordered events..."); - } + + if (show_progress) + ui_progress__update(&prog, 1); } if (list_empty(head)) { @@ -1285,12 +1286,13 @@ int __perf_session__process_events(struct perf_session *session, u64 file_size, struct perf_tool *tool) { int fd = perf_data_file__fd(session->file); - u64 head, page_offset, file_offset, file_pos, progress_next; + u64 head, page_offset, file_offset, file_pos; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; char *buf, *mmaps[NUM_MMAPS]; union perf_event *event; uint32_t size; + struct ui_progress prog; perf_tool__fill_defaults(tool); @@ -1301,7 +1303,7 @@ int __perf_session__process_events(struct perf_session *session, if (data_size && (data_offset + data_size < file_size)) file_size = data_offset + data_size; - progress_next = file_size / 16; + ui_progress__init(&prog, file_size, "Processing events..."); mmap_size = MMAP_SIZE; if (mmap_size > file_size) @@ -1356,11 +1358,7 @@ more: head += size; file_pos += size; - if (file_pos >= progress_next) { - progress_next += file_size / 16; - ui_progress__update(file_pos, file_size, - "Processing events..."); - } + ui_progress__update(&prog, size); if (session_done()) goto out; -- cgit v0.10.2 From c1fb5651bb40f9efaf32d280f39e06df7e352673 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Oct 2013 14:15:38 +0900 Subject: perf tools: Show progress on histogram collapsing It can take quite amount of time so add progress bar UI to inform user. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381468543-25334-4-git-send-email-namhyung@kernel.org [ perf_progress -> ui_progress ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 17c6b49..6c5ae57 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -247,7 +247,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; - hists__collapse_resort(hists); + hists__collapse_resort(hists, NULL); hists__output_resort(hists); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9c82888..b605009 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -369,7 +369,7 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) list_for_each_entry(evsel, &evlist->entries, node) { struct hists *hists = &evsel->hists; - hists__collapse_resort(hists); + hists__collapse_resort(hists, NULL); } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e3598a4..98d3891 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -496,6 +496,7 @@ static int __cmd_report(struct perf_report *rep) struct map *kernel_map; struct kmap *kernel_kmap; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; + struct ui_progress prog; struct perf_data_file *file = session->file; signal(SIGINT, sig_handler); @@ -558,13 +559,19 @@ static int __cmd_report(struct perf_report *rep) } nr_samples = 0; + list_for_each_entry(pos, &session->evlist->entries, node) + nr_samples += pos->hists.nr_entries; + + ui_progress__init(&prog, nr_samples, "Merging related events..."); + + nr_samples = 0; list_for_each_entry(pos, &session->evlist->entries, node) { struct hists *hists = &pos->hists; if (pos->idx == 0) hists->symbol_filter_str = rep->symbol_filter_str; - hists__collapse_resort(hists); + hists__collapse_resort(hists, &prog); nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE]; /* Non-group events are considered as leader */ @@ -576,6 +583,7 @@ static int __cmd_report(struct perf_report *rep) hists__link(leader_hists, hists); } } + ui_progress__finish(); if (session_done()) return 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 386d833..76c9264 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -286,7 +286,7 @@ static void perf_top__print_sym_table(struct perf_top *top) return; } - hists__collapse_resort(&top->sym_evsel->hists); + hists__collapse_resort(&top->sym_evsel->hists, NULL); hists__output_resort(&top->sym_evsel->hists); hists__decay_entries(&top->sym_evsel->hists, top->hide_user_symbols, @@ -552,7 +552,7 @@ static void perf_top__sort_new_samples(void *arg) if (t->evlist->selected != NULL) t->sym_evsel = t->evlist->selected; - hists__collapse_resort(&t->sym_evsel->hists); + hists__collapse_resort(&t->sym_evsel->hists, NULL); hists__output_resort(&t->sym_evsel->hists); hists__decay_entries(&t->sym_evsel->hists, t->hide_user_symbols, diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 025503a..b51abcb 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -467,7 +467,7 @@ int test__hists_link(void) goto out; list_for_each_entry(evsel, &evlist->entries, node) { - hists__collapse_resort(&evsel->hists); + hists__collapse_resort(&evsel->hists, NULL); if (verbose > 2) print_hists(&evsel->hists); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f0b863e..7e80253 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -399,6 +399,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!he) return NULL; + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: @@ -604,7 +605,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_symbol(hists, he); } -void hists__collapse_resort(struct hists *hists) +void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; @@ -631,6 +632,8 @@ void hists__collapse_resort(struct hists *hists) */ hists__apply_filters(hists, n); } + if (prog) + ui_progress__update(prog, 1); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 20b1758..0c7ce8b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -5,6 +5,7 @@ #include #include "callchain.h" #include "header.h" +#include "ui/progress.h" extern struct callchain_param callchain_param; @@ -108,7 +109,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self, u64 weight); void hists__output_resort(struct hists *self); -void hists__collapse_resort(struct hists *self); +void hists__collapse_resort(struct hists *self, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__output_recalc_col_len(struct hists *hists, int max_rows); -- cgit v0.10.2 From d9494cb4299da66541a3f3ab82c552889bee0606 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Oct 2013 15:36:19 +0200 Subject: perf: Remove useless atomic_t There's nothing atomic about atomic_set vs atomic_read; so remove the atomic_t usage. Also, make running_sample_length static as it really is (and should be) local to this translation unit. Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/n/tip-vw9lg588x1ic248whybjon0c@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 5bd7fe4..028dad9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; -static atomic_t perf_sample_allowed_ns __read_mostly = - ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100); +static int perf_sample_allowed_ns __read_mostly = + DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100; void update_perf_cpu_limits(void) { @@ -184,7 +184,7 @@ void update_perf_cpu_limits(void) tmp *= sysctl_perf_cpu_time_max_percent; do_div(tmp, 100); - atomic_set(&perf_sample_allowed_ns, tmp); + ACCESS_ONCE(perf_sample_allowed_ns) = tmp; } static int perf_rotate_context(struct perf_cpu_context *cpuctx); @@ -228,14 +228,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, * we detect that events are taking too long. */ #define NR_ACCUMULATED_SAMPLES 128 -DEFINE_PER_CPU(u64, running_sample_length); +static DEFINE_PER_CPU(u64, running_sample_length); void perf_sample_event_took(u64 sample_len_ns) { u64 avg_local_sample_len; u64 local_samples_len; + u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); - if (atomic_read(&perf_sample_allowed_ns) == 0) + if (allowed_ns == 0) return; /* decay the counter by 1 average sample */ @@ -251,7 +252,7 @@ void perf_sample_event_took(u64 sample_len_ns) */ avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; - if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns)) + if (avg_local_sample_len <= allowed_ns) return; if (max_samples_per_tick <= 1) @@ -262,10 +263,9 @@ void perf_sample_event_took(u64 sample_len_ns) perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; printk_ratelimited(KERN_WARNING - "perf samples too long (%lld > %d), lowering " + "perf samples too long (%lld > %lld), lowering " "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, - atomic_read(&perf_sample_allowed_ns), + avg_local_sample_len, allowed_ns, sysctl_perf_event_sample_rate); update_perf_cpu_limits(); -- cgit v0.10.2 From 32c5fb7e7d18b4fd37c5e29dea731151e9d66866 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 22:09:45 +0200 Subject: perf: Kill the dead !vma->vm_mm code in perf_event_mmap_event() 1. perf_event_mmap(vma) is never called with a gate_vma-like arg, remove the "if (!vma->vm_mm)" code. 2. arch_vma_name() can use the chached value of mmap_event->vma. 3. Change the code to not call arch_vma_name() twice. 4. Purely cosmetic, but since we use "goto got_name" all the time remove "else" from "[stack]" branch just for symmetry. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131016200945.GB23214@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 028dad9..3ea5605 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5136,21 +5136,19 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) min = MINOR(dev); } else { - if (arch_vma_name(mmap_event->vma)) { - name = strncpy(tmp, arch_vma_name(mmap_event->vma), - sizeof(tmp) - 1); + name = arch_vma_name(vma); + if (name) { + name = strncpy(tmp, name, sizeof(tmp) - 1); tmp[sizeof(tmp) - 1] = '\0'; goto got_name; } - if (!vma->vm_mm) { - name = strncpy(tmp, "[vdso]", sizeof(tmp)); - goto got_name; - } else if (vma->vm_start <= vma->vm_mm->start_brk && + if (vma->vm_start <= vma->vm_mm->start_brk && vma->vm_end >= vma->vm_mm->brk) { name = strncpy(tmp, "[heap]", sizeof(tmp)); goto got_name; - } else if (vma->vm_start <= vma->vm_mm->start_stack && + } + if (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { name = strncpy(tmp, "[stack]", sizeof(tmp)); goto got_name; -- cgit v0.10.2 From 3ea2f2b96f9e636f49eb10962e96db3e19cab157 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 22:10:04 +0200 Subject: perf: Do not waste PAGE_SIZE bytes for ALIGN(8) in perf_event_mmap_event() perf_event_mmap_event() does kzalloc(PATH_MAX + sizeof(u64)) to ensure we can align the size later. However this means that we actually allocate PAGE_SIZE * 2 buffer, seems too much. Change this code to allocate PATH_MAX==PAGE_SIZE bytes, but tell d_path() to not use the last sizeof(u64) bytes. Note: it is not clear why do we need __GFP_ZERO, see the next patch. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131016201004.GC23214@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 3ea5605..b409e75 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5113,17 +5113,18 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) if (file) { struct inode *inode; dev_t dev; - /* - * d_path works from the end of the rb backwards, so we - * need to add enough zero bytes after the string to handle - * the 64bit alignment we do later. - */ - buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); + + buf = kzalloc(PATH_MAX, GFP_KERNEL); if (!buf) { name = strncpy(tmp, "//enomem", sizeof(tmp)); goto got_name; } - name = d_path(&file->f_path, buf, PATH_MAX); + /* + * d_path() works from the end of the rb backwards, so we + * need to add enough zero bytes after the string to handle + * the 64bit alignment we do later. + */ + name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = strncpy(tmp, "//toolong", sizeof(tmp)); goto got_name; -- cgit v0.10.2 From 2c42cfbfe10872929c2ba1f8130e31063ff59b94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Oct 2013 00:06:46 +0200 Subject: perf: Change zero-padding of strings in perf_event_mmap_event() Oleg complained about the excessive 0-ing in perf_event_mmap_event(), so try and be smarter about it while keeping it fairly fool proof and avoid leaking random bits out to userspace. Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8jirlm99m6if2z13wd6rbyu6@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index b409e75..85a8bbd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5106,15 +5106,13 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) unsigned int size; char tmp[16]; char *buf = NULL; - const char *name; - - memset(tmp, 0, sizeof(tmp)); + char *name; if (file) { struct inode *inode; dev_t dev; - buf = kzalloc(PATH_MAX, GFP_KERNEL); + buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) { name = strncpy(tmp, "//enomem", sizeof(tmp)); goto got_name; @@ -5137,7 +5135,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) min = MINOR(dev); } else { - name = arch_vma_name(vma); + name = (char *)arch_vma_name(vma); if (name) { name = strncpy(tmp, name, sizeof(tmp) - 1); tmp[sizeof(tmp) - 1] = '\0'; @@ -5160,7 +5158,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) } got_name: - size = ALIGN(strlen(name)+1, sizeof(u64)); + /* + * Since our buffer works in 8 byte units we need to align our string + * size to a multiple of 8. However, we must guarantee the tail end is + * zero'd out to avoid leaking random bits to userspace. + */ + size = strlen(name)+1; + while (!IS_ALIGNED(size, sizeof(u64))) + name[size++] = '\0'; mmap_event->file_name = name; mmap_event->file_size = size; -- cgit v0.10.2 From e00b12e64be9a34ef071de7b6052ca9ea29dd460 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Oct 2013 12:52:06 +0200 Subject: perf/x86: Further optimize copy_from_user_nmi() Now that we can deal with nested NMI due to IRET re-enabling NMIs and can deal with faults from NMI by making sure we preserve CR2 over NMIs we can in fact simply access user-space memory from NMI context. So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and rework the fault path to do the minimal required work before taking the in_atomic() fault handler. In particular avoid perf_sw_event() which would make perf recurse on itself (it should be harmless as our recursion protections should be able to deal with this -- but why tempt fate). Also rename notify_page_fault() to kprobes_fault() as that is a much better name; there is no notifier in it and its specific to kprobes. Don measured that his worst case NMI path shrunk from ~300K cycles to ~150K cycles. Cc: Stephane Eranian Cc: jmario@redhat.com Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Andi Kleen Cc: dave.hansen@linux.intel.com Tested-by: Don Zickus Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131024105206.GM2490@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 4f74d94..5465b86 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -11,39 +11,26 @@ #include /* - * best effort, GUP based copy_from_user() that is NMI-safe + * We rely on the nested NMI work to allow atomic faults from the NMI path; the + * nested NMI paths are careful to preserve CR2. */ unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n) { - unsigned long offset, addr = (unsigned long)from; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; + unsigned long ret; if (__range_not_ok(from, n, TASK_SIZE)) - return len; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page); - memcpy(to, map+offset, size); - kunmap_atomic(map); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; + return 0; + + /* + * Even though this function is typically called from NMI/IRQ context + * disable pagefaults so that its behaviour is consistent even when + * called form other contexts. + */ + pagefault_disable(); + ret = __copy_from_user_inatomic(to, from, n); + pagefault_enable(); + + return n - ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3aaeffc..7a517bb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int __kprobes notify_page_fault(struct pt_regs *regs) +static inline int __kprobes kprobes_fault(struct pt_regs *regs) { int ret = 0; @@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) return; /* kprobes don't want to hook the spurious faults: */ - if (notify_page_fault(regs)) + if (kprobes_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) } /* kprobes don't want to hook the spurious faults: */ - if (unlikely(notify_page_fault(regs))) + if (unlikely(kprobes_fault(regs))) return; - /* - * It's safe to allow irq's after cr2 has been saved and the - * vmalloc fault has been handled. - * - * User-mode registers count as a user access even for any - * potential system fault or CPU buglet: - */ - if (user_mode_vm(regs)) { - local_irq_enable(); - error_code |= PF_USER; - flags |= FAULT_FLAG_USER; - } else { - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); - } if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); @@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) } } - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - /* * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: @@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } + /* + * It's safe to allow irq's after cr2 has been saved and the + * vmalloc fault has been handled. + * + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet: + */ + if (user_mode_vm(regs)) { + local_irq_enable(); + error_code |= PF_USER; + flags |= FAULT_FLAG_USER; + } else { + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); + } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (error_code & PF_WRITE) flags |= FAULT_FLAG_WRITE; -- cgit v0.10.2 From 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Oct 2013 17:12:48 +0200 Subject: perf: Fix the perf context switch optimization Currently we only optimize the context switch between two contexts that have the same parent; this forgoes the optimization between parent and child context, even though these contexts could be equivalent too. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Adrian Hunter Cc: Shishkin, Alexander Link: http://lkml.kernel.org/r/20131007164257.GH3081@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 85a8bbd..17b3c6c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx) put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } + ctx->generation++; } static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) @@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; + + ctx->generation++; } /* @@ -1313,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) */ if (event->state > PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_OFF; + + ctx->generation++; } static void perf_group_detach(struct perf_event *event) @@ -2149,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx, } /* - * Test whether two contexts are equivalent, i.e. whether they - * have both been cloned from the same version of the same context - * and they both have the same number of enabled events. - * If the number of enabled events is the same, then the set - * of enabled events should be the same, because these are both - * inherited contexts, therefore we can't access individual events - * in them directly with an fd; we can only enable/disable all - * events via prctl, or enable/disable all events in a family - * via ioctl, which will have the same effect on both contexts. + * Test whether two contexts are equivalent, i.e. whether they have both been + * cloned from the same version of the same context. + * + * Equivalence is measured using a generation number in the context that is + * incremented on each modification to it; see unclone_ctx(), list_add_event() + * and list_del_event(). */ static int context_equiv(struct perf_event_context *ctx1, struct perf_event_context *ctx2) { - return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx - && ctx1->parent_gen == ctx2->parent_gen - && !ctx1->pin_count && !ctx2->pin_count; + /* Pinning disables the swap optimization */ + if (ctx1->pin_count || ctx2->pin_count) + return 0; + + /* If ctx1 is the parent of ctx2 */ + if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) + return 1; + + /* If ctx2 is the parent of ctx1 */ + if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) + return 1; + + /* + * If ctx1 and ctx2 have the same parent; we flatten the parent + * hierarchy, see perf_event_init_context(). + */ + if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && + ctx1->parent_gen == ctx2->parent_gen) + return 1; + + /* Unmatched */ + return 0; } static void __perf_event_sync_stat(struct perf_event *event, @@ -2247,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, { struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; - struct perf_event_context *parent; + struct perf_event_context *parent, *next_parent; struct perf_cpu_context *cpuctx; int do_switch = 1; @@ -2259,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, return; rcu_read_lock(); - parent = rcu_dereference(ctx->parent_ctx); next_ctx = next->perf_event_ctxp[ctxn]; - if (parent && next_ctx && - rcu_dereference(next_ctx->parent_ctx) == parent) { + if (!next_ctx) + goto unlock; + + parent = rcu_dereference(ctx->parent_ctx); + next_parent = rcu_dereference(next_ctx->parent_ctx); + + /* If neither context have a parent context; they cannot be clones. */ + if (!parent && !next_parent) + goto unlock; + + if (next_parent == ctx || next_ctx == parent || next_parent == parent) { /* * Looks like the two contexts are clones, so we might be * able to optimize the context switch. We lock both @@ -2290,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, raw_spin_unlock(&next_ctx->lock); raw_spin_unlock(&ctx->lock); } +unlock: rcu_read_unlock(); if (do_switch) { @@ -7136,7 +7166,6 @@ SYSCALL_DEFINE5(perf_event_open, } perf_install_in_context(ctx, event, event->cpu); - ++ctx->generation; perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); @@ -7219,7 +7248,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); perf_install_in_context(ctx, event, cpu); - ++ctx->generation; perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); -- cgit v0.10.2 From c2d3f25dda016d9697c5416810d4528770f0a281 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 9 Oct 2013 14:08:09 +0200 Subject: uprobes: Remove the wrong __weak attribute linux/uprobes.h declares arch_uprobe_skip_sstep() as a weak function. But as there is no definition of generic version so when trying to build uprobes for an architecture that doesn't yet have a arch_uprobe_skip_sstep() implementation, the vmlinux will try to call arch_uprobe_skip_sstep() somehwere in Stupidhistan leading to a system crash. We rather want a proper link error so remove arch_uprobe_skip_sstep(). Signed-off-by: Ralf Baechle Signed-off-by: Oleg Nesterov diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 06f28be..e6fba62 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -123,7 +123,7 @@ extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); -extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v0.10.2 From b68e0749100e1b901bf11330f149b321c082178e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:31 +0200 Subject: uprobes: Change the callsite of uprobe_copy_process() Preparation for the next patches. Move the callsite of uprobe_copy_process() in copy_process() down to the succesfull return. We do not care if copy_process() fails, uprobe_free_utask() won't be called in this case so the wrong ->utask != NULL doesn't matter. OTOH, with this change we know that copy_process() can't fail when uprobe_copy_process() is called, the new task should either return to user-mode or call do_exit(). This way uprobe_copy_process() can: 1. setup p->utask != NULL if necessary 2. setup uprobes_state.xol_area 3. use task_work_add(p) Also, move the definition of uprobe_copy_process() down so that it can see get_utask(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bd..db7a1dc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1345,14 +1345,6 @@ void uprobe_free_utask(struct task_struct *t) } /* - * Called in context of a new clone/fork from copy_process. - */ -void uprobe_copy_process(struct task_struct *t) -{ - t->utask = NULL; -} - -/* * Allocate a uprobe_task object for the task if if necessary. * Called when the thread hits a breakpoint. * @@ -1368,6 +1360,14 @@ static struct uprobe_task *get_utask(void) } /* + * Called in context of a new clone/fork from copy_process. + */ +void uprobe_copy_process(struct task_struct *t) +{ + t->utask = NULL; +} + +/* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. * diff --git a/kernel/fork.c b/kernel/fork.c index 086fe73..d3603b8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif - uprobe_copy_process(p); /* * sigaltstack should be cleared when sharing the same VM */ @@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); + uprobe_copy_process(p); return p; -- cgit v0.10.2 From 6441ec8b7c108b72789d120562b9f1d976e4aaaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:35 +0200 Subject: uprobes: Introduce __create_xol_area() No functional changes, preparation. Extract the code which actually allocates/installs the new area into the new helper, __create_xol_area(). While at it remove the unnecessary "ret = ENOMEM" and "ret = 0" in xol_add_vma(), they both have no effect. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index db7a1dc..ad17d81 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1096,16 +1096,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon } /* Slot allocation for XOL */ -static int xol_add_vma(struct xol_area *area) +static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { - struct mm_struct *mm = current->mm; int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; - ret = -ENOMEM; /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1120,28 +1118,17 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; - ret = 0; fail: up_write(&mm->mmap_sem); return ret; } -/* - * get_xol_area - Allocate process's xol_area if necessary. - * This area will be used for storing instructions for execution out of line. - * - * Returns the allocated area or NULL. - */ -static struct xol_area *get_xol_area(void) +static struct xol_area *__create_xol_area(void) { struct mm_struct *mm = current->mm; - struct xol_area *area; uprobe_opcode_t insn = UPROBE_SWBP_INSN; - - area = mm->uprobes_state.xol_area; - if (area) - goto ret; + struct xol_area *area; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -1155,13 +1142,13 @@ static struct xol_area *get_xol_area(void) if (!area->page) goto free_bitmap; - /* allocate first slot of task's xol_area for the return probes */ + init_waitqueue_head(&area->wq); + /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); - copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); atomic_set(&area->slot_count, 1); - init_waitqueue_head(&area->wq); + copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); - if (!xol_add_vma(area)) + if (!xol_add_vma(mm, area)) return area; __free_page(area->page); @@ -1170,9 +1157,25 @@ static struct xol_area *get_xol_area(void) free_area: kfree(area); out: + return NULL; +} + +/* + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. + * + * Returns the allocated area or NULL. + */ +static struct xol_area *get_xol_area(void) +{ + struct mm_struct *mm = current->mm; + struct xol_area *area; + + if (!mm->uprobes_state.xol_area) + __create_xol_area(); + area = mm->uprobes_state.xol_area; - ret: - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ return area; } -- cgit v0.10.2 From af0d95af79773f7637107cd3871aaabcb425f15a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:38 +0200 Subject: uprobes: Teach __create_xol_area() to accept the predefined vaddr Currently xol_add_vma() uses get_unmapped_area() for area->vaddr, but the next patches need to use the fixed address. So this patch adds the new "vaddr" argument to __create_xol_area() which should be used as area->vaddr if it is nonzero. xol_add_vma() doesn't bother to verify that the predefined addr is not used, insert_vm_struct() should fail if find_vma_links() detects the overlap with the existing vma. Also, __create_xol_area() doesn't need __GFP_ZERO to allocate area. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad17d81..7d12a45 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1104,11 +1104,14 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) if (mm->uprobes_state.xol_area) goto fail; - /* Try to map as high as possible, this is only a hint. */ - area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); - if (area->vaddr & ~PAGE_MASK) { - ret = area->vaddr; - goto fail; + if (!area->vaddr) { + /* Try to map as high as possible, this is only a hint. */ + area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, + PAGE_SIZE, 0, 0); + if (area->vaddr & ~PAGE_MASK) { + ret = area->vaddr; + goto fail; + } } ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, @@ -1124,13 +1127,13 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } -static struct xol_area *__create_xol_area(void) +static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; - area = kzalloc(sizeof(*area), GFP_KERNEL); + area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1142,6 +1145,7 @@ static struct xol_area *__create_xol_area(void) if (!area->page) goto free_bitmap; + area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); @@ -1172,7 +1176,7 @@ static struct xol_area *get_xol_area(void) struct xol_area *area; if (!mm->uprobes_state.xol_area) - __create_xol_area(); + __create_xol_area(0); area = mm->uprobes_state.xol_area; smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ -- cgit v0.10.2 From 248d3a7b2f100078c5f6878351177859380582e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:41 +0200 Subject: uprobes: Change uprobe_copy_process() to dup return_instances uprobe_copy_process() assumes that the new child doesn't need ->utask, it should be allocated by demand. But this is not true if the forking task has the pending ret- probes, the child should report them as well and thus it needs the copy of parent's ->return_instances chain. Otherwise the child crashes when it returns from the probed function. Alternatively we could cleanup the child's stack, but this needs per-arch changes and this is not what we want. At least systemtap expects a .return in the child too. Note: this change alone doesn't fix the problem, see the next change. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7d12a45..1c6cda6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1366,12 +1366,55 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) +{ + struct uprobe_task *n_utask; + struct return_instance **p, *o, *n; + + n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + if (!n_utask) + return -ENOMEM; + t->utask = n_utask; + + p = &n_utask->return_instances; + for (o = o_utask->return_instances; o; o = o->next) { + n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + if (!n) + return -ENOMEM; + + *n = *o; + atomic_inc(&n->uprobe->ref); + n->next = NULL; + + *p = n; + p = &n->next; + n_utask->depth++; + } + + return 0; +} + +static void uprobe_warn(struct task_struct *t, const char *msg) +{ + pr_warn("uprobe: %s:%d failed to %s\n", + current->comm, current->pid, msg); +} + /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t) { + struct uprobe_task *utask = current->utask; + struct mm_struct *mm = current->mm; + t->utask = NULL; + + if (mm == t->mm || !utask || !utask->return_instances) + return; + + if (dup_utask(t, utask)) + return uprobe_warn(t, "dup ret instances"); } /* -- cgit v0.10.2 From aa59c53fd4599c91ccf9629af0c2777b89929076 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:44 +0200 Subject: uprobes: Change uprobe_copy_process() to dup xol_area This finally fixes the serious bug in uretprobes: a forked child crashes if the parent called fork() with the pending ret probe. Trivial test-case: # perf probe -x /lib/libc.so.6 __fork%return # perf record -e probe_libc:__fork perl -le 'fork || print "OK"' (the child doesn't print "OK", it is killed by SIGSEGV) If the child returns from the probed function it actually returns to trampoline_vaddr, because it got the copy of parent's stack mangled by prepare_uretprobe() when the parent entered this func. It crashes because a) this address is not mapped and b) until the previous change it doesn't have the proper->return_instances info. This means that uprobe_copy_process() has to create xol_area which has the trampoline slot, and its vaddr should be equal to parent's xol_area->vaddr. Unfortunately, uprobe_copy_process() can not simply do __create_xol_area(child, xol_area->vaddr). This could actually work but perf_event_mmap() doesn't expect the usage of foreign ->mm. So we offload this to task_work_run(), and pass the argument via not yet used utask->vaddr. We know that this vaddr is fine for install_special_mapping(), the necessary hole was recently "created" by dup_mmap() which skips the parent's VM_DONTCOPY area, and nobody else could use the new mm. Unfortunately, this also means that we can not handle the errors properly, we obviously can not abort the already completed fork(). So we simply print the warning if GFP_KERNEL allocation (the only possible reason) fails. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1c6cda6..9f282e1 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -35,6 +35,7 @@ #include /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ #include +#include #include @@ -1400,6 +1401,17 @@ static void uprobe_warn(struct task_struct *t, const char *msg) current->comm, current->pid, msg); } +static void dup_xol_work(struct callback_head *work) +{ + kfree(work); + + if (current->flags & PF_EXITING) + return; + + if (!__create_xol_area(current->utask->vaddr)) + uprobe_warn(current, "dup xol area"); +} + /* * Called in context of a new clone/fork from copy_process. */ @@ -1407,6 +1419,8 @@ void uprobe_copy_process(struct task_struct *t) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; + struct callback_head *work; + struct xol_area *area; t->utask = NULL; @@ -1415,6 +1429,20 @@ void uprobe_copy_process(struct task_struct *t) if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); + + /* The task can fork() after dup_xol_work() fails */ + area = mm->uprobes_state.xol_area; + if (!area) + return uprobe_warn(t, "dup xol area"); + + /* TODO: move it into the union in uprobe_task */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return uprobe_warn(t, "dup xol area"); + + utask->vaddr = area->vaddr; + init_task_work(work, dup_xol_work); + task_work_add(t, work, true); } /* -- cgit v0.10.2 From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 19:39:37 +0200 Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK uprobe_copy_process() does nothing if the child shares ->mm with the forking process, but there is a special case: CLONE_VFORK. In this case it would be more correct to do dup_utask() but avoid dup_xol(). This is not that important, the child should not unwind its stack too much, this can corrupt the parent's stack, but at least we need this to allow to ret-probe __vfork() itself. Note: in theory, it would be better to check task_pt_regs(p)->sp instead of CLONE_VFORK, we need to dup_utask() if and only if the child can return from the function called by the parent. But this needs the arch-dependant helper, and I think that nobody actually does clone(same_stack, CLONE_VM). Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e6fba62..9e0d5a6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -117,7 +117,7 @@ extern void uprobe_start_dup_mmap(void); extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); -extern void uprobe_copy_process(struct task_struct *t); +extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); @@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) static inline void uprobe_free_utask(struct task_struct *t) { } -static inline void uprobe_copy_process(struct task_struct *t) +static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) { } static inline void uprobe_clear_state(struct mm_struct *mm) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9f282e1..ae9e1d2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work) /* * Called in context of a new clone/fork from copy_process. */ -void uprobe_copy_process(struct task_struct *t) +void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; @@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t) t->utask = NULL; - if (mm == t->mm || !utask || !utask->return_instances) + if (!utask || !utask->return_instances) + return; + + if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) @@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t) if (!area) return uprobe_warn(t, "dup xol area"); + if (mm == t->mm) + return; + /* TODO: move it into the union in uprobe_task */ work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) diff --git a/kernel/fork.c b/kernel/fork.c index d3603b8..8531609 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1489,7 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); - uprobe_copy_process(p); + uprobe_copy_process(p, clone_flags); return p; -- cgit v0.10.2 From 53805eca3d89b095062c11a6798689bb0af09216 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 31 Oct 2013 16:47:45 -0700 Subject: perf tools: Remove cast of non-variadic function to variadic The 4fb71074a570 (perf ui/hist: Consolidate hpp helpers) cset introduced a cast of percent_color_snprintf to a function pointer type with varargs. Change percent_color_snprintf to be variadic and remove the cast. The symptom of this was all percentages being reported as 0.00% in perf report --stdio output on the armhf arch. Signed-off-by: Michael Hudson-Doyle Acked-by: Namhyung Kim Acked-by: Will Deacon Cc: Jean Pihet Cc: Jiri Olsa Cc: Will Deacon Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/87zjppvw7y.fsf@canonical.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 0a19328..78f4c92 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -117,7 +117,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ - (hpp_snprint_fn)percent_color_snprintf, true); \ + percent_color_snprintf, true); \ } #define __HPP_ENTRY_PERCENT_FN(_type, _field) \ diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 11e46da..66e44a5 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -318,8 +318,15 @@ int percent_color_fprintf(FILE *fp, const char *fmt, double percent) return r; } -int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent) +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) { - const char *color = get_percent_color(percent); + va_list args; + double percent; + const char *color; + + va_start(args, fmt); + percent = va_arg(args, double); + va_end(args); + color = get_percent_color(percent); return color_snprintf(bf, size, color, fmt, percent); } diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index dea082b..fced384 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -39,7 +39,7 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); -- cgit v0.10.2 From 32bf5bd181026fc99c0e15045abe409167285ba8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 22 Sep 2013 16:49:24 +0800 Subject: perf bench: Fix two warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are two warnings in bench/numa, when building this on 32-bit machine. The warning output is attached: bench/numa.c:1113:20: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] bench/numa.c:1161:6: error: format ‘%lx’ expects argument of t'long unsigned int’, but argument 5 has type ‘u64’ [-Werror=format] This patch fixes these two warnings. Signed-off-by: Wei Yang Acked-by: Ingo Molnar Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1379839764-9245-1-git-send-email-weiyang@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 30d1c32..a73c4ed 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1110,7 +1110,7 @@ static void *worker_thread(void *__tdata) /* Check whether our max runtime timed out: */ if (g->p.nr_secs) { timersub(&stop, &start0, &diff); - if (diff.tv_sec >= g->p.nr_secs) { + if (diff.tv_sec >= (time_t)g->p.nr_secs) { g->stop_work = true; break; } @@ -1157,7 +1157,7 @@ static void *worker_thread(void *__tdata) runtime_ns_max += diff.tv_usec * 1000; if (details >= 0) { - printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n", + printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIu64"]\n", process_nr, thread_nr, runtime_ns_max / bytes_done, val); } fflush(stdout); -- cgit v0.10.2 From 46d525eae2a076adfde92dca1db12d9a3b8ad8bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 31 Oct 2013 09:46:59 -0300 Subject: perf test: Update command line callchain attribute tests The "struct perf_event_attr setup" entry in 'perf test' is in fact a series of tests that will exec the tools, passing different sets of command line arguments to then intercept the sys_perf_event_open syscall, in user space, to check that the perf_event_attr->sample_type and other feature request bits are setup as expected. We recently restored the callchain requesting command line argument, -g, to not require a parameter ("dwarf" or "fp"), instead using a default ("fp" for now) and making the long option variant, --call-chain, be the one to be used when a different callchain collection method is preferred. The "struct perf_event_attr setup" test failed because we forgot to update the tests involving callchains, not switching from, '-g dwarf' to '--call-chain dwarf', making 'perf test' detect it: [root@sandy ~]# perf test -v 13 13: struct perf_event_attr setup : --- start --- running '/home/acme/libexec/perf-core/tests/attr/test-record-basic' running '/home/acme/libexec/perf-core/tests/attr/test-record-branch-any' running '/home/acme/libexec/perf-core/tests/attr/test-record-graph-default' running '/home/acme/libexec/perf-core/tests/attr/test-record-graph-dwarf' expected sample_type=12583, got 295 expected exclude_callchain_user=1, got 0 expected sample_stack_user=8192, got 0 FAILED '/home/acme/libexec/perf-core/tests/attr/test-record-graph-dwarf' - match failure ---- end ---- struct perf_event_attr setup: FAILED! [root@sandy ~]# Fix all of them now to use --call-chain when explicitely specifying a method. There is still work to do, as '-g fp', for instance, passed without problems. In that case 'perf test' saw no problems as the intercepted syscall got the bits as expected, i.e. the default is 'fp', but the fact that 'fp' may be an existing program and the specified workload would then be passed as a parameter to it is an usability problem that needs fixing. Next merge window tho. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-jr3oq1k5iywnp7vvqlslzydm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index d102957..430024f 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -44,9 +44,9 @@ Following tests are defined (with perf commands): perf record -c 123 kill (test-record-count) perf record -d kill (test-record-data) perf record -F 100 kill (test-record-freq) - perf record -g -- kill (test-record-graph-default) - perf record -g dwarf -- kill (test-record-graph-dwarf) - perf record -g fp kill (test-record-graph-fp) + perf record -g kill (test-record-graph-default) + perf record --call-graph dwarf kill (test-record-graph-dwarf) + perf record --call-graph fp kill (test-record-graph-fp) perf record --group -e cycles,instructions kill (test-record-group) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -D kill (test-record-no-delay) diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index 833d184..853597a 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -1,6 +1,6 @@ [config] command = record -args = -g -- kill >/dev/null 2>&1 +args = -g kill >/dev/null 2>&1 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf index e93e082..d6f324e 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/attr/test-record-graph-dwarf @@ -1,6 +1,6 @@ [config] command = record -args = -g dwarf -- kill >/dev/null 2>&1 +args = --call-graph dwarf -- kill >/dev/null 2>&1 [event:base-record] sample_type=12583 diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index 7cef374..055e3be 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -1,6 +1,6 @@ [config] command = record -args = -g fp kill >/dev/null 2>&1 +args = --call-graph fp kill >/dev/null 2>&1 [event:base-record] sample_type=295 -- cgit v0.10.2 From 9ef0438a957937bf0edc26d58bce891034ff9e30 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Oct 2013 17:36:31 -0300 Subject: perf probe: Fix typo s/tyep/type/g. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cznw5tnruonyoisxu8be11bv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e41b094..2200dad 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -583,7 +583,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } if (die_find_member(&type, field->name, die_mem) == NULL) { - pr_warning("%s(tyep:%s) has no member %s.\n", varname, + pr_warning("%s(type:%s) has no member %s.\n", varname, dwarf_diename(&type), field->name); return -EINVAL; } -- cgit v0.10.2 From 6e6dc401d528e3b64626de82322fa237f1c1e576 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 26 Oct 2013 20:53:14 +0200 Subject: perf tools: Add missing data.h into LIB_H headers Adding missing data.h into LIB_H headers so the build could keep up with its changes. Reported-by: Adrian Hunter Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131026185314.GA14973@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8a9ca38..bc7cfa1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -295,6 +295,7 @@ LIB_H += ui/helpline.h LIB_H += ui/progress.h LIB_H += ui/util.h LIB_H += ui/ui.h +LIB_H += util/data.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o -- cgit v0.10.2 From b9c5143a012a543c4ee872498d6dbae5c10beb2e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Sep 2013 14:46:56 +0200 Subject: perf tools: Use an accessor to read thread comm As the thread comm is going to be implemented by way of a more complicated data structure than just a pointer to a string from the thread struct, convert the readers of comm to use an accessor instead of accessing it directly. The accessor will be later overriden to support an enhanced comm implementation. Signed-off-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-wr683zwy94hmj4ibogmnv9ce@git.kernel.org [ Rename thread__comm_curr() to thread__comm_str() ] Signed-off-by: Namhyung Kim [ Fixed up some minor const pointer issues ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1126382..a28970f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -315,7 +315,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid); + dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 33c7253..35f9aaa 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -767,7 +767,7 @@ static void dump_threads(void) while (node) { st = container_of(node, struct thread_stat, rb); t = perf_session__findnew(session, st->tid); - pr_info("%10d: %s\n", st->tid, t->comm); + pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); node = rb_next(node); }; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ddb5dc1..a81ab18 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -737,12 +737,12 @@ static int replay_fork_event(struct perf_sched *sched, if (verbose) { printf("fork event\n"); - printf("... parent: %s/%d\n", parent->comm, parent->tid); - printf("... child: %s/%d\n", child->comm, child->tid); + printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); + printf("... child: %s/%d\n", thread__comm_str(child), child->tid); } - register_pid(sched, parent->tid, parent->comm); - register_pid(sched, child->tid, child->comm); + register_pid(sched, parent->tid, thread__comm_str(parent)); + register_pid(sched, child->tid, thread__comm_str(child)); return 0; } @@ -1077,7 +1077,7 @@ static int latency_migrate_task_event(struct perf_sched *sched, if (!atoms) { if (thread_atoms_insert(sched, migrant)) return -1; - register_pid(sched, migrant->tid, migrant->comm); + register_pid(sched, migrant->tid, thread__comm_str(migrant)); atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { pr_err("migration-event: Internal tree error"); @@ -1111,13 +1111,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ /* * Ignore idle threads: */ - if (!strcmp(work_list->thread->comm, "swapper")) + if (!strcmp(thread__comm_str(work_list->thread), "swapper")) return; sched->all_runtime += work_list->total_runtime; sched->all_count += work_list->nb_atoms; - ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->tid); + ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); for (i = 0; i < 24 - ret; i++) printf(" "); @@ -1334,7 +1334,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, printf(" %12.6f secs ", (double)timestamp/1e9); if (new_shortname) { printf("%s => %s:%d\n", - sched_in->shortname, sched_in->comm, sched_in->tid); + sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); } else { printf("\n"); } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0ae88c2..b866cc8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -291,11 +291,11 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(COMM)) { if (latency_format) - printf("%8.8s ", thread->comm); + printf("%8.8s ", thread__comm_str(thread)); else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) - printf("%s ", thread->comm); + printf("%s ", thread__comm_str(thread)); else - printf("%16s ", thread->comm); + printf("%16s ", thread__comm_str(thread)); } if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index b51abcb..4475b0f 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -421,7 +421,7 @@ static void print_hists(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node_in); pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n", - i, he->thread->comm, he->ms.map->dso->short_name, + i, thread__comm_str(he->thread), he->ms.map->dso->short_name, he->ms.sym->name, he->stat.period); i++; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 7ef36c3..a91b6b2 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1255,7 +1255,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, if (thread) printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", - (thread->comm_set ? thread->comm : ""), + (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid); if (dso) printed += scnprintf(bf + printed, size - printed, @@ -1578,7 +1578,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", (browser->hists->thread_filter ? "out of" : "into"), - (thread->comm_set ? thread->comm : ""), + (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid) > 0) zoom_thread = nr_options++; @@ -1598,7 +1598,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct symbol *sym; if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]", - browser->he_selection->thread->comm) > 0) + thread__comm_str(browser->he_selection->thread)) > 0) scripts_comm = nr_options++; sym = browser->he_selection->ms.sym; @@ -1701,7 +1701,7 @@ zoom_out_thread: sort_thread.elide = false; } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread->comm : "", + thread->comm_set ? thread__comm_str(thread) : "", thread->tid); browser->hists->thread_filter = thread; sort_thread.elide = true; @@ -1717,7 +1717,7 @@ do_scripts: memset(script_opt, 0, 64); if (choice == scripts_comm) - sprintf(script_opt, " -c %s ", browser->he_selection->thread->comm); + sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread)); if (choice == scripts_symbol) sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 49096ea..7a2842e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -721,10 +721,10 @@ int perf_event__preprocess_sample(const union perf_event *event, return -1; if (symbol_conf.comm_list && - !strlist__has_entry(symbol_conf.comm_list, thread->comm)) + !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) goto out_filtered; - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid); + dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* * Have we already created the kernel maps for this machine? * diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index c0c9795..d5e5969 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -273,7 +273,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - char *comm = thread->comm; + const char *comm = thread__comm_str(thread); dSP; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 95d91a0..53c20e7 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -250,7 +250,7 @@ static void python_process_tracepoint(union perf_event *perf_event int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - char *comm = thread->comm; + const char *comm = thread__comm_str(thread); t = PyTuple_New(MAX_FIELDS); if (!t) @@ -389,7 +389,7 @@ static void python_process_general_event(union perf_event *perf_event pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread->comm)); + PyString_FromString(thread__comm_str(thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 19b4aa2..835e8bd 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -42,7 +42,7 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) return n; } -static int64_t cmp_null(void *l, void *r) +static int64_t cmp_null(const void *l, const void *r) { if (!l && !r) return 0; @@ -63,8 +63,9 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + const char *comm = thread__comm_str(he->thread); return repsep_snprintf(bf, size, "%*s:%5d", width - 6, - he->thread->comm ?: "", he->thread->tid); + comm ?: "", he->thread->tid); } struct sort_entry sort_thread = { @@ -85,8 +86,8 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; + const char *comm_l = thread__comm_str(left->thread); + const char *comm_r = thread__comm_str(right->thread); if (!comm_l || !comm_r) return cmp_null(comm_l, comm_r); @@ -97,7 +98,7 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*s", width, he->thread->comm); + return repsep_snprintf(bf, size, "%*s", width, thread__comm_str(he->thread)); } struct sort_entry sort_comm = { diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 80d19a0..5676007 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -45,6 +45,11 @@ int thread__set_comm(struct thread *thread, const char *comm) return err; } +const char *thread__comm_str(const struct thread *thread) +{ + return thread->comm; +} + int thread__comm_len(struct thread *thread) { if (!thread->comm_len) { @@ -58,7 +63,7 @@ int thread__comm_len(struct thread *thread) size_t thread__fprintf(struct thread *thread, FILE *fp) { - return fprintf(fp, "Thread %d %s\n", thread->tid, thread->comm) + + return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) + map_groups__fprintf(&thread->mg, verbose, fp); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 4ebbb40..6561ad2 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -35,6 +35,7 @@ static inline void thread__exited(struct thread *thread) int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); +const char *thread__comm_str(const struct thread *thread); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); size_t thread__fprintf(struct thread *thread, FILE *fp); -- cgit v0.10.2 From 162f0befda3becc2cc9f44075fccc030e55baec1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Sep 2013 16:18:24 +0200 Subject: perf tools: Add time argument on COMM setting This way we can later delimit a lifecycle for the COMM and map a hist to a precise COMM:timeslice couple. PERF_RECORD_COMM and PERF_RECORD_FORK events that don't have PERF_SAMPLE_TIME samples can only send 0 value as a timestamp and thus should overwrite any previous COMM on a given thread because there is no sensible way to keep track of all the comms lifecycles in a thread without time informations. Signed-off-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6tyow99vgmmtt9qwr2u2lqd7@git.kernel.org [ Made it cope with PERF_RECORD_MMAP2 ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a6ea956..21db76d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -856,7 +856,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) &sample, machine); } else if (event->header.type < PERF_RECORD_MAX) { hists__inc_nr_events(&evsel->hists, event->header.type); - machine__process_event(machine, event); + machine__process_event(machine, event, &sample); } else ++session->stats.nr_unknown_events; next_event: diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dc3da65..95d6392 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1122,7 +1122,7 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre } static int trace__process_event(struct trace *trace, struct machine *machine, - union perf_event *event) + union perf_event *event, struct perf_sample *sample) { int ret = 0; @@ -1130,9 +1130,9 @@ static int trace__process_event(struct trace *trace, struct machine *machine, case PERF_RECORD_LOST: color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); - ret = machine__process_lost_event(machine, event); + ret = machine__process_lost_event(machine, event, sample); default: - ret = machine__process_event(machine, event); + ret = machine__process_event(machine, event, sample); break; } @@ -1141,11 +1141,11 @@ static int trace__process_event(struct trace *trace, struct machine *machine, static int trace__tool_process(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { struct trace *trace = container_of(tool, struct trace, tool); - return trace__process_event(trace, machine, event); + return trace__process_event(trace, machine, event, sample); } static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) @@ -1751,7 +1751,7 @@ again: trace->base_time = sample.time; if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event); + trace__process_event(trace, trace->host, event, &sample); continue; } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index e3fedfa..49ccc3b 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -276,7 +276,7 @@ static int process_event(struct machine *machine, struct perf_evlist *evlist, return process_sample_event(machine, evlist, event, state); if (event->header.type < PERF_RECORD_MAX) - return machine__process_event(machine, event); + return machine__process_event(machine, event, NULL); return 0; } diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 4475b0f..6c337e6 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -93,7 +93,7 @@ static struct machine *setup_fake_machine(struct machines *machines) if (thread == NULL) goto out; - thread__set_comm(thread, fake_threads[i].comm); + thread__set_comm(thread, fake_threads[i].comm, 0); } for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { @@ -110,7 +110,7 @@ static struct machine *setup_fake_machine(struct machines *machines) strcpy(fake_mmap_event.mmap.filename, fake_mmap_info[i].filename); - machine__process_mmap_event(machine, &fake_mmap_event); + machine__process_mmap_event(machine, &fake_mmap_event, NULL); } for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7a2842e..c26b353 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -512,18 +512,18 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_comm_event(machine, event); + return machine__process_comm_event(machine, event, sample); } int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_lost_event(machine, event); + return machine__process_lost_event(machine, event, sample); } size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) @@ -546,18 +546,18 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_mmap_event(machine, event); + return machine__process_mmap_event(machine, event, sample); } int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_mmap2_event(machine, event); + return machine__process_mmap2_event(machine, event, sample); } size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) @@ -569,18 +569,18 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) int perf_event__process_fork(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_fork_event(machine, event); + return machine__process_fork_event(machine, event, sample); } int perf_event__process_exit(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_exit_event(machine, event); + return machine__process_exit_event(machine, event, sample); } size_t perf_event__fprintf(union perf_event *event, FILE *fp) @@ -611,10 +611,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) int perf_event__process(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_event(machine, event); + return machine__process_event(machine, event, sample); } void thread__find_addr_map(struct thread *self, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ea93425..ce034c1 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -40,7 +40,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) return -ENOMEM; snprintf(comm, sizeof(comm), "[guest/%d]", pid); - thread__set_comm(thread, comm); + thread__set_comm(thread, comm, 0); } return 0; @@ -331,7 +331,8 @@ struct thread *machine__find_thread(struct machine *machine, pid_t tid) return __machine__findnew_thread(machine, 0, tid, false); } -int machine__process_comm_event(struct machine *machine, union perf_event *event) +int machine__process_comm_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample) { struct thread *thread = machine__findnew_thread(machine, event->comm.pid, @@ -340,7 +341,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_comm(event, stdout); - if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { + if (thread == NULL || thread__set_comm(thread, event->comm.comm, sample->time)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } @@ -349,7 +350,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event } int machine__process_lost_event(struct machine *machine __maybe_unused, - union perf_event *event) + union perf_event *event, struct perf_sample *sample __maybe_unused) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", event->lost.id, event->lost.lost); @@ -984,7 +985,8 @@ out_problem: } int machine__process_mmap2_event(struct machine *machine, - union perf_event *event) + union perf_event *event, + struct perf_sample *sample __maybe_unused) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; @@ -1031,7 +1033,8 @@ out_problem: return 0; } -int machine__process_mmap_event(struct machine *machine, union perf_event *event) +int machine__process_mmap_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample __maybe_unused) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; @@ -1088,7 +1091,8 @@ static void machine__remove_thread(struct machine *machine, struct thread *th) list_add_tail(&th->node, &machine->dead_threads); } -int machine__process_fork_event(struct machine *machine, union perf_event *event) +int machine__process_fork_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample) { struct thread *thread = machine__find_thread(machine, event->fork.tid); struct thread *parent = machine__findnew_thread(machine, @@ -1105,7 +1109,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event perf_event__fprintf_task(event, stdout); if (thread == NULL || parent == NULL || - thread__fork(thread, parent) < 0) { + thread__fork(thread, parent, sample->time) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } @@ -1113,8 +1117,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event return 0; } -int machine__process_exit_event(struct machine *machine __maybe_unused, - union perf_event *event) +int machine__process_exit_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample __maybe_unused) { struct thread *thread = machine__find_thread(machine, event->fork.tid); @@ -1127,23 +1131,24 @@ int machine__process_exit_event(struct machine *machine __maybe_unused, return 0; } -int machine__process_event(struct machine *machine, union perf_event *event) +int machine__process_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample) { int ret; switch (event->header.type) { case PERF_RECORD_COMM: - ret = machine__process_comm_event(machine, event); break; + ret = machine__process_comm_event(machine, event, sample); break; case PERF_RECORD_MMAP: - ret = machine__process_mmap_event(machine, event); break; + ret = machine__process_mmap_event(machine, event, sample); break; case PERF_RECORD_MMAP2: - ret = machine__process_mmap2_event(machine, event); break; + ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: - ret = machine__process_fork_event(machine, event); break; + ret = machine__process_fork_event(machine, event, sample); break; case PERF_RECORD_EXIT: - ret = machine__process_exit_event(machine, event); break; + ret = machine__process_exit_event(machine, event, sample); break; case PERF_RECORD_LOST: - ret = machine__process_lost_event(machine, event); break; + ret = machine__process_lost_event(machine, event, sample); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 4c1f5d5..2389ba8 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -40,13 +40,20 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type) struct thread *machine__find_thread(struct machine *machine, pid_t tid); -int machine__process_comm_event(struct machine *machine, union perf_event *event); -int machine__process_exit_event(struct machine *machine, union perf_event *event); -int machine__process_fork_event(struct machine *machine, union perf_event *event); -int machine__process_lost_event(struct machine *machine, union perf_event *event); -int machine__process_mmap_event(struct machine *machine, union perf_event *event); -int machine__process_mmap2_event(struct machine *machine, union perf_event *event); -int machine__process_event(struct machine *machine, union perf_event *event); +int machine__process_comm_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_exit_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_fork_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_lost_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_mmap_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_mmap2_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); typedef void (*machine__process_t)(struct machine *machine, void *data); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4ba7b54..3c1b301 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1100,7 +1100,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se { struct thread *thread = perf_session__findnew(self, 0); - if (thread == NULL || thread__set_comm(thread, "swapper")) { + if (thread == NULL || thread__set_comm(thread, "swapper", 0)) { pr_err("problem inserting idle task.\n"); thread = NULL; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 5676007..0ea73fe 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -31,7 +31,8 @@ void thread__delete(struct thread *thread) free(thread); } -int thread__set_comm(struct thread *thread, const char *comm) +int thread__set_comm(struct thread *thread, const char *comm, + u64 timestamp __maybe_unused) { int err; @@ -73,7 +74,8 @@ void thread__insert_map(struct thread *thread, struct map *map) map_groups__insert(&thread->mg, map); } -int thread__fork(struct thread *thread, struct thread *parent) +int thread__fork(struct thread *thread, struct thread *parent, + u64 timestamp __maybe_unused) { int i; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 6561ad2..4e97242 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -33,11 +33,11 @@ static inline void thread__exited(struct thread *thread) thread->dead = true; } -int thread__set_comm(struct thread *self, const char *comm); +int thread__set_comm(struct thread *thread, const char *comm, u64 timestamp); int thread__comm_len(struct thread *self); const char *thread__comm_str(const struct thread *thread); void thread__insert_map(struct thread *self, struct map *map); -int thread__fork(struct thread *self, struct thread *parent); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); static inline struct map *thread__find_map(struct thread *self, -- cgit v0.10.2 From 1902efe7f626fdebe1520f5ff11f1309ec506708 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Sep 2013 16:56:44 +0200 Subject: perf tools: Add new COMM infrastructure This new COMM infrastructure provides two features: 1) It keeps track of all comms lifecycle for a given thread. This way we can associate a timeframe to any thread COMM, as long as PERF_SAMPLE_TIME samples are joined to COMM and fork events. As a result we should have more precise COMM sorted hists with seperated entries for pre and post exec time after a fork. 2) It also makes sure that a given COMM string is not duplicated but rather shared among the threads that refer to it. This way the threads COMM can be compared against pointer values from the sort infrastructure. Signed-off-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-hwjf70b2wve9m2kosxiq8bb3@git.kernel.org [ Rename some accessor functions ] Signed-off-by: Namhyung Kim [ Use __ as separator for class__method for private comm_str methods ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index bc7cfa1..cb52bdb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -273,6 +273,7 @@ LIB_H += util/color.h LIB_H += util/values.h LIB_H += util/sort.h LIB_H += util/hist.h +LIB_H += util/comm.h LIB_H += util/thread.h LIB_H += util/thread_map.h LIB_H += util/trace-event.h @@ -341,6 +342,7 @@ LIB_OBJS += $(OUTPUT)util/machine.o LIB_OBJS += $(OUTPUT)util/map.o LIB_OBJS += $(OUTPUT)util/pstack.o LIB_OBJS += $(OUTPUT)util/session.o +LIB_OBJS += $(OUTPUT)util/comm.o LIB_OBJS += $(OUTPUT)util/thread.o LIB_OBJS += $(OUTPUT)util/thread_map.o LIB_OBJS += $(OUTPUT)util/trace-event-parse.o diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 95d6392..b3e57dc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1114,7 +1114,7 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre if (trace->multiple_threads) { if (trace->show_comm) - printed += fprintf(fp, "%.14s/", thread->comm); + printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); printed += fprintf(fp, "%d ", thread->tid); } @@ -1986,7 +1986,7 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) else if (ratio > 5.0) color = PERF_COLOR_YELLOW; - printed += color_fprintf(fp, color, "%20s", thread->comm); + printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread)); printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); printed += color_fprintf(fp, color, "%5.1f%%", ratio); printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c new file mode 100644 index 0000000..8b3ac9f --- /dev/null +++ b/tools/perf/util/comm.c @@ -0,0 +1,106 @@ +#include "comm.h" +#include "util.h" +#include +#include + +struct comm_str { + char *str; + struct rb_node rb_node; + int ref; +}; + +/* Should perhaps be moved to struct machine */ +static struct rb_root comm_str_root; + +static void comm_str__get(struct comm_str *cs) +{ + cs->ref++; +} + +static void comm_str__put(struct comm_str *cs) +{ + if (!--cs->ref) { + rb_erase(&cs->rb_node, &comm_str_root); + free(cs->str); + free(cs); + } +} + +static struct comm_str *comm_str__alloc(const char *str) +{ + struct comm_str *cs; + + cs = zalloc(sizeof(*cs)); + if (!cs) + return NULL; + + cs->str = strdup(str); + if (!cs->str) { + free(cs); + return NULL; + } + + return cs; +} + +static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct comm_str *iter, *new; + int cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct comm_str, rb_node); + + cmp = strcmp(str, iter->str); + if (!cmp) + return iter; + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + new = comm_str__alloc(str); + if (!new) + return NULL; + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + + return new; +} + +struct comm *comm__new(const char *str, u64 timestamp) +{ + struct comm *comm = zalloc(sizeof(*comm)); + + if (!comm) + return NULL; + + comm->start = timestamp; + + comm->comm_str = comm_str__findnew(str, &comm_str_root); + if (!comm->comm_str) { + free(comm); + return NULL; + } + + comm_str__get(comm->comm_str); + + return comm; +} + +void comm__free(struct comm *comm) +{ + comm_str__put(comm->comm_str); + free(comm); +} + +const char *comm__str(const struct comm *comm) +{ + return comm->comm_str->str; +} diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h new file mode 100644 index 0000000..f62d215 --- /dev/null +++ b/tools/perf/util/comm.h @@ -0,0 +1,20 @@ +#ifndef __PERF_COMM_H +#define __PERF_COMM_H + +#include "../perf.h" +#include +#include + +struct comm_str; + +struct comm { + struct comm_str *comm_str; + u64 start; + struct list_head list; +}; + +void comm__free(struct comm *comm); +struct comm *comm__new(const char *str, u64 timestamp); +const char *comm__str(const struct comm *comm); + +#endif /* __PERF_COMM_H */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0ea73fe..15c53c2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,9 +6,12 @@ #include "thread.h" #include "util.h" #include "debug.h" +#include "comm.h" struct thread *thread__new(pid_t pid, pid_t tid) { + char *comm_str; + struct comm *comm; struct thread *thread = zalloc(sizeof(*thread)); if (thread != NULL) { @@ -16,47 +19,88 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->pid_ = pid; thread->tid = tid; thread->ppid = -1; - thread->comm = malloc(32); - if (thread->comm) - snprintf(thread->comm, 32, ":%d", thread->tid); + INIT_LIST_HEAD(&thread->comm_list); + + comm_str = malloc(32); + if (!comm_str) + goto err_thread; + + snprintf(comm_str, 32, ":%d", tid); + comm = comm__new(comm_str, 0); + free(comm_str); + if (!comm) + goto err_thread; + + list_add(&comm->list, &thread->comm_list); } return thread; + +err_thread: + free(thread); + return NULL; } void thread__delete(struct thread *thread) { + struct comm *comm, *tmp; + map_groups__exit(&thread->mg); - free(thread->comm); + list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) { + list_del(&comm->list); + comm__free(comm); + } + free(thread); } -int thread__set_comm(struct thread *thread, const char *comm, - u64 timestamp __maybe_unused) +static struct comm *thread__comm(const struct thread *thread) { - int err; + if (list_empty(&thread->comm_list)) + return NULL; - if (thread->comm) - free(thread->comm); - thread->comm = strdup(comm); - err = thread->comm == NULL ? -ENOMEM : 0; - if (!err) { - thread->comm_set = true; + return list_first_entry(&thread->comm_list, struct comm, list); +} + +/* CHECKME: time should always be 0 if event aren't ordered */ +int thread__set_comm(struct thread *thread, const char *str, u64 timestamp) +{ + struct comm *new, *curr = thread__comm(thread); + + /* Override latest entry if it had no specific time coverage */ + if (!curr->start) { + list_del(&curr->list); + comm__free(curr); } - return err; + + new = comm__new(str, timestamp); + if (!new) + return -ENOMEM; + + list_add(&new->list, &thread->comm_list); + thread->comm_set = true; + + return 0; } const char *thread__comm_str(const struct thread *thread) { - return thread->comm; + const struct comm *comm = thread__comm(thread); + + if (!comm) + return NULL; + + return comm__str(comm); } +/* CHECKME: it should probably better return the max comm len from its comm list */ int thread__comm_len(struct thread *thread) { if (!thread->comm_len) { - if (!thread->comm) + const char *comm = thread__comm_str(thread); + if (!comm) return 0; - thread->comm_len = strlen(thread->comm); + thread->comm_len = strlen(comm); } return thread->comm_len; @@ -74,17 +118,17 @@ void thread__insert_map(struct thread *thread, struct map *map) map_groups__insert(&thread->mg, map); } -int thread__fork(struct thread *thread, struct thread *parent, - u64 timestamp __maybe_unused) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) { - int i; + int i, err; if (parent->comm_set) { - if (thread->comm) - free(thread->comm); - thread->comm = strdup(parent->comm); - if (!thread->comm) + const char *comm = thread__comm_str(parent); + if (!comm) return -ENOMEM; + err = thread__set_comm(thread, comm, timestamp); + if (!err) + return err; thread->comm_set = true; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 4e97242..8702c6b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -2,6 +2,7 @@ #define __PERF_THREAD_H #include +#include #include #include #include "symbol.h" @@ -18,7 +19,7 @@ struct thread { char shortname[3]; bool comm_set; bool dead; /* if set thread has exited */ - char *comm; + struct list_head comm_list; int comm_len; void *priv; -- cgit v0.10.2 From fedd63d3cdc9004df43b02df5c874b8957992fe8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Sep 2013 17:18:09 +0200 Subject: perf tools: Compare hists comm by addresses Now that comm strings are allocated only once and refcounted to be shared among threads, these can now be safely compared by addresses. This should remove most hists collapses on post processing. Signed-off-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1381468543-25334-8-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 835e8bd..bf91d0e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -80,7 +80,8 @@ struct sort_entry sort_thread = { static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { - return right->thread->tid - left->thread->tid; + /* Compare the addr that should be unique among comm */ + return thread__comm_str(right->thread) - thread__comm_str(left->thread); } static int64_t -- cgit v0.10.2 From 4dfced359fbc719a35527416f1b4b3999647f68b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 Sep 2013 16:28:57 +0900 Subject: perf tools: Get current comm instead of last one At insert time, a hist entry should reference comm at the time otherwise it'll get the last comm anyway. Signed-off-by: Namhyung Kim Acked-by: Frederic Weisbecker Tested-by: Jiri Olsa Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-n6pykiiymtgmcjs834go2t8x@git.kernel.org [ Fixed up const pointer issues ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 8b3ac9f..ee0df0e 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -94,6 +94,21 @@ struct comm *comm__new(const char *str, u64 timestamp) return comm; } +void comm__override(struct comm *comm, const char *str, u64 timestamp) +{ + struct comm_str *old = comm->comm_str; + + comm->comm_str = comm_str__findnew(str, &comm_str_root); + if (!comm->comm_str) { + comm->comm_str = old; + return; + } + + comm->start = timestamp; + comm_str__get(comm->comm_str); + comm_str__put(old); +} + void comm__free(struct comm *comm) { comm_str__put(comm->comm_str); diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index f62d215..7a86e56 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h @@ -16,5 +16,6 @@ struct comm { void comm__free(struct comm *comm); struct comm *comm__new(const char *str, u64 timestamp); const char *comm__str(const struct comm *comm); +void comm__override(struct comm *comm, const char *str, u64 timestamp); #endif /* __PERF_COMM_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7e80253..30793f9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -416,6 +416,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *hists, { struct hist_entry entry = { .thread = al->thread, + .comm = thread__comm(al->thread), .ms = { .map = al->map, .sym = al->sym, @@ -446,6 +447,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *hists, { struct hist_entry entry = { .thread = al->thread, + .comm = thread__comm(al->thread), .ms = { .map = bi->to.map, .sym = bi->to.sym, @@ -475,6 +477,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, { struct hist_entry entry = { .thread = al->thread, + .comm = thread__comm(al->thread), .ms = { .map = al->map, .sym = al->sym, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index bf91d0e..3c1b75c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,5 +1,6 @@ #include "sort.h" #include "hist.h" +#include "comm.h" #include "symbol.h" regex_t parent_regex; @@ -81,25 +82,20 @@ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { /* Compare the addr that should be unique among comm */ - return thread__comm_str(right->thread) - thread__comm_str(left->thread); + return comm__str(right->comm) - comm__str(left->comm); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - const char *comm_l = thread__comm_str(left->thread); - const char *comm_r = thread__comm_str(right->thread); - - if (!comm_l || !comm_r) - return cmp_null(comm_l, comm_r); - - return strcmp(comm_l, comm_r); + /* Compare the addr that should be unique among comm */ + return comm__str(right->comm) - comm__str(left->comm); } static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*s", width, thread__comm_str(he->thread)); + return repsep_snprintf(bf, size, "%*s", width, comm__str(he->comm)); } struct sort_entry sort_comm = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index bf43336..f4e16f3 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -84,6 +84,7 @@ struct hist_entry { struct he_stat stat; struct map_symbol ms; struct thread *thread; + struct comm *comm; u64 ip; u64 transaction; s32 cpu; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 15c53c2..cd8e2f5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -54,7 +54,7 @@ void thread__delete(struct thread *thread) free(thread); } -static struct comm *thread__comm(const struct thread *thread) +struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) return NULL; @@ -69,8 +69,8 @@ int thread__set_comm(struct thread *thread, const char *str, u64 timestamp) /* Override latest entry if it had no specific time coverage */ if (!curr->start) { - list_del(&curr->list); - comm__free(curr); + comm__override(curr, str, timestamp); + return 0; } new = comm__new(str, timestamp); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 8702c6b..373c055 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -26,6 +26,7 @@ struct thread { }; struct machine; +struct comm; struct thread *thread__new(pid_t pid, pid_t tid); void thread__delete(struct thread *self); @@ -36,6 +37,7 @@ static inline void thread__exited(struct thread *thread) int thread__set_comm(struct thread *thread, const char *comm, u64 timestamp); int thread__comm_len(struct thread *self); +struct comm *thread__comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); -- cgit v0.10.2 From f852fd621ca19f557f2e3d05900366be7c7afb83 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:29 +0200 Subject: perf evsel: Add a debug print if perf_event_open fails There is a debug print (at verbose level 2) for each call to perf_event_open. Add another debug print if the call fails, and print the error number. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3a334f0..f0e65de 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1051,6 +1051,8 @@ retry_open: group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; + pr_debug2("perf_event_open failed, error %d\n", + err); goto try_fallback; } set_rlimit = NO_CHANGE; -- cgit v0.10.2 From 7ea95727af571d592c9d6aa7627690d44b114a2d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:30 +0200 Subject: perf script: Set up output options for in-stream attributes Attributes (struct perf_event_attr) are recorded separately in the perf.data file. perf script uses them to set up output options. However attributes can also be in the event stream, for example when the input is a pipe (i.e. live mode). This patch makes perf script process in-stream attributes in the same way as on-file attributes. Here is an example: Before this patch: $ perf record uname | perf script Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB (null) (~655 samples) ] :4220 4220 [-01] 2933367.838906: cycles: :4220 4220 [-01] 2933367.838910: cycles: :4220 4220 [-01] 2933367.838912: cycles: :4220 4220 [-01] 2933367.838914: cycles: :4220 4220 [-01] 2933367.838916: cycles: :4220 4220 [-01] 2933367.838918: cycles: uname 4220 [-01] 2933367.838938: cycles: uname 4220 [-01] 2933367.839207: cycles: After this patch: $ perf record uname | perf script Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB (null) (~655 samples) ] :4582 4582 2933425.707724: cycles: ffffffff81043ffa native_write_msr_safe ([kernel.kallsyms]) :4582 4582 2933425.707728: cycles: ffffffff81043ffa native_write_msr_safe ([kernel.kallsyms]) :4582 4582 2933425.707730: cycles: ffffffff81043ffa native_write_msr_safe ([kernel.kallsyms]) :4582 4582 2933425.707732: cycles: ffffffff81043ffa native_write_msr_safe ([kernel.kallsyms]) :4582 4582 2933425.707734: cycles: ffffffff81043ffa native_write_msr_safe ([kernel.kallsyms]) :4582 4582 2933425.707736: cycles: ffffffff81309a24 memcpy ([kernel.kallsyms]) uname 4582 2933425.707760: cycles: ffffffff8109c1c7 enqueue_task_fair ([kernel.kallsyms]) uname 4582 2933425.707978: cycles: ffffffff81308457 clear_page_c ([kernel.kallsyms]) Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b866cc8..baf1798 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -229,6 +229,24 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return 0; } +static void set_print_ip_opts(struct perf_event_attr *attr) +{ + unsigned int type = attr->type; + + output[type].print_ip_opts = 0; + if (PRINT_FIELD(IP)) + output[type].print_ip_opts |= PRINT_IP_OPT_IP; + + if (PRINT_FIELD(SYM)) + output[type].print_ip_opts |= PRINT_IP_OPT_SYM; + + if (PRINT_FIELD(DSO)) + output[type].print_ip_opts |= PRINT_IP_OPT_DSO; + + if (PRINT_FIELD(SYMOFFSET)) + output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET; +} + /* * verify all user requested events exist and the samples * have the expected data @@ -237,7 +255,6 @@ static int perf_session__check_output_opt(struct perf_session *session) { int j; struct perf_evsel *evsel; - struct perf_event_attr *attr; for (j = 0; j < PERF_TYPE_MAX; ++j) { evsel = perf_session__find_first_evtype(session, j); @@ -260,20 +277,7 @@ static int perf_session__check_output_opt(struct perf_session *session) if (evsel == NULL) continue; - attr = &evsel->attr; - - output[j].print_ip_opts = 0; - if (PRINT_FIELD(IP)) - output[j].print_ip_opts |= PRINT_IP_OPT_IP; - - if (PRINT_FIELD(SYM)) - output[j].print_ip_opts |= PRINT_IP_OPT_SYM; - - if (PRINT_FIELD(DSO)) - output[j].print_ip_opts |= PRINT_IP_OPT_DSO; - - if (PRINT_FIELD(SYMOFFSET)) - output[j].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET; + set_print_ip_opts(&evsel->attr); } return 0; @@ -547,6 +551,34 @@ struct perf_script { struct perf_session *session; }; +static int process_attr(struct perf_tool *tool, union perf_event *event, + struct perf_evlist **pevlist) +{ + struct perf_script *scr = container_of(tool, struct perf_script, tool); + struct perf_evlist *evlist; + struct perf_evsel *evsel, *pos; + int err; + + err = perf_event__process_attr(tool, event, pevlist); + if (err) + return err; + + evlist = *pevlist; + evsel = perf_evlist__last(*pevlist); + + if (evsel->attr.type >= PERF_TYPE_MAX) + return 0; + + list_for_each_entry(pos, &evlist->entries, node) { + if (pos->attr.type == evsel->attr.type && pos != evsel) + return 0; + } + + set_print_ip_opts(&evsel->attr); + + return perf_evsel__check_attr(evsel, scr->session); +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -1272,7 +1304,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, - .attr = perf_event__process_attr, + .attr = process_attr, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .ordered_samples = true, -- cgit v0.10.2 From 28e962b9d79f496f214d7fc8ffd1a3f2a67e9090 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:31 +0200 Subject: perf tools: Fix 32-bit cross build Setting EXTRA_CFLAGS=-m32 did not work because it was not passed around. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index cb52bdb..5b863902 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -711,7 +711,7 @@ $(LIB_FILE): $(LIB_OBJS) TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) $(LIBTRACEEVENT): $(TE_SOURCES) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) CFLAGS="-g -Wall $(EXTRA_CFLAGS)" libtraceevent.a $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 543aa95..2f1d7d7 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -96,7 +96,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) LDFLAGS=$(LDFLAGS) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -173,7 +173,7 @@ ifeq ($(feature-all), 1) # $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1) $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 452b67c..353c00c 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -31,7 +31,7 @@ CC := $(CC) -MD all: $(FILES) -BUILD = $(CC) $(LDFLAGS) -o $(OUTPUT)$@ $@.c +BUILD = $(CC) $(CFLAGS) $(LDFLAGS) -o $(OUTPUT)$@ $@.c ############################### -- cgit v0.10.2 From 8a0c4c2843d3b72e23c3c12079b8d5c3ae99f3e3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:32 +0200 Subject: perf tools: Fix libunwind build and feature detection for 32-bit build Use -lunwind-x86 instead of -lunwind-x86_64 for 32-bit build. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 2f1d7d7..ffb5f55 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -25,9 +25,11 @@ ifeq ($(ARCH),x86_64) RAW_ARCH := x86_64 CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S + LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + else + LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif NO_PERF_REGS := 0 - LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 endif ifeq ($(NO_PERF_REGS),0) @@ -96,7 +98,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) LIBUNWIND_LIBS="$(LIBUNWIND_LIBS)" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 353c00c..d37d58d 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -36,7 +36,7 @@ BUILD = $(CC) $(CFLAGS) $(LDFLAGS) -o $(OUTPUT)$@ $@.c ############################### test-all: - $(BUILD) -Werror -fstack-protector -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lunwind -lunwind-x86_64 -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -Werror -fstack-protector -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma $(LIBUNWIND_LIBS) -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl test-hello: $(BUILD) @@ -72,7 +72,7 @@ test-libnuma: $(BUILD) -lnuma test-libunwind: - $(BUILD) -lunwind -lunwind-x86_64 -lelf + $(BUILD) $(LIBUNWIND_LIBS) -lelf test-libaudit: $(BUILD) -laudit -- cgit v0.10.2 From 026359658aecd348bc5c4a136a26f204b169103b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:33 +0200 Subject: perf evlist: Add a debug print if event buffer mmap fails Add a debug print if mmap of the perf event ring buffer fails. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0582f67..1c173cc 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -607,6 +607,8 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, MAP_SHARED, fd, 0); if (evlist->mmap[idx].base == MAP_FAILED) { + pr_debug2("failed to mmap perf event ring buffer, error %d\n", + errno); evlist->mmap[idx].base = NULL; return -1; } -- cgit v0.10.2 From 1e7ed5ec54e3998bda6ea625599a0644404cb421 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:35 +0200 Subject: perf evsel: Always use perf_evsel__set_sample_bit() Always use perf_evsel__set_sample_bit() rather than just setting the bit. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-8-git-send-email-adrian.hunter@intel.com [ Cope with 3090ffb "perf: Disable PERF_RECORD_MMAP2 support" ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f0e65de..47bbf03 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -663,7 +663,7 @@ void perf_evsel__config(struct perf_evsel *evsel, } if (opts->sample_address) - attr->sample_type |= PERF_SAMPLE_DATA_SRC; + perf_evsel__set_sample_bit(evsel, DATA_SRC); if (opts->no_delay) { attr->watermark = 0; @@ -675,13 +675,13 @@ void perf_evsel__config(struct perf_evsel *evsel, } if (opts->sample_weight) - attr->sample_type |= PERF_SAMPLE_WEIGHT; + perf_evsel__set_sample_bit(evsel, WEIGHT); attr->mmap = track; attr->comm = track; if (opts->sample_transaction) - attr->sample_type |= PERF_SAMPLE_TRANSACTION; + perf_evsel__set_sample_bit(evsel, TRANSACTION); /* * XXX see the function comment above -- cgit v0.10.2 From 87b955247d71975460774435241be3aa05218a7b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:36 +0200 Subject: perf evsel: Add missing overflow check for TRANSACTION Add missing overflow check for PERF_SAMPLE_TRANSACTION in perf_evsel__parse_sample(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 47bbf03..b121717c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1481,6 +1481,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { + OVERFLOW_CHECK_u64(array); data->transaction = *array; array++; } -- cgit v0.10.2 From 091a4ef5a94d46d26a05f0c32d2f64800ed91306 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:37 +0200 Subject: perf test: Update "sample parsing" test for PERF_SAMPLE_TRANSACTION In fact the "sample parsing" test does not automatically check new sample type bits - they must be added to the comparison logic. Doing that shows that the test fails because the functions perf_event__synthesize_sample() and perf_event__sample_event_size() have not been updated with PERF_SAMPLE_TRANSACTION either. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 61c9da2..1b67720 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -121,6 +121,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); + if (type & PERF_SAMPLE_TRANSACTION) + COMP(transaction); + return true; } @@ -165,6 +168,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format) .cpu = 110, .raw_size = sizeof(raw_data), .data_src = 111, + .transaction = 112, .raw_data = (void *)raw_data, .callchain = &callchain.callchain, .branch_stack = &branch_stack.branch_stack, @@ -273,7 +277,8 @@ int test__sample_parsing(void) /* * Fail the test if it has not been updated when new sample format bits - * were added. + * were added. Please actually update the test rather than just change + * the condition below. */ if (PERF_SAMPLE_MAX > PERF_SAMPLE_TRANSACTION << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); -- cgit v0.10.2 From 42d88910c717ba21089251d0ca559abfef0df22d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:38 +0200 Subject: perf evsel: Synthesize PERF_SAMPLE_TRANSACTION Add missing PERF_SAMPLE_TRANSACTION to perf_event__synthesize_sample() and perf_event__sample_event_size(). This makes the "sample parsing" test pass. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b121717c..5280820 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1578,6 +1578,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_DATA_SRC) result += sizeof(u64); + if (type & PERF_SAMPLE_TRANSACTION) + result += sizeof(u64); + return result; } @@ -1751,6 +1754,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, array++; } + if (type & PERF_SAMPLE_TRANSACTION) { + *array = sample->transaction; + array++; + } + return 0; } -- cgit v0.10.2 From ac6976255076d4bf761abfbecb19d46af5b88046 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 1 Nov 2013 16:33:11 +0900 Subject: perf tools: Show single option when failed to parse Current option parser outputs whole option help string when it failed to parse an option. However this is not good for user if the command has many option, she might feel hard which one is related easily. Fix it by just showing the help message of the given option only. Signed-off-by: Namhyung Kim Requested-by: Ingo Molnar Acked-by: Ingo Molnar Reviewed-by: Ingo Molnar Enthusiastically-Supported-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383291195-24386-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 2bc9e70..1caf7b9 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -339,10 +339,10 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, if (arg[1] != '-') { ctx->opt = arg + 1; if (internal_help && *ctx->opt == 'h') - return parse_options_usage(usagestr, options); + return usage_with_options_internal(usagestr, options, 0); switch (parse_short_opt(ctx, options)) { case -1: - return parse_options_usage(usagestr, options); + return parse_options_usage(usagestr, options, arg + 1, 1); case -2: goto unknown; default: @@ -352,10 +352,11 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, check_typos(arg + 1, options); while (ctx->opt) { if (internal_help && *ctx->opt == 'h') - return parse_options_usage(usagestr, options); + return usage_with_options_internal(usagestr, options, 0); + arg = ctx->opt; switch (parse_short_opt(ctx, options)) { case -1: - return parse_options_usage(usagestr, options); + return parse_options_usage(usagestr, options, arg, 1); case -2: /* fake a short option thing to hide the fact that we may have * started to parse aggregated stuff @@ -383,12 +384,12 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, if (internal_help && !strcmp(arg + 2, "help-all")) return usage_with_options_internal(usagestr, options, 1); if (internal_help && !strcmp(arg + 2, "help")) - return parse_options_usage(usagestr, options); + return usage_with_options_internal(usagestr, options, 0); if (!strcmp(arg + 2, "list-opts")) return PARSE_OPT_LIST; switch (parse_long_opt(ctx, arg + 2, options)) { case -1: - return parse_options_usage(usagestr, options); + return parse_options_usage(usagestr, options, arg + 2, 0); case -2: goto unknown; default: @@ -445,6 +446,89 @@ int parse_options(int argc, const char **argv, const struct option *options, #define USAGE_OPTS_WIDTH 24 #define USAGE_GAP 2 +static void print_option_help(const struct option *opts, int full) +{ + size_t pos; + int pad; + + if (opts->type == OPTION_GROUP) { + fputc('\n', stderr); + if (*opts->help) + fprintf(stderr, "%s\n", opts->help); + return; + } + if (!full && (opts->flags & PARSE_OPT_HIDDEN)) + return; + + pos = fprintf(stderr, " "); + if (opts->short_name) + pos += fprintf(stderr, "-%c", opts->short_name); + else + pos += fprintf(stderr, " "); + + if (opts->long_name && opts->short_name) + pos += fprintf(stderr, ", "); + if (opts->long_name) + pos += fprintf(stderr, "--%s", opts->long_name); + + switch (opts->type) { + case OPTION_ARGUMENT: + break; + case OPTION_LONG: + case OPTION_U64: + case OPTION_INTEGER: + case OPTION_UINTEGER: + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=]"); + else + pos += fprintf(stderr, "[]"); + else + pos += fprintf(stderr, " "); + break; + case OPTION_CALLBACK: + if (opts->flags & PARSE_OPT_NOARG) + break; + /* FALLTHROUGH */ + case OPTION_STRING: + if (opts->argh) { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=<%s>]", opts->argh); + else + pos += fprintf(stderr, "[<%s>]", opts->argh); + else + pos += fprintf(stderr, " <%s>", opts->argh); + } else { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=...]"); + else + pos += fprintf(stderr, "[...]"); + else + pos += fprintf(stderr, " ..."); + } + break; + default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */ + case OPTION_END: + case OPTION_GROUP: + case OPTION_BIT: + case OPTION_BOOLEAN: + case OPTION_INCR: + case OPTION_SET_UINT: + case OPTION_SET_PTR: + break; + } + + if (pos <= USAGE_OPTS_WIDTH) + pad = USAGE_OPTS_WIDTH - pos; + else { + fputc('\n', stderr); + pad = USAGE_OPTS_WIDTH; + } + fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); +} + int usage_with_options_internal(const char * const *usagestr, const struct option *opts, int full) { @@ -464,87 +548,9 @@ int usage_with_options_internal(const char * const *usagestr, if (opts->type != OPTION_GROUP) fputc('\n', stderr); - for (; opts->type != OPTION_END; opts++) { - size_t pos; - int pad; - - if (opts->type == OPTION_GROUP) { - fputc('\n', stderr); - if (*opts->help) - fprintf(stderr, "%s\n", opts->help); - continue; - } - if (!full && (opts->flags & PARSE_OPT_HIDDEN)) - continue; - - pos = fprintf(stderr, " "); - if (opts->short_name) - pos += fprintf(stderr, "-%c", opts->short_name); - else - pos += fprintf(stderr, " "); - - if (opts->long_name && opts->short_name) - pos += fprintf(stderr, ", "); - if (opts->long_name) - pos += fprintf(stderr, "--%s", opts->long_name); + for ( ; opts->type != OPTION_END; opts++) + print_option_help(opts, full); - switch (opts->type) { - case OPTION_ARGUMENT: - break; - case OPTION_LONG: - case OPTION_U64: - case OPTION_INTEGER: - case OPTION_UINTEGER: - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=]"); - else - pos += fprintf(stderr, "[]"); - else - pos += fprintf(stderr, " "); - break; - case OPTION_CALLBACK: - if (opts->flags & PARSE_OPT_NOARG) - break; - /* FALLTHROUGH */ - case OPTION_STRING: - if (opts->argh) { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=<%s>]", opts->argh); - else - pos += fprintf(stderr, "[<%s>]", opts->argh); - else - pos += fprintf(stderr, " <%s>", opts->argh); - } else { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=...]"); - else - pos += fprintf(stderr, "[...]"); - else - pos += fprintf(stderr, " ..."); - } - break; - default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */ - case OPTION_END: - case OPTION_GROUP: - case OPTION_BIT: - case OPTION_BOOLEAN: - case OPTION_INCR: - case OPTION_SET_UINT: - case OPTION_SET_PTR: - break; - } - - if (pos <= USAGE_OPTS_WIDTH) - pad = USAGE_OPTS_WIDTH - pos; - else { - fputc('\n', stderr); - pad = USAGE_OPTS_WIDTH; - } - fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); - } fputc('\n', stderr); return PARSE_OPT_HELP; @@ -559,9 +565,44 @@ void usage_with_options(const char * const *usagestr, } int parse_options_usage(const char * const *usagestr, - const struct option *opts) + const struct option *opts, + const char *optstr, bool short_opt) { - return usage_with_options_internal(usagestr, opts, 0); + if (!usagestr) + return PARSE_OPT_HELP; + + fprintf(stderr, "\n usage: %s\n", *usagestr++); + while (*usagestr && **usagestr) + fprintf(stderr, " or: %s\n", *usagestr++); + while (*usagestr) { + fprintf(stderr, "%s%s\n", + **usagestr ? " " : "", + *usagestr); + usagestr++; + } + fputc('\n', stderr); + + for ( ; opts->type != OPTION_END; opts++) { + if (short_opt) { + if (opts->short_name == *optstr) + break; + continue; + } + + if (opts->long_name == NULL) + continue; + + if (!prefixcmp(optstr, opts->long_name)) + break; + if (!prefixcmp(optstr, "no-") && + !prefixcmp(optstr + 3, opts->long_name)) + break; + } + + if (opts->type != OPTION_END) + print_option_help(opts, 0); + + return PARSE_OPT_HELP; } diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 7bb5999..b0241e2 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -158,7 +158,9 @@ struct parse_opt_ctx_t { }; extern int parse_options_usage(const char * const *usagestr, - const struct option *opts); + const struct option *opts, + const char *optstr, + bool short_opt); extern void parse_options_start(struct parse_opt_ctx_t *ctx, int argc, const char **argv, int flags); -- cgit v0.10.2 From 4bceffbc26fab2444742db59c6f8124c29e41369 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 1 Nov 2013 16:33:12 +0900 Subject: perf report: Postpone setting up browser after parsing options If setup_browser() called earlier than option parsing, the actual error message can be discarded during the terminal reset. So move it after setup_sorting() checks whether the sort keys are valid. Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Reviewed-by: Ingo Molnar Enthusiastically-Supported-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383291195-24386-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 98d3891..4df3161 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -905,13 +905,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) input_name = "perf.data"; } - if (strcmp(input_name, "-") != 0) - setup_browser(true); - else { - use_browser = 0; - perf_hpp__init(); - } - file.path = input_name; file.force = report.force; @@ -957,6 +950,18 @@ repeat: if (setup_sorting() < 0) usage_with_options(report_usage, options); + if (parent_pattern != default_parent_pattern) { + if (sort_dimension__add("parent") < 0) + goto error; + } + + if (strcmp(input_name, "-") != 0) + setup_browser(true); + else { + use_browser = 0; + perf_hpp__init(); + } + /* * Only in the TUI browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio @@ -986,11 +991,6 @@ repeat: if (symbol__init() < 0) goto error; - if (parent_pattern != default_parent_pattern) { - if (sort_dimension__add("parent") < 0) - goto error; - } - if (argc) { /* * Special case: if there's an argument left then assume that -- cgit v0.10.2 From 91aba0a62e24ff7a567e13e1d88deab711df6f0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 1 Nov 2013 16:33:13 +0900 Subject: perf report: Use parse_options_usage() for -s option failure The -s (--sort) option was processed after normal option parsing so that it cannot call the parse_options_usage() automatically. Currently it calls usage_with_options() which shows entire help messages for event option. Fix it by showing just -s options. $ perf report -s help Error: Unknown --sort key: `help' usage: perf report [] -s, --sort sort by key(s): pid, comm, dso, symbol, ... Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Reviewed-by: Ingo Molnar Enthusiastically-Supported-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383291195-24386-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4df3161..25f83d5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -947,8 +947,10 @@ repeat: sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; } - if (setup_sorting() < 0) - usage_with_options(report_usage, options); + if (setup_sorting() < 0) { + parse_options_usage(report_usage, options, "s", 1); + goto error; + } if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) -- cgit v0.10.2 From d37a92dcb45094dc02836c8a77c693c6f9916fb2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 1 Nov 2013 16:33:14 +0900 Subject: perf top: Use parse_options_usage() for -s option failure The -s (--sort) option was processed after normal option parsing so that it cannot call the parse_options_usage() automatically. Currently it calls usage_with_options() which shows entire help messages for event option. Fix it by showing just -s options. $ perf top -s help Error: Unknown --sort key: `help' usage: perf top [] -s, --sort sort by key(s): pid, comm, dso, symbol, ... Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Reviewed-by: Ingo Molnar Enthusiastically-Supported-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383291195-24386-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 21db76d..ca5ca37 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1040,7 +1040,7 @@ parse_percent_limit(const struct option *opt, const char *arg, int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { - int status; + int status = -1; char errbuf[BUFSIZ]; struct perf_top top = { .count_filter = 5, @@ -1159,8 +1159,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (sort_order == default_sort_order) sort_order = "dso,symbol"; - if (setup_sorting() < 0) - usage_with_options(top_usage, options); + if (setup_sorting() < 0) { + parse_options_usage(top_usage, options, "s", 1); + goto out_delete_evlist; + } /* display thread wants entries to be collapsed in a different tree */ sort__need_collapse = 1; -- cgit v0.10.2 From cc03c54296ccbeca5363dfe8f49af42d14960f28 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 1 Nov 2013 16:33:15 +0900 Subject: perf stat: Enhance option parse error message Print related option help messages only when it failed to process options. While at it, modify parse_options_usage() to skip usage part so that it can be used for showing multiple option help messages naturally like below: $ perf stat -Bx, ls -B option not supported with -x usage: perf stat [] [] -B, --big-num print large numbers with thousands' separators -x, --field-separator print counts with custom separator Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Reviewed-by: Ingo Molnar Enthusiastically-Supported-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383291195-24386-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1a9c95d..0fc1c94 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1596,7 +1596,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "perf stat [] []", NULL }; - int status = -ENOMEM, run_idx; + int status = -EINVAL, run_idx; const char *mode; setlocale(LC_ALL, ""); @@ -1614,12 +1614,15 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (output_name && output_fd) { fprintf(stderr, "cannot use both --output and --log-fd\n"); - usage_with_options(stat_usage, options); + parse_options_usage(stat_usage, options, "o", 1); + parse_options_usage(NULL, options, "log-fd", 0); + goto out; } if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); - usage_with_options(stat_usage, options); + parse_options_usage(stat_usage, options, "log-fd", 0); + goto out; } if (!output) { @@ -1656,7 +1659,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) /* User explicitly passed -B? */ if (big_num_opt == 1) { fprintf(stderr, "-B option not supported with -x\n"); - usage_with_options(stat_usage, options); + parse_options_usage(stat_usage, options, "B", 1); + parse_options_usage(NULL, options, "x", 1); + goto out; } else /* Nope, so disable big number formatting */ big_num = false; } else if (big_num_opt == 0) /* User passed --no-big-num */ @@ -1666,7 +1671,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(stat_usage, options); if (run_count < 0) { - usage_with_options(stat_usage, options); + pr_err("Run count must be a positive number\n"); + parse_options_usage(stat_usage, options, "r", 1); + goto out; } else if (run_count == 0) { forever = true; run_count = 1; @@ -1678,8 +1685,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); - usage_with_options(stat_usage, options); - return -1; + parse_options_usage(stat_usage, options, "G", 1); + parse_options_usage(NULL, options, "A", 1); + parse_options_usage(NULL, options, "a", 1); + goto out; } if (add_default_attributes()) @@ -1688,25 +1697,28 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) perf_target__validate(&target); if (perf_evlist__create_maps(evsel_list, &target) < 0) { - if (perf_target__has_task(&target)) + if (perf_target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); - if (perf_target__has_cpu(&target)) + parse_options_usage(stat_usage, options, "p", 1); + parse_options_usage(NULL, options, "t", 1); + } else if (perf_target__has_cpu(&target)) { perror("failed to parse CPUs map"); - - usage_with_options(stat_usage, options); - return -1; + parse_options_usage(stat_usage, options, "C", 1); + parse_options_usage(NULL, options, "a", 1); + } + goto out; } if (interval && interval < 100) { pr_err("print interval must be >= 100ms\n"); - usage_with_options(stat_usage, options); - return -1; + parse_options_usage(stat_usage, options, "I", 1); + goto out_free_maps; } if (perf_evlist__alloc_stats(evsel_list, interval)) goto out_free_maps; if (perf_stat_init_aggr_mode()) - goto out; + goto out_free_maps; /* * We dont want to block the signals - that would cause diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 1caf7b9..31f404a 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -569,7 +569,7 @@ int parse_options_usage(const char * const *usagestr, const char *optstr, bool short_opt) { if (!usagestr) - return PARSE_OPT_HELP; + goto opt; fprintf(stderr, "\n usage: %s\n", *usagestr++); while (*usagestr && **usagestr) @@ -582,6 +582,7 @@ int parse_options_usage(const char * const *usagestr, } fputc('\n', stderr); +opt: for ( ; opts->type != OPTION_END; opts++) { if (short_opt) { if (opts->short_name == *optstr) -- cgit v0.10.2 From 1b372ca52a02cc97520c13d79bdfb0a7ff81b772 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Fri, 1 Nov 2013 17:53:53 -0400 Subject: tools lib traceevent: Add support for extracting trace_clock in report If trace-cmd extracts trace_clock, trace-cmd reads trace_clock data from the trace.dat and switches outputting format of timestamp for each trace_clock. Signed-off-by: Yoshihiro YUNOMAE Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20130424231305.14877.86147.stgit@yunodevel Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index d1c2a6a..deedff9 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -305,6 +305,11 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) return 0; } +void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock) +{ + pevent->trace_clock = trace_clock; +} + struct func_map { unsigned long long addr; char *func; @@ -4443,8 +4448,21 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event, trace_seq_terminate(s); } +static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) +{ + if (!use_trace_clock) + return true; + + if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global") + || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf")) + return true; + + /* trace_clock is setting in tsc or counter mode */ + return false; +} + void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record) + struct pevent_record *record, bool use_trace_clock) { static const char *spaces = " "; /* 20 spaces */ struct event_format *event; @@ -4457,9 +4475,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, int pid; int len; int p; + bool use_usec_format; - secs = record->ts / NSECS_PER_SEC; - nsecs = record->ts - secs * NSECS_PER_SEC; + use_usec_format = is_timestamp_in_us(pevent->trace_clock, + use_trace_clock); + if (use_usec_format) { + secs = record->ts / NSECS_PER_SEC; + nsecs = record->ts - secs * NSECS_PER_SEC; + } if (record->size < 0) { do_warning("ug! negative record size %d", record->size); @@ -4484,15 +4507,20 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, } else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); - if (pevent->flags & PEVENT_NSEC_OUTPUT) { - usecs = nsecs; - p = 9; - } else { - usecs = (nsecs + 500) / NSECS_PER_USEC; - p = 6; - } + if (use_usec_format) { + if (pevent->flags & PEVENT_NSEC_OUTPUT) { + usecs = nsecs; + p = 9; + } else { + usecs = (nsecs + 500) / NSECS_PER_USEC; + p = 6; + } - trace_seq_printf(s, " %5lu.%0*lu: %s: ", secs, p, usecs, event->name); + trace_seq_printf(s, " %5lu.%0*lu: %s: ", + secs, p, usecs, event->name); + } else + trace_seq_printf(s, " %12llu: %s: ", + record->ts, event->name); /* Space out the event names evenly. */ len = strlen(event->name); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index c37b202..7503edf 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -20,6 +20,7 @@ #ifndef _PARSE_EVENTS_H #define _PARSE_EVENTS_H +#include #include #include @@ -450,6 +451,8 @@ struct pevent { /* cache */ struct event_format *last_event; + + char *trace_clock; }; static inline void pevent_set_flag(struct pevent *pevent, int flag) @@ -527,6 +530,7 @@ enum trace_flag_type { }; int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); +void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, unsigned long long addr, char *mod); int pevent_register_print_string(struct pevent *pevent, char *fmt, @@ -534,7 +538,7 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt, int pevent_pid_is_registered(struct pevent *pevent, int pid); void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record); + struct pevent_record *record, bool use_trace_clock); int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, int long_size); -- cgit v0.10.2 From 18900af8292180151c82f0762506fa0740aa54a5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 1 Nov 2013 17:53:54 -0400 Subject: tools lib traceevent: Update printk formats when entered Instead of cropping off the '"' and '\n"' from a printk format every time it is referenced, do it when it's added. This makes it easier to reference a printk_map and should speed things up a little. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131101215500.495619312@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index deedff9..856b791 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -604,10 +604,11 @@ find_printk(struct pevent *pevent, unsigned long long addr) * This registers a string by the address it was stored in the kernel. * The @fmt passed in is duplicated. */ -int pevent_register_print_string(struct pevent *pevent, char *fmt, +int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr) { struct printk_list *item = malloc(sizeof(*item)); + char *p; if (!item) return -1; @@ -615,10 +616,21 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt, item->next = pevent->printklist; item->addr = addr; + /* Strip off quotes and '\n' from the end */ + if (fmt[0] == '"') + fmt++; item->printk = strdup(fmt); if (!item->printk) goto out_free; + p = item->printk + strlen(item->printk) - 1; + if (*p == '"') + *p = 0; + + p -= 2; + if (strcmp(p, "\\n") == 0) + *p = 0; + pevent->printklist = item; pevent->printk_count++; @@ -3887,7 +3899,6 @@ get_bprint_format(void *data, int size __maybe_unused, struct format_field *field; struct printk_map *printk; char *format; - char *p; field = pevent->bprint_fmt_field; @@ -3909,20 +3920,8 @@ get_bprint_format(void *data, int size __maybe_unused, return format; } - p = printk->printk; - /* Remove any quotes. */ - if (*p == '"') - p++; - if (asprintf(&format, "%s : %s", "%pf", p) < 0) + if (asprintf(&format, "%s : %s", "%pf", printk->printk) < 0) return NULL; - /* remove ending quotes and new line since we will add one too */ - p = format + strlen(format) - 1; - if (*p == '"') - *p = 0; - - p -= 2; - if (strcmp(p, "\\n") == 0) - *p = 0; return format; } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7503edf..9ab6367 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -533,7 +533,7 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, unsigned long long addr, char *mod); -int pevent_register_print_string(struct pevent *pevent, char *fmt, +int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr); int pevent_pid_is_registered(struct pevent *pevent, int pid); -- cgit v0.10.2 From 0970b5f438261216afcd0ccaa2fcfffc83df7ca2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 1 Nov 2013 17:53:55 -0400 Subject: tools lib traceevent: If %s is a pointer, check printk formats If the format string of TP_printk() contains a %s, and the argument is not a string, check if the argument is a pointer that might match the printk_formats that were stored. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131101215500.698924777@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 856b791..013c8d3 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3505,6 +3505,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct pevent *pevent = event->pevent; struct print_flag_sym *flag; struct format_field *field; + struct printk_map *printk; unsigned long long val, fval; unsigned long addr; char *str; @@ -3540,7 +3541,12 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, if (!(field->flags & FIELD_IS_ARRAY) && field->size == pevent->long_size) { addr = *(unsigned long *)(data + field->offset); - trace_seq_printf(s, "%lx", addr); + /* Check if it matches a print format */ + printk = find_printk(pevent, addr); + if (printk) + trace_seq_puts(s, printk->printk); + else + trace_seq_printf(s, "%lx", addr); break; } str = malloc(len + 1); -- cgit v0.10.2 From b30f75eba27a9ab0704cbc501e9be3b025ce56fe Mon Sep 17 00:00:00 2001 From: Howard Cochran Date: Fri, 1 Nov 2013 17:53:56 -0400 Subject: tools lib traceevent: Handle __print_hex(__get_dynamic_array(fieldname), len) The kernel has a few events with a format similar to this excerpt: field:unsigned int len; offset:12; size:4; signed:0; field:__data_loc unsigned char[] data_array; offset:16; size:4; signed:0; print fmt: "%s", __print_hex(__get_dynamic_array(data_array), REC->len) trace-cmd could already parse that arg correctly, but print_str_arg() was unable to handle the first parameter being a dynamic array. (It just printed a "field not found" warning). Teach print_str_arg's PRINT_HEX case to handle the nested PRINT_DYNAMIC_ARRAY correctly. The output now matches the kernel's own formatting for this case. Signed-off-by: Howard Cochran Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1381503349-12271-1-git-send-email-hcochran@lexmark.com [ Removed "polish compare", we don't do that here ] Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 013c8d3..0a1ffe0 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3588,15 +3588,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } break; case PRINT_HEX: - field = arg->hex.field->field.field; - if (!field) { - str = arg->hex.field->field.name; - field = pevent_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->hex.field->field.field = field; + if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + offset = pevent_read_number(pevent, + data + arg->hex.field->dynarray.field->offset, + arg->hex.field->dynarray.field->size); + hex = data + (offset & 0xffff); + } else { + field = arg->hex.field->field.field; + if (!field) { + str = arg->hex.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->hex.field->field.field = field; + } + hex = data + field->offset; } - hex = data + field->offset; len = eval_num_arg(data, size, event, arg->hex.size); for (i = 0; i < len; i++) { if (i) -- cgit v0.10.2 From 0883d9d730fc294c3d90ebd190b94e5782ead316 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 1 Nov 2013 17:53:57 -0400 Subject: tools lib traceevent: Have bprintk output the same as the kernel does The trace_bprintk() in the kernel looks like: ring_buffer_producer_thread: Missed: 0 ring_buffer_producer_thread: Hit: 62174350 ring_buffer_producer_thread: Entries per millisec: 6296 ring_buffer_producer_thread: 158 ns per entry ring_buffer_producer_thread: Sleeping for 10 secs ring_buffer_producer_thread: Starting ring buffer hammer ring_buffer_producer_thread: End ring buffer hammer But the current output looks like this: ring_buffer_producer_thread : Time: 9407018 (usecs) ring_buffer_producer_thread : Overruns: 43285485 ring_buffer_producer_thread : Read: 4405365 (by events) ring_buffer_producer_thread : Entries: 0 ring_buffer_producer_thread : Total: 47690850 ring_buffer_producer_thread : Missed: 0 ring_buffer_producer_thread : Hit: 47690850 Remove the space between the function and the colon. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131101215501.272654481@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0a1ffe0..e1c743c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3802,8 +3802,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (asprintf(&arg->atom.atom, "%lld", ip) < 0) goto out_free; - /* skip the first "%pf : " */ - for (ptr = fmt + 6, bptr = data + field->offset; + /* skip the first "%pf: " */ + for (ptr = fmt + 5, bptr = data + field->offset; bptr < data + size && *ptr; ptr++) { int ls = 0; @@ -3929,12 +3929,12 @@ get_bprint_format(void *data, int size __maybe_unused, printk = find_printk(pevent, addr); if (!printk) { - if (asprintf(&format, "%%pf : (NO FORMAT FOUND at %llx)\n", addr) < 0) + if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) return NULL; return format; } - if (asprintf(&format, "%s : %s", "%pf", printk->printk) < 0) + if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0) return NULL; return format; -- cgit v0.10.2 From 5efb9fbd5f1bfe4435bd0a3ea5f0e187875509c2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 1 Nov 2013 17:53:58 -0400 Subject: tools lib traceevent: Check for spaces in character array Currently when using the raw format for fields, when looking at a character array, to determine if it is a string or not, we make sure all characters are "isprint()". If not, then we consider it a numeric array, and print the hex numbers of the characters instead. But it seems that '\n' fails the isprint() check! Add isspace() to the check as well, such that if all characters pass isprint() or isspace() it will assume the character array is a string. Reported-by: Xenia Ragiadakou Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Xenia Ragiadakou Link: http://lkml.kernel.org/r/20131101215501.465091682@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index e1c743c..85cbbdd 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3981,7 +3981,7 @@ static int is_printable_array(char *p, unsigned int len) unsigned int i; for (i = 0; i < len && p[i]; i++) - if (!isprint(p[i])) + if (!isprint(p[i]) && !isspace(p[i])) return 0; return 1; } -- cgit v0.10.2 From c6c2b960b7a4105f096499fba3df65d6c0272a20 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Nov 2013 17:53:59 -0400 Subject: tools lib traceevent: Add flags NOHANDLE and PRINTRAW to individual events Add the flags EVENT_FL_NOHANDLE and EVENT_FL_PRINTRAW to the event flags to have the event either ignore the register handler or to ignore the handler and also print the raw format respectively. This allows a tool to force a raw format or non handle for an event. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131101215501.655258742@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 85cbbdd..fc6f35f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4446,11 +4446,11 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event, { int print_pretty = 1; - if (event->pevent->print_raw) + if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW)) print_event_fields(s, record->data, record->size, event); else { - if (event->handler) + if (event->handler && !(event->flags & EVENT_FL_NOHANDLE)) print_pretty = event->handler(s, record, event, event->context); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 9ab6367..dc8539e 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -308,6 +308,8 @@ enum { EVENT_FL_ISBPRINT = 0x04, EVENT_FL_ISFUNCENT = 0x10, EVENT_FL_ISFUNCRET = 0x20, + EVENT_FL_NOHANDLE = 0x40, + EVENT_FL_PRINTRAW = 0x80, EVENT_FL_FAILED = 0x80000000 }; -- cgit v0.10.2 From 6d862b8c14ba539c7c87ffc77f2e1d6dc9630c4d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Nov 2013 17:54:00 -0400 Subject: tools lib traceevent: Add pevent_print_func_field() helper function Add the pevent_print_func_field() that will look up a field that is expected to be a function pointer, and it will print the function name and offset of the address given by the field. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131101215501.869542711@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fc6f35f..8f450ad 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5367,6 +5367,48 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt, return -1; } +/** + * pevent_print_func_field - print a field and a format for function pointers + * @s: The seq to print to + * @fmt: The printf format to print the field with. + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @err: print default error if failed. + * + * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + */ +int pevent_print_func_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct pevent_record *record, int err) +{ + struct format_field *field = pevent_find_field(event, name); + struct pevent *pevent = event->pevent; + unsigned long long val; + struct func_map *func; + char tmp[128]; + + if (!field) + goto failed; + + if (pevent_read_number_field(field, record->data, &val)) + goto failed; + + func = find_func(pevent, val); + + if (func) + snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); + else + sprintf(tmp, "0x%08llx", val); + + return trace_seq_printf(s, fmt, tmp); + + failed: + if (err) + trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); + return -1; +} + static void free_func_handle(struct pevent_function_handler *func) { struct pevent_func_params *params; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index dc8539e..8d73d25 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -569,6 +569,10 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt, struct event_format *event, const char *name, struct pevent_record *record, int err); +int pevent_print_func_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct pevent_record *record, int err); + int pevent_register_event_handler(struct pevent *pevent, int id, const char *sys_name, const char *event_name, pevent_event_handler_func func, void *context); -- cgit v0.10.2 From 41a4e6e2a0237e8ac895f43158ef7c91ab7af157 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 Oct 2013 15:56:03 +0900 Subject: perf hists: Consolidate __hists__add_*entry() The __hists__add_{branch,mem}_entry() does almost the same thing that __hists__add_entry() does. Consolidate them into one. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rodrigo Campos Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383202576-28141-2-git-send-email-namhyung@kernel.org [ Fixup clash with new COMM infrastructure ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6c5ae57..4087ab1 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -65,7 +65,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1, 0); + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index b605009..3b67ea2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -307,7 +307,8 @@ static int hists__add_entry(struct hists *hists, struct addr_location *al, u64 period, u64 weight, u64 transaction) { - if (__hists__add_entry(hists, al, NULL, period, weight, transaction) != NULL) + if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, + transaction) != NULL) return 0; return -ENOMEM; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25f83d5..8cf8e66 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -115,7 +115,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, * and this is indirectly achieved by passing period=weight here * and the he_stat__add_period() function. */ - he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost); + he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi, + cost, cost, 0); if (!he) return -ENOMEM; @@ -200,12 +201,16 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, err = -ENOMEM; + /* overwrite the 'al' to branch-to info */ + al->map = bi[i].to.map; + al->sym = bi[i].to.sym; + al->addr = bi[i].to.addr; /* * The report shows the percentage of total branches captured * and not events sampled. Thus we use a pseudo period of 1. */ - he = __hists__add_branch_entry(&evsel->hists, al, parent, - &bi[i], 1, 1); + he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, + 1, 1, 0); if (he) { struct annotation *notes; bx = he->branch_info; @@ -266,8 +271,9 @@ static int perf_evsel__add_hist_entry(struct perf_tool *tool, return err; } - he = __hists__add_entry(&evsel->hists, al, parent, sample->period, - sample->weight, sample->transaction); + he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ca5ca37..21897f0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -246,8 +246,9 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct hist_entry *he; pthread_mutex_lock(&evsel->hists.lock); - he = __hists__add_entry(&evsel->hists, al, NULL, sample->period, - sample->weight, sample->transaction); + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, + sample->period, sample->weight, + sample->transaction); pthread_mutex_unlock(&evsel->hists.lock); if (he == NULL) return NULL; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 6c337e6..173bf42 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - 1, 1, 0); + NULL, NULL, 1, 1, 0); if (he == NULL) goto out; @@ -245,8 +245,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1, - 0); + he = __hists__add_entry(&evsel->hists, &al, NULL, + NULL, NULL, 1, 1, 0); if (he == NULL) goto out; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 30793f9..822903e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -407,73 +407,12 @@ out: return he; } -struct hist_entry *__hists__add_mem_entry(struct hists *hists, - struct addr_location *al, - struct symbol *sym_parent, - struct mem_info *mi, - u64 period, - u64 weight) -{ - struct hist_entry entry = { - .thread = al->thread, - .comm = thread__comm(al->thread), - .ms = { - .map = al->map, - .sym = al->sym, - }, - .stat = { - .period = period, - .weight = weight, - .nr_events = 1, - }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - .hists = hists, - .mem_info = mi, - .branch_info = NULL, - }; - return add_hist_entry(hists, &entry, al, period, weight); -} - -struct hist_entry *__hists__add_branch_entry(struct hists *hists, - struct addr_location *al, - struct symbol *sym_parent, - struct branch_info *bi, - u64 period, - u64 weight) -{ - struct hist_entry entry = { - .thread = al->thread, - .comm = thread__comm(al->thread), - .ms = { - .map = bi->to.map, - .sym = bi->to.sym, - }, - .cpu = al->cpu, - .ip = bi->to.addr, - .level = al->level, - .stat = { - .period = period, - .nr_events = 1, - .weight = weight, - }, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - .branch_info = bi, - .hists = hists, - .mem_info = NULL, - }; - - return add_hist_entry(hists, &entry, al, period, weight); -} - struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, - struct symbol *sym_parent, u64 period, - u64 weight, u64 transaction) + struct symbol *sym_parent, + struct branch_info *bi, + struct mem_info *mi, + u64 period, u64 weight, u64 transaction) { struct hist_entry entry = { .thread = al->thread, @@ -486,15 +425,15 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .ip = al->addr, .level = al->level, .stat = { - .period = period, .nr_events = 1, + .period = period, .weight = weight, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent), .hists = hists, - .branch_info = NULL, - .mem_info = NULL, + .branch_info = bi, + .mem_info = mi, .transaction = transaction, }; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9d2d022..307f1c7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -86,7 +86,9 @@ struct hists { struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, - struct symbol *parent, u64 period, + struct symbol *parent, + struct branch_info *bi, + struct mem_info *mi, u64 period, u64 weight, u64 transaction); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); @@ -95,20 +97,6 @@ int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); -struct hist_entry *__hists__add_branch_entry(struct hists *self, - struct addr_location *al, - struct symbol *sym_parent, - struct branch_info *bi, - u64 period, - u64 weight); - -struct hist_entry *__hists__add_mem_entry(struct hists *self, - struct addr_location *al, - struct symbol *sym_parent, - struct mem_info *mi, - u64 period, - u64 weight); - void hists__output_resort(struct hists *self); void hists__collapse_resort(struct hists *self, struct ui_progress *prog); -- cgit v0.10.2 From 87419c9afff1431d4b62b388baf6bfa07e0b14ff Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Oct 2013 10:43:16 -0600 Subject: perf kvm: Disable live command if timerfd is not supported If the OS does not have timerfd support (e.g., older OS'es like RHEL5) disable perf kvm stat live. Signed-off-by: David Ahern Reviewed-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383064996-20933-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cb05f39..cd9f920 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -20,7 +20,9 @@ #include "util/data.h" #include +#ifdef HAVE_TIMERFD_SUPPORT #include +#endif #include #include @@ -337,6 +339,7 @@ static void init_kvm_event_record(struct perf_kvm_stat *kvm) INIT_LIST_HEAD(&kvm->kvm_events_cache[i]); } +#ifdef HAVE_TIMERFD_SUPPORT static void clear_events_cache_stats(struct list_head *kvm_events_cache) { struct list_head *head; @@ -358,6 +361,7 @@ static void clear_events_cache_stats(struct list_head *kvm_events_cache) } } } +#endif static int kvm_events_hash_fn(u64 key) { @@ -783,6 +787,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events); } +#ifdef HAVE_TIMERFD_SUPPORT static int process_lost_event(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -793,6 +798,7 @@ static int process_lost_event(struct perf_tool *tool, kvm->lost_events++; return 0; } +#endif static bool skip_sample(struct perf_kvm_stat *kvm, struct perf_sample *sample) @@ -872,6 +878,7 @@ static bool verify_vcpu(int vcpu) return true; } +#ifdef HAVE_TIMERFD_SUPPORT /* keeping the max events to a modest level to keep * the processing of samples per mmap smooth. */ @@ -1213,6 +1220,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) out: return rc; } +#endif static int read_events(struct perf_kvm_stat *kvm) { @@ -1379,6 +1387,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) return kvm_events_report_vcpu(kvm); } +#ifdef HAVE_TIMERFD_SUPPORT static struct perf_evlist *kvm_live_event_list(void) { struct perf_evlist *evlist; @@ -1566,6 +1575,7 @@ out: return err; } +#endif static void print_kvm_stat_usage(void) { @@ -1604,8 +1614,10 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) if (!strncmp(argv[1], "rep", 3)) return kvm_events_report(&kvm, argc - 1 , argv + 1); +#ifdef HAVE_TIMERFD_SUPPORT if (!strncmp(argv[1], "live", 4)) return kvm_events_live(&kvm, argc - 1 , argv + 1); +#endif perf_stat: return cmd_stat(argc, argv, NULL); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index ffb5f55..be8bb9a 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -397,6 +397,13 @@ else endif endif +$(call feature_check,timerfd) +ifeq ($(feature-timerfd), 1) + CFLAGS += -DHAVE_TIMERFD_SUPPORT +else + msg := $(warning No timerfd support. Disables 'perf kvm stat live'); +endif + disable-python = $(eval $(disable-python_code)) define disable-python_code CFLAGS += -DNO_LIBPYTHON diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index d37d58d..c803f17 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -25,7 +25,8 @@ FILES= \ test-libunwind \ test-on-exit \ test-stackprotector-all \ - test-stackprotector + test-stackprotector \ + test-timerfd CC := $(CC) -MD @@ -136,6 +137,9 @@ test-on-exit: test-backtrace: $(BUILD) +test-timerfd: + $(BUILD) + -include *.d ############################### diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 50d4318..59e7a70 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -81,6 +81,10 @@ # include "test-libnuma.c" #undef main +#define main main_test_timerfd +# include "test-timerfd.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -101,6 +105,7 @@ int main(int argc, char *argv[]) main_test_on_exit(); main_test_backtrace(); main_test_libnuma(); + main_test_timerfd(); return 0; } diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/perf/config/feature-checks/test-timerfd.c new file mode 100644 index 0000000..8c5c083 --- /dev/null +++ b/tools/perf/config/feature-checks/test-timerfd.c @@ -0,0 +1,18 @@ +/* + * test for timerfd functions used by perf-kvm-stat-live + */ +#include + +int main(void) +{ + struct itimerspec new_value; + + int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); + if (fd < 0) + return 1; + + if (timerfd_settime(fd, 0, &new_value, NULL) != 0) + return 1; + + return 0; +} -- cgit v0.10.2 From 5febff0066b8111785d58903b54d414e9ec6a3d0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Oct 2013 10:43:15 -0600 Subject: tools/perf/build: Fix detection of non-core features feature_check needs to be invoked through call, and LDFLAGS may not be set so quotes are needed. Thanks to Jiri for spotting the quotes around LDFLAGS; that one was driving me nuts with the upcoming timerfd feature detection. Signed-off-by: David Ahern Reviewed-by: Jiri Olsa Tested-by: Jiri Olsa Acked-by: Ingo Molnar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383064996-20933-1-git-send-email-dsahern@gmail.com [ Fixed conflict with 8a0c4c2843d3 ("perf tools: Fix libunwind build and feature detection for 32-bit build") ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index be8bb9a..58b2d37 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -98,7 +98,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) LIBUNWIND_LIBS="$(LIBUNWIND_LIBS)" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS="$(LDFLAGS)" LIBUNWIND_LIBS="$(LIBUNWIND_LIBS)" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -235,7 +235,7 @@ CFLAGS += -I$(LIB_INCLUDE) CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ifndef NO_BIONIC - $(feature_check,bionic) + $(call feature_check,bionic) ifeq ($(feature-bionic), 1) BIONIC := 1 EXTLIBS := $(filter-out -lrt,$(EXTLIBS)) @@ -479,15 +479,15 @@ else CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else ifneq ($(feature-libbfd), 1) - $(feature_check,liberty) + $(call feature_check,liberty) ifeq ($(feature-liberty), 1) EXTLIBS += -lbfd -liberty else - $(feature_check,liberty-z) + $(call feature_check,liberty-z) ifeq ($(feature-liberty-z), 1) EXTLIBS += -lbfd -liberty -lz else - $(feature_check,cplus-demangle) + $(call feature_check,cplus-demangle) ifeq ($(feature-cplus-demangle), 1) EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT -- cgit v0.10.2 From 8e00ddc9dfe09ee131144fdaf6c96ebe95bbbbcb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 30 Oct 2013 10:15:06 -0600 Subject: perf list: Remove a level of indentation With a return after the if check an indentation level can be removed. Indentation shift only; no functional changes. Signed-off-by: David Ahern Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383149707-1008-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index e79f423..45000e7 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -17,48 +17,49 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { + int i; + setup_pager(); - if (argc == 1) + if (argc == 1) { print_events(NULL, false); - else { - int i; - - for (i = 1; i < argc; ++i) { - if (i > 2) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL, false); - else if (strcmp(argv[i], "hw") == 0 || - strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); - else if (strcmp(argv[i], "sw") == 0 || - strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); - else if (strcmp(argv[i], "cache") == 0 || - strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); - else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); - else if (strcmp(argv[i], "--raw-dump") == 0) - print_events(NULL, true); - else { - char *sep = strchr(argv[i], ':'), *s; - int sep_idx; + return 0; + } - if (sep == NULL) { - print_events(argv[i], false); - continue; - } - sep_idx = sep - argv[i]; - s = strdup(argv[i]); - if (s == NULL) - return -1; + for (i = 1; i < argc; ++i) { + if (i > 2) + putchar('\n'); + if (strncmp(argv[i], "tracepoint", 10) == 0) + print_tracepoint_events(NULL, NULL, false); + else if (strcmp(argv[i], "hw") == 0 || + strcmp(argv[i], "hardware") == 0) + print_events_type(PERF_TYPE_HARDWARE); + else if (strcmp(argv[i], "sw") == 0 || + strcmp(argv[i], "software") == 0) + print_events_type(PERF_TYPE_SOFTWARE); + else if (strcmp(argv[i], "cache") == 0 || + strcmp(argv[i], "hwcache") == 0) + print_hwcache_events(NULL, false); + else if (strcmp(argv[i], "pmu") == 0) + print_pmu_events(NULL, false); + else if (strcmp(argv[i], "--raw-dump") == 0) + print_events(NULL, true); + else { + char *sep = strchr(argv[i], ':'), *s; + int sep_idx; - s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); - free(s); + if (sep == NULL) { + print_events(argv[i], false); + continue; } + sep_idx = sep - argv[i]; + s = strdup(argv[i]); + if (s == NULL) + return -1; + + s[sep_idx] = '\0'; + print_tracepoint_events(s, s + sep_idx + 1, false); + free(s); } } return 0; -- cgit v0.10.2 From 44d742e01e6d3dd544ee1873b660a3c8bc1413bb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 30 Oct 2013 10:28:29 -0600 Subject: perf list: Add usage Currently 'perf list' is not very helpful if you forget the syntax: $ perf list -h List of pre-defined events (to be used in -e): After: $ perf list -h usage: perf list [hw|sw|cache|tracepoint|pmu|event_glob] Signed-off-by: David Ahern Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/527133AD.4030003@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 45000e7..011195e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -14,20 +14,31 @@ #include "util/parse-events.h" #include "util/cache.h" #include "util/pmu.h" +#include "util/parse-options.h" int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; + const struct option list_options[] = { + OPT_END() + }; + const char * const list_usage[] = { + "perf list [hw|sw|cache|tracepoint|pmu|event_glob]", + NULL + }; + + argc = parse_options(argc, argv, list_options, list_usage, + PARSE_OPT_STOP_AT_NON_OPTION); setup_pager(); - if (argc == 1) { + if (argc == 0) { print_events(NULL, false); return 0; } - for (i = 1; i < argc; ++i) { - if (i > 2) + for (i = 0; i < argc; ++i) { + if (i) putchar('\n'); if (strncmp(argv[i], "tracepoint", 10) == 0) print_tracepoint_events(NULL, NULL, false); -- cgit v0.10.2 From 4299a549979783668d787959d61ba22b6b200877 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Nov 2013 15:14:45 +0100 Subject: perf tools: Factor sysfs code into generic fs object Moving sysfs code into generic fs object and preparing it to carry procfs support. This should be merged with tools/lib/lk/debugfs.c at some point in the future. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383660887-1734-2-git-send-email-jolsa@redhat.com [ Added fs__ namespace qualifier to some more functions ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5b863902..7fc8f17 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -242,7 +242,7 @@ LIB_H += util/cache.h LIB_H += util/callchain.h LIB_H += util/build-id.h LIB_H += util/debug.h -LIB_H += util/sysfs.h +LIB_H += util/fs.h LIB_H += util/pmu.h LIB_H += util/event.h LIB_H += util/evsel.h @@ -304,7 +304,7 @@ LIB_OBJS += $(OUTPUT)util/annotate.o LIB_OBJS += $(OUTPUT)util/build-id.o LIB_OBJS += $(OUTPUT)util/config.o LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/sysfs.o +LIB_OBJS += $(OUTPUT)util/fs.o LIB_OBJS += $(OUTPUT)util/pmu.o LIB_OBJS += $(OUTPUT)util/environment.o LIB_OBJS += $(OUTPUT)util/event.o diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 48114d1..f47bf45 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2,7 +2,7 @@ #include "parse-events.h" #include "evsel.h" #include "evlist.h" -#include "sysfs.h" +#include "fs.h" #include #include "tests.h" #include diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index beb8cf9..4af5a23 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -1,5 +1,5 @@ #include "util.h" -#include "sysfs.h" +#include "fs.h" #include "../perf.h" #include "cpumap.h" #include diff --git a/tools/perf/util/fs.c b/tools/perf/util/fs.c new file mode 100644 index 0000000..a2413e8 --- /dev/null +++ b/tools/perf/util/fs.c @@ -0,0 +1,107 @@ + +/* TODO merge/factor into tools/lib/lk/debugfs.c */ + +#include "util.h" +#include "util/fs.h" + +static const char * const sysfs__fs_known_mountpoints[] = { + "/sys", + 0, +}; + +struct fs { + const char *name; + const char * const *mounts; + char path[PATH_MAX + 1]; + bool found; + long magic; +}; + +enum { + FS__SYSFS = 0, +}; + +static struct fs fs__entries[] = { + [FS__SYSFS] = { + .name = "sysfs", + .mounts = sysfs__fs_known_mountpoints, + .magic = SYSFS_MAGIC, + }, +}; + +static bool fs__read_mounts(struct fs *fs) +{ + bool found = false; + char type[100]; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (!found && + fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", + fs->path, type) == 2) { + + if (strcmp(type, fs->name) == 0) + found = true; + } + + fclose(fp); + return fs->found = found; +} + +static int fs__valid_mount(const char *fs, long magic) +{ + struct statfs st_fs; + + if (statfs(fs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool fs__check_mounts(struct fs *fs) +{ + const char * const *ptr; + + ptr = fs->mounts; + while (*ptr) { + if (fs__valid_mount(*ptr, fs->magic) == 0) { + fs->found = true; + strcpy(fs->path, *ptr); + return true; + } + ptr++; + } + + return false; +} + +static const char *fs__get_mountpoint(struct fs *fs) +{ + if (fs__check_mounts(fs)) + return fs->path; + + return fs__read_mounts(fs) ? fs->path : NULL; +} + +static const char *fs__find_mountpoint(int idx) +{ + struct fs *fs = &fs__entries[idx]; + + if (fs->found) + return (const char *)fs->path; + + return fs__get_mountpoint(fs); +} + +#define FIND_MOUNTPOINT(name, idx) \ +const char *name##_find_mountpoint(void) \ +{ \ + return fs__find_mountpoint(idx); \ +} + +FIND_MOUNTPOINT(sysfs, FS__SYSFS); diff --git a/tools/perf/util/fs.h b/tools/perf/util/fs.h new file mode 100644 index 0000000..082edbd --- /dev/null +++ b/tools/perf/util/fs.h @@ -0,0 +1,6 @@ +#ifndef __PERF_FS +#define __PERF_FS + +const char *sysfs_find_mountpoint(void); + +#endif /* __PERF_FS */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 64362fe..45b42df 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -4,7 +4,7 @@ #include #include #include -#include "sysfs.h" +#include "fs.h" #include "util.h" #include "pmu.h" #include "parse-events.h" diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index f75ae1b..239036f 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -17,5 +17,5 @@ util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -util/sysfs.c +util/fs.c ../../lib/rbtree.c diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c deleted file mode 100644 index f71e9ea..0000000 --- a/tools/perf/util/sysfs.c +++ /dev/null @@ -1,60 +0,0 @@ - -#include "util.h" -#include "sysfs.h" - -static const char * const sysfs_known_mountpoints[] = { - "/sys", - 0, -}; - -static int sysfs_found; -char sysfs_mountpoint[PATH_MAX + 1]; - -static int sysfs_valid_mountpoint(const char *sysfs) -{ - struct statfs st_fs; - - if (statfs(sysfs, &st_fs) < 0) - return -ENOENT; - else if (st_fs.f_type != (long) SYSFS_MAGIC) - return -ENOENT; - - return 0; -} - -const char *sysfs_find_mountpoint(void) -{ - const char * const *ptr; - char type[100]; - FILE *fp; - - if (sysfs_found) - return (const char *) sysfs_mountpoint; - - ptr = sysfs_known_mountpoints; - while (*ptr) { - if (sysfs_valid_mountpoint(*ptr) == 0) { - sysfs_found = 1; - strcpy(sysfs_mountpoint, *ptr); - return sysfs_mountpoint; - } - ptr++; - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (!sysfs_found && - fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - sysfs_mountpoint, type) == 2) { - - if (strcmp(type, "sysfs") == 0) - sysfs_found = 1; - } - - fclose(fp); - - return sysfs_found ? sysfs_mountpoint : NULL; -} diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h deleted file mode 100644 index a813b72..0000000 --- a/tools/perf/util/sysfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SYSFS_H__ -#define __SYSFS_H__ - -const char *sysfs_find_mountpoint(void); - -#endif /* __DEBUGFS_H__ */ -- cgit v0.10.2 From cf38fadade52df937521dd70d4437df1a9354cd9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Nov 2013 14:48:50 -0300 Subject: perf fs: Rename NAME_find_mountpoint() to NAME__mountpoint() Shorten it, "finding" it is an implementation detail, what callers want is the pathname, not to ask for it to _always_ do the lookup. And the existing implementation already caches it, i.e. it doesn't "finds" it on every call. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-r24wa4bvtccg7mnkessrbbdj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f47bf45..ef671cd 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1456,7 +1456,7 @@ static int test_pmu(void) int ret; snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/", - sysfs_find_mountpoint()); + sysfs__mountpoint()); ret = stat(path, &st); if (ret) @@ -1473,7 +1473,7 @@ static int test_pmu_events(void) int ret; snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/", - sysfs_find_mountpoint()); + sysfs__mountpoint()); ret = stat(path, &st); if (ret) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 4af5a23..a9b48c4 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -216,7 +216,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx) cpu = map->map[idx]; - mnt = sysfs_find_mountpoint(); + mnt = sysfs__mountpoint(); if (!mnt) return -1; @@ -279,7 +279,7 @@ int cpu_map__get_core(struct cpu_map *map, int idx) cpu = map->map[idx]; - mnt = sysfs_find_mountpoint(); + mnt = sysfs__mountpoint(); if (!mnt) return -1; diff --git a/tools/perf/util/fs.c b/tools/perf/util/fs.c index a2413e8..77bac4e 100644 --- a/tools/perf/util/fs.c +++ b/tools/perf/util/fs.c @@ -88,7 +88,7 @@ static const char *fs__get_mountpoint(struct fs *fs) return fs__read_mounts(fs) ? fs->path : NULL; } -static const char *fs__find_mountpoint(int idx) +static const char *fs__mountpoint(int idx) { struct fs *fs = &fs__entries[idx]; @@ -98,10 +98,10 @@ static const char *fs__find_mountpoint(int idx) return fs__get_mountpoint(fs); } -#define FIND_MOUNTPOINT(name, idx) \ -const char *name##_find_mountpoint(void) \ -{ \ - return fs__find_mountpoint(idx); \ +#define FS__MOUNTPOINT(name, idx) \ +const char *name##__mountpoint(void) \ +{ \ + return fs__mountpoint(idx); \ } -FIND_MOUNTPOINT(sysfs, FS__SYSFS); +FS__MOUNTPOINT(sysfs, FS__SYSFS); diff --git a/tools/perf/util/fs.h b/tools/perf/util/fs.h index 082edbd..a7561c8 100644 --- a/tools/perf/util/fs.h +++ b/tools/perf/util/fs.h @@ -1,6 +1,6 @@ #ifndef __PERF_FS #define __PERF_FS -const char *sysfs_find_mountpoint(void); +const char *sysfs__mountpoint(void); #endif /* __PERF_FS */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 45b42df..c232d8d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -77,9 +77,8 @@ static int pmu_format(const char *name, struct list_head *format) { struct stat st; char path[PATH_MAX]; - const char *sysfs; + const char *sysfs = sysfs__mountpoint(); - sysfs = sysfs_find_mountpoint(); if (!sysfs) return -1; @@ -166,9 +165,8 @@ static int pmu_aliases(const char *name, struct list_head *head) { struct stat st; char path[PATH_MAX]; - const char *sysfs; + const char *sysfs = sysfs__mountpoint(); - sysfs = sysfs_find_mountpoint(); if (!sysfs) return -1; @@ -212,11 +210,10 @@ static int pmu_type(const char *name, __u32 *type) { struct stat st; char path[PATH_MAX]; - const char *sysfs; FILE *file; int ret = 0; + const char *sysfs = sysfs__mountpoint(); - sysfs = sysfs_find_mountpoint(); if (!sysfs) return -1; @@ -241,11 +238,10 @@ static int pmu_type(const char *name, __u32 *type) static void pmu_read_sysfs(void) { char path[PATH_MAX]; - const char *sysfs; DIR *dir; struct dirent *dent; + const char *sysfs = sysfs__mountpoint(); - sysfs = sysfs_find_mountpoint(); if (!sysfs) return; @@ -270,11 +266,10 @@ static struct cpu_map *pmu_cpumask(const char *name) { struct stat st; char path[PATH_MAX]; - const char *sysfs; FILE *file; struct cpu_map *cpus; + const char *sysfs = sysfs__mountpoint(); - sysfs = sysfs_find_mountpoint(); if (!sysfs) return NULL; -- cgit v0.10.2 From a9862418547e818aa5842840aecfa81d733b97e9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Nov 2013 15:14:46 +0100 Subject: perf fs: Add procfs support Adding procfs support into fs class. The interface function: const char *procfs__mountpoint(void); provides existing mountpoint path for procfs. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383660887-1734-3-git-send-email-jolsa@redhat.com [ Fixup namespace ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/fs.c b/tools/perf/util/fs.c index 77bac4e..f5be1f2 100644 --- a/tools/perf/util/fs.c +++ b/tools/perf/util/fs.c @@ -9,6 +9,11 @@ static const char * const sysfs__fs_known_mountpoints[] = { 0, }; +static const char * const procfs__known_mountpoints[] = { + "/proc", + 0, +}; + struct fs { const char *name; const char * const *mounts; @@ -18,7 +23,8 @@ struct fs { }; enum { - FS__SYSFS = 0, + FS__SYSFS = 0, + FS__PROCFS = 1, }; static struct fs fs__entries[] = { @@ -27,6 +33,11 @@ static struct fs fs__entries[] = { .mounts = sysfs__fs_known_mountpoints, .magic = SYSFS_MAGIC, }, + [FS__PROCFS] = { + .name = "proc", + .mounts = procfs__known_mountpoints, + .magic = PROC_SUPER_MAGIC, + }, }; static bool fs__read_mounts(struct fs *fs) @@ -104,4 +115,5 @@ const char *name##__mountpoint(void) \ return fs__mountpoint(idx); \ } -FS__MOUNTPOINT(sysfs, FS__SYSFS); +FS__MOUNTPOINT(sysfs, FS__SYSFS); +FS__MOUNTPOINT(procfs, FS__PROCFS); diff --git a/tools/perf/util/fs.h b/tools/perf/util/fs.h index a7561c8..5e09ce1 100644 --- a/tools/perf/util/fs.h +++ b/tools/perf/util/fs.h @@ -2,5 +2,6 @@ #define __PERF_FS const char *sysfs__mountpoint(void); +const char *procfs__mountpoint(void); #endif /* __PERF_FS */ diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h index 58b64ed..07d63cf 100644 --- a/tools/perf/util/include/linux/magic.h +++ b/tools/perf/util/include/linux/magic.h @@ -9,4 +9,8 @@ #define SYSFS_MAGIC 0x62656572 #endif +#ifndef PROC_SUPER_MAGIC +#define PROC_SUPER_MAGIC 0x9fa0 +#endif + #endif -- cgit v0.10.2 From 714647bdc516330e4405b39677d7f763e016c685 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Nov 2013 15:14:47 +0100 Subject: perf tools: Check maximum frequency rate for record/top Adding the check for maximum allowed frequency rate defined in following file: /proc/sys/kernel/perf_event_max_sample_rate When we cross the maximum value we fail and display detailed error message with advise. $ perf record -F 3000 ls Maximum frequency rate (2000) reached. Please use -F freq option with lower value or consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. In case user does not specify the frequency and the default value cross the maximum, we display warning and set the frequency value to the current maximum. $ perf record ls Lowering default frequency rate to 2000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. Same messages are used for 'perf top'. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383660887-1734-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8b45fce..ea4c04f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -958,20 +958,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); - if (rec->opts.user_interval != ULLONG_MAX) - rec->opts.default_interval = rec->opts.user_interval; - if (rec->opts.user_freq != UINT_MAX) - rec->opts.freq = rec->opts.user_freq; - - /* - * User specified count overrides default frequency. - */ - if (rec->opts.default_interval) - rec->opts.freq = 0; - else if (rec->opts.freq) { - rec->opts.default_interval = rec->opts.freq; - } else { - ui__error("frequency and count are zero, aborting\n"); + if (perf_record_opts__config(&rec->opts)) { err = -EINVAL; goto out_free_fd; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 21897f0..9acca88 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1209,20 +1209,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (top.delay_secs < 1) top.delay_secs = 1; - if (opts->user_interval != ULLONG_MAX) - opts->default_interval = opts->user_interval; - if (opts->user_freq != UINT_MAX) - opts->freq = opts->user_freq; - - /* - * User specified count overrides default frequency. - */ - if (opts->default_interval) - opts->freq = 0; - else if (opts->freq) { - opts->default_interval = opts->freq; - } else { - ui__error("frequency and count are zero, aborting\n"); + if (perf_record_opts__config(opts)) { status = -EINVAL; goto out_delete_maps; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6e8acc9..0617ce2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -99,6 +99,7 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); void perf_evlist__config(struct perf_evlist *evlist, struct perf_record_opts *opts); +int perf_record_opts__config(struct perf_record_opts *opts); int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct perf_target *target, diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 18d73aa..c8845b1 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,6 +2,8 @@ #include "evsel.h" #include "cpumap.h" #include "parse-events.h" +#include "fs.h" +#include "util.h" typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel); @@ -106,3 +108,72 @@ void perf_evlist__config(struct perf_evlist *evlist, perf_evlist__set_id_pos(evlist); } + +static int get_max_rate(unsigned int *rate) +{ + char path[PATH_MAX]; + const char *procfs = procfs__mountpoint(); + + if (!procfs) + return -1; + + snprintf(path, PATH_MAX, + "%s/sys/kernel/perf_event_max_sample_rate", procfs); + + return filename__read_int(path, (int *) rate); +} + +static int perf_record_opts__config_freq(struct perf_record_opts *opts) +{ + bool user_freq = opts->user_freq != UINT_MAX; + unsigned int max_rate; + + if (opts->user_interval != ULLONG_MAX) + opts->default_interval = opts->user_interval; + if (user_freq) + opts->freq = opts->user_freq; + + /* + * User specified count overrides default frequency. + */ + if (opts->default_interval) + opts->freq = 0; + else if (opts->freq) { + opts->default_interval = opts->freq; + } else { + pr_err("frequency and count are zero, aborting\n"); + return -1; + } + + if (get_max_rate(&max_rate)) + return 0; + + /* + * User specified frequency is over current maximum. + */ + if (user_freq && (max_rate < opts->freq)) { + pr_err("Maximum frequency rate (%u) reached.\n" + "Please use -F freq option with lower value or consider\n" + "tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n", + max_rate); + return -1; + } + + /* + * Default frequency is over current maximum. + */ + if (max_rate < opts->freq) { + pr_warning("Lowering default frequency rate to %u.\n" + "Please consider tweaking " + "/proc/sys/kernel/perf_event_max_sample_rate.\n", + max_rate); + opts->freq = max_rate; + } + + return 0; +} + +int perf_record_opts__config(struct perf_record_opts *opts) +{ + return perf_record_opts__config_freq(opts); +} -- cgit v0.10.2 From 316c7136f8bad924609163b9b115f68d59a68c82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Nov 2013 15:32:36 -0300 Subject: perf tools: Finish the removal of 'self' arguments They convey no information, perhaps I was bitten by some snake at some point, complete the detox by naming the last of those arguments more sensibly. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-u1r0dnjoro08dgztiy2g3t2q@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 315067b..fcd1dd6 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -25,7 +25,7 @@ PyMODINIT_FUNC initperf_trace_context(void); -static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args) +static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; PyObject *context; @@ -40,7 +40,7 @@ static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args) return Py_BuildValue("i", retval); } -static PyObject *perf_trace_context_common_flags(PyObject *self, +static PyObject *perf_trace_context_common_flags(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; @@ -56,7 +56,7 @@ static PyObject *perf_trace_context_common_flags(PyObject *self, return Py_BuildValue("i", retval); } -static PyObject *perf_trace_context_common_lock_depth(PyObject *self, +static PyObject *perf_trace_context_common_lock_depth(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 404ff66a..7d45d2f 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -21,32 +21,32 @@ struct ui_browser { void *priv; const char *title; char *helpline; - unsigned int (*refresh)(struct ui_browser *self); - void (*write)(struct ui_browser *self, void *entry, int row); - void (*seek)(struct ui_browser *self, off_t offset, int whence); - bool (*filter)(struct ui_browser *self, void *entry); + unsigned int (*refresh)(struct ui_browser *browser); + void (*write)(struct ui_browser *browser, void *entry, int row); + void (*seek)(struct ui_browser *browser, off_t offset, int whence); + bool (*filter)(struct ui_browser *browser, void *entry); u32 nr_entries; bool navkeypressed; bool use_navkeypressed; }; int ui_browser__set_color(struct ui_browser *browser, int color); -void ui_browser__set_percent_color(struct ui_browser *self, +void ui_browser__set_percent_color(struct ui_browser *browser, double percent, bool current); -bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); -void ui_browser__refresh_dimensions(struct ui_browser *self); -void ui_browser__reset_index(struct ui_browser *self); +bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row); +void ui_browser__refresh_dimensions(struct ui_browser *browser); +void ui_browser__reset_index(struct ui_browser *browser); -void ui_browser__gotorc(struct ui_browser *self, int y, int x); +void ui_browser__gotorc(struct ui_browser *browser, int y, int x); void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); -int ui_browser__show(struct ui_browser *self, const char *title, +int ui_browser__show(struct ui_browser *browser, const char *title, const char *helpline, ...); -void ui_browser__hide(struct ui_browser *self); -int ui_browser__refresh(struct ui_browser *self); +void ui_browser__hide(struct ui_browser *browser); +int ui_browser__refresh(struct ui_browser *browser); int ui_browser__run(struct ui_browser *browser, int delay_secs); void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries); void ui_browser__handle_resize(struct ui_browser *browser); @@ -63,11 +63,11 @@ int ui_browser__input_window(const char *title, const char *text, char *input, void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); unsigned int ui_browser__argv_refresh(struct ui_browser *browser); -void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); -unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); +void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence); +unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser); -void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence); -unsigned int ui_browser__list_head_refresh(struct ui_browser *self); +void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence); +unsigned int ui_browser__list_head_refresh(struct ui_browser *browser); void ui_browser__init(void); void annotate_browser__init(void); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a91b6b2..16848bb 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1889,7 +1889,7 @@ out: return key; } -static bool filter_group_entries(struct ui_browser *self __maybe_unused, +static bool filter_group_entries(struct ui_browser *browser __maybe_unused, void *entry) { struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 95c7cfb..b11639f 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -18,30 +18,30 @@ struct map_browser { u8 addrlen; }; -static void map_browser__write(struct ui_browser *self, void *nd, int row) +static void map_browser__write(struct ui_browser *browser, void *nd, int row) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - struct map_browser *mb = container_of(self, struct map_browser, b); - bool current_entry = ui_browser__is_current_entry(self, row); + struct map_browser *mb = container_of(browser, struct map_browser, b); + bool current_entry = ui_browser__is_current_entry(browser, row); int width; - ui_browser__set_percent_color(self, 0, current_entry); + ui_browser__set_percent_color(browser, 0, current_entry); slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ", mb->addrlen, sym->start, mb->addrlen, sym->end, sym->binding == STB_GLOBAL ? 'g' : sym->binding == STB_LOCAL ? 'l' : 'w'); - width = self->width - ((mb->addrlen * 2) + 4); + width = browser->width - ((mb->addrlen * 2) + 4); if (width > 0) slsmg_write_nstring(sym->name, width); } /* FIXME uber-kludgy, see comment on cmd_report... */ -static u32 *symbol__browser_index(struct symbol *self) +static u32 *symbol__browser_index(struct symbol *browser) { - return ((void *)self) - sizeof(struct rb_node) - sizeof(u32); + return ((void *)browser) - sizeof(struct rb_node) - sizeof(u32); } -static int map_browser__search(struct map_browser *self) +static int map_browser__search(struct map_browser *browser) { char target[512]; struct symbol *sym; @@ -53,37 +53,37 @@ static int map_browser__search(struct map_browser *self) if (target[0] == '0' && tolower(target[1]) == 'x') { u64 addr = strtoull(target, NULL, 16); - sym = map__find_symbol(self->map, addr, NULL); + sym = map__find_symbol(browser->map, addr, NULL); } else - sym = map__find_symbol_by_name(self->map, target, NULL); + sym = map__find_symbol_by_name(browser->map, target, NULL); if (sym != NULL) { u32 *idx = symbol__browser_index(sym); - self->b.top = &sym->rb_node; - self->b.index = self->b.top_idx = *idx; + browser->b.top = &sym->rb_node; + browser->b.index = browser->b.top_idx = *idx; } else ui_helpline__fpush("%s not found!", target); return 0; } -static int map_browser__run(struct map_browser *self) +static int map_browser__run(struct map_browser *browser) { int key; - if (ui_browser__show(&self->b, self->map->dso->long_name, + if (ui_browser__show(&browser->b, browser->map->dso->long_name, "Press <- or ESC to exit, %s / to search", verbose ? "" : "restart with -v to use") < 0) return -1; while (1) { - key = ui_browser__run(&self->b, 0); + key = ui_browser__run(&browser->b, 0); switch (key) { case '/': if (verbose) - map_browser__search(self); + map_browser__search(browser); default: break; case K_LEFT: @@ -94,20 +94,20 @@ static int map_browser__run(struct map_browser *self) } } out: - ui_browser__hide(&self->b); + ui_browser__hide(&browser->b); return key; } -int map__browse(struct map *self) +int map__browse(struct map *map) { struct map_browser mb = { .b = { - .entries = &self->dso->symbols[self->type], + .entries = &map->dso->symbols[map->type], .refresh = ui_browser__rb_tree_refresh, .seek = ui_browser__rb_tree_seek, .write = map_browser__write, }, - .map = self, + .map = map, }; struct rb_node *nd; char tmp[BITS_PER_LONG / 4]; diff --git a/tools/perf/ui/browsers/map.h b/tools/perf/ui/browsers/map.h index df8581a..2d58e4b 100644 --- a/tools/perf/ui/browsers/map.h +++ b/tools/perf/ui/browsers/map.h @@ -2,5 +2,5 @@ #define _PERF_UI_MAP_BROWSER_H_ 1 struct map; -int map__browse(struct map *self); +int map__browse(struct map *map); #endif /* _PERF_UI_MAP_BROWSER_H_ */ diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 12f009e..d63c68e 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -84,22 +84,22 @@ static void script_browser__write(struct ui_browser *browser, slsmg_write_nstring(sline->line, browser->width); } -static int script_browser__run(struct perf_script_browser *self) +static int script_browser__run(struct perf_script_browser *browser) { int key; - if (ui_browser__show(&self->b, self->script_name, + if (ui_browser__show(&browser->b, browser->script_name, "Press <- or ESC to exit") < 0) return -1; while (1) { - key = ui_browser__run(&self->b, 0); + key = ui_browser__run(&browser->b, 0); /* We can add some special key handling here if needed */ break; } - ui_browser__hide(&self->b); + ui_browser__hide(&browser->b); return key; } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 6c15268..c244cb5 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -213,20 +213,19 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, return ret; } -static size_t __callchain__fprintf_flat(FILE *fp, - struct callchain_node *self, +static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node, u64 total_samples) { struct callchain_list *chain; size_t ret = 0; - if (!self) + if (!node) return 0; - ret += __callchain__fprintf_flat(fp, self->parent, total_samples); + ret += __callchain__fprintf_flat(fp, node->parent, total_samples); - list_for_each_entry(chain, &self->val, list) { + list_for_each_entry(chain, &node->val, list) { if (chain->ip >= PERF_CONTEXT_MAX) continue; if (chain->ms.sym) @@ -239,15 +238,14 @@ static size_t __callchain__fprintf_flat(FILE *fp, return ret; } -static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, +static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree, u64 total_samples) { size_t ret = 0; u32 entries_printed = 0; - struct rb_node *rb_node; struct callchain_node *chain; + struct rb_node *rb_node = rb_first(tree); - rb_node = rb_first(self); while (rb_node) { double percent; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index a811f5c..929f28a 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -10,10 +10,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); -char *dso__build_id_filename(struct dso *self, char *bf, size_t size); +char *dso__build_id_filename(struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine); - #endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index c26b353..ec9ae11 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,15 +617,15 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, return machine__process_event(machine, event, sample); } -void thread__find_addr_map(struct thread *self, +void thread__find_addr_map(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al) { - struct map_groups *mg = &self->mg; + struct map_groups *mg = &thread->mg; bool load_map = false; - al->thread = self; + al->thread = thread; al->addr = addr; al->cpumode = cpumode; al->filtered = false; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 752709c..f8d70f3 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -251,7 +251,8 @@ int perf_event__process(struct perf_tool *tool, struct machine *machine); struct addr_location; -int perf_event__preprocess_sample(const union perf_event *self, + +int perf_event__preprocess_sample(const union perf_event *event, struct machine *machine, struct addr_location *al, struct perf_sample *sample); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0617ce2..e99eaed 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -88,7 +88,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); -union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 307f1c7..b621347a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -84,7 +84,7 @@ struct hists { u16 col_len[HISTC_NR_COLS]; }; -struct hist_entry *__hists__add_entry(struct hists *self, +struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, struct branch_info *bi, @@ -93,34 +93,34 @@ struct hist_entry *__hists__add_entry(struct hists *self, int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); -int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, +int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); -void hists__output_resort(struct hists *self); -void hists__collapse_resort(struct hists *self, struct ui_progress *prog); +void hists__output_resort(struct hists *hists); +void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__output_recalc_col_len(struct hists *hists, int max_rows); void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h); -void hists__inc_nr_events(struct hists *self, u32 type); +void hists__inc_nr_events(struct hists *hists, u32 type); void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); -size_t hists__fprintf(struct hists *self, bool show_header, int max_rows, +size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp); -int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr); -int hist_entry__annotate(struct hist_entry *self, size_t privsize); +int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); +int hist_entry__annotate(struct hist_entry *he, size_t privsize); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); -u16 hists__col_len(struct hists *self, enum hist_column col); -void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); -bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); +u16 hists__col_len(struct hists *hists, enum hist_column col); +void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len); +bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len); void hists__reset_col_len(struct hists *hists); void hists__calc_col_len(struct hists *hists, struct hist_entry *he); @@ -210,12 +210,9 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, return 0; } -static inline int hist_entry__tui_annotate(struct hist_entry *self - __maybe_unused, - struct perf_evsel *evsel - __maybe_unused, - struct hist_browser_timer *hbt - __maybe_unused) +static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -230,5 +227,5 @@ static inline int script_browse(const char *script_opt __maybe_unused) #define K_SWITCH_INPUT_DATA -3000 #endif -unsigned int hists__sort_list_width(struct hists *self); +unsigned int hists__sort_list_width(struct hists *hists); #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2200dad..ffb657f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -115,7 +115,7 @@ static const Dwfl_Callbacks offline_callbacks = { }; /* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *self, +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, const char *path) { int fd; @@ -124,25 +124,25 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *self, if (fd < 0) return fd; - self->dwfl = dwfl_begin(&offline_callbacks); - if (!self->dwfl) + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) goto error; - self->mod = dwfl_report_offline(self->dwfl, "", "", fd); - if (!self->mod) + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) goto error; - self->dbg = dwfl_module_getdwarf(self->mod, &self->bias); - if (!self->dbg) + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) goto error; return 0; error: - if (self->dwfl) - dwfl_end(self->dwfl); + if (dbg->dwfl) + dwfl_end(dbg->dwfl); else close(fd); - memset(self, 0, sizeof(*self)); + memset(dbg, 0, sizeof(*dbg)); return -ENOENT; } @@ -180,24 +180,24 @@ static const Dwfl_Callbacks kernel_callbacks = { }; /* Get a Dwarf from live kernel image */ -static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, +static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg, Dwarf_Addr addr) { - self->dwfl = dwfl_begin(&kernel_callbacks); - if (!self->dwfl) + dbg->dwfl = dwfl_begin(&kernel_callbacks); + if (!dbg->dwfl) return -EINVAL; /* Load the kernel dwarves: Don't care the result here */ - dwfl_linux_kernel_report_kernel(self->dwfl); - dwfl_linux_kernel_report_modules(self->dwfl); + dwfl_linux_kernel_report_kernel(dbg->dwfl); + dwfl_linux_kernel_report_modules(dbg->dwfl); - self->dbg = dwfl_addrdwarf(self->dwfl, addr, &self->bias); + dbg->dbg = dwfl_addrdwarf(dbg->dwfl, addr, &dbg->bias); /* Here, check whether we could get a real dwarf */ - if (!self->dbg) { + if (!dbg->dbg) { pr_debug("Failed to find kernel dwarf at %lx\n", (unsigned long)addr); - dwfl_end(self->dwfl); - memset(self, 0, sizeof(*self)); + dwfl_end(dbg->dwfl); + memset(dbg, 0, sizeof(*dbg)); return -ENOENT; } @@ -205,7 +205,7 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, } #else /* With older elfutils, this just support kernel module... */ -static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, +static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg, Dwarf_Addr addr __maybe_unused) { const char *path = kernel_get_module_path("kernel"); @@ -216,44 +216,45 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, } pr_debug2("Use file %s for debuginfo\n", path); - return debuginfo__init_offline_dwarf(self, path); + return debuginfo__init_offline_dwarf(dbg, path); } #endif struct debuginfo *debuginfo__new(const char *path) { - struct debuginfo *self = zalloc(sizeof(struct debuginfo)); - if (!self) + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) return NULL; - if (debuginfo__init_offline_dwarf(self, path) < 0) { - free(self); - self = NULL; + if (debuginfo__init_offline_dwarf(dbg, path) < 0) { + free(dbg); + dbg = NULL; } - return self; + return dbg; } struct debuginfo *debuginfo__new_online_kernel(unsigned long addr) { - struct debuginfo *self = zalloc(sizeof(struct debuginfo)); - if (!self) + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + + if (!dbg) return NULL; - if (debuginfo__init_online_kernel_dwarf(self, (Dwarf_Addr)addr) < 0) { - free(self); - self = NULL; + if (debuginfo__init_online_kernel_dwarf(dbg, (Dwarf_Addr)addr) < 0) { + free(dbg); + dbg = NULL; } - return self; + return dbg; } -void debuginfo__delete(struct debuginfo *self) +void debuginfo__delete(struct debuginfo *dbg) { - if (self) { - if (self->dwfl) - dwfl_end(self->dwfl); - free(self); + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); } } @@ -1083,7 +1084,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) } /* Find probe points from debuginfo */ -static int debuginfo__find_probes(struct debuginfo *self, +static int debuginfo__find_probes(struct debuginfo *dbg, struct probe_finder *pf) { struct perf_probe_point *pp = &pf->pev->point; @@ -1094,7 +1095,7 @@ static int debuginfo__find_probes(struct debuginfo *self, #if _ELFUTILS_PREREQ(0, 142) /* Get the call frame information from this dwarf */ - pf->cfi = dwarf_getcfi(self->dbg); + pf->cfi = dwarf_getcfi(dbg->dbg); #endif off = 0; @@ -1113,7 +1114,7 @@ static int debuginfo__find_probes(struct debuginfo *self, .data = pf, }; - dwarf_getpubnames(self->dbg, pubname_search_cb, + dwarf_getpubnames(dbg->dbg, pubname_search_cb, &pubname_param, 0); if (pubname_param.found) { ret = probe_point_search_cb(&pf->sp_die, &probe_param); @@ -1123,9 +1124,9 @@ static int debuginfo__find_probes(struct debuginfo *self, } /* Loop on CUs (Compilation Unit) */ - while (!dwarf_nextcu(self->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { + while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { /* Get the DIE(Debugging Information Entry) of this CU */ - diep = dwarf_offdie(self->dbg, off + cuhl, &pf->cu_die); + diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die); if (!diep) continue; @@ -1281,13 +1282,13 @@ end: } /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -int debuginfo__find_trace_events(struct debuginfo *self, +int debuginfo__find_trace_events(struct debuginfo *dbg, struct perf_probe_event *pev, struct probe_trace_event **tevs, int max_tevs) { struct trace_event_finder tf = { .pf = {.pev = pev, .callback = add_probe_trace_event}, - .mod = self->mod, .max_tevs = max_tevs}; + .mod = dbg->mod, .max_tevs = max_tevs}; int ret; /* Allocate result tevs array */ @@ -1298,7 +1299,7 @@ int debuginfo__find_trace_events(struct debuginfo *self, tf.tevs = *tevs; tf.ntevs = 0; - ret = debuginfo__find_probes(self, &tf.pf); + ret = debuginfo__find_probes(dbg, &tf.pf); if (ret < 0) { free(*tevs); *tevs = NULL; @@ -1389,14 +1390,14 @@ out: } /* Find available variables at given probe point */ -int debuginfo__find_available_vars_at(struct debuginfo *self, +int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, struct variable_list **vls, int max_vls, bool externs) { struct available_var_finder af = { .pf = {.pev = pev, .callback = add_available_vars}, - .mod = self->mod, + .mod = dbg->mod, .max_vls = max_vls, .externs = externs}; int ret; @@ -1408,7 +1409,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self, af.vls = *vls; af.nvls = 0; - ret = debuginfo__find_probes(self, &af.pf); + ret = debuginfo__find_probes(dbg, &af.pf); if (ret < 0) { /* Free vlist for error */ while (af.nvls--) { @@ -1426,7 +1427,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self, } /* Reverse search */ -int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, +int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, struct perf_probe_point *ppt) { Dwarf_Die cudie, spdie, indie; @@ -1435,10 +1436,10 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, int baseline = 0, lineno = 0, ret = 0; /* Adjust address with bias */ - addr += self->bias; + addr += dbg->bias; /* Find cu die */ - if (!dwarf_addrdie(self->dbg, (Dwarf_Addr)addr - self->bias, &cudie)) { + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) { pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; @@ -1639,7 +1640,7 @@ static int find_line_range_by_func(struct line_finder *lf) return param.retval; } -int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr) +int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr) { struct line_finder lf = {.lr = lr, .found = 0}; int ret = 0; @@ -1656,7 +1657,7 @@ int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr) struct dwarf_callback_param line_range_param = { .data = (void *)&lf, .retval = 0}; - dwarf_getpubnames(self->dbg, pubname_search_cb, + dwarf_getpubnames(dbg->dbg, pubname_search_cb, &pubname_param, 0); if (pubname_param.found) { line_range_search_cb(&lf.sp_die, &line_range_param); @@ -1667,12 +1668,12 @@ int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr) /* Loop on CUs (Compilation Unit) */ while (!lf.found && ret >= 0) { - if (dwarf_nextcu(self->dbg, off, &noff, &cuhl, + if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0) break; /* Get the DIE(Debugging Information Entry) of this CU */ - diep = dwarf_offdie(self->dbg, off + cuhl, &lf.cu_die); + diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die); if (!diep) continue; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index d6dab0e..ffc33cd 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -31,25 +31,25 @@ struct debuginfo { extern struct debuginfo *debuginfo__new(const char *path); extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr); -extern void debuginfo__delete(struct debuginfo *self); +extern void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -extern int debuginfo__find_trace_events(struct debuginfo *self, +extern int debuginfo__find_trace_events(struct debuginfo *dbg, struct perf_probe_event *pev, struct probe_trace_event **tevs, int max_tevs); /* Find a perf_probe_point from debuginfo */ -extern int debuginfo__find_probe_point(struct debuginfo *self, +extern int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, struct perf_probe_point *ppt); /* Find a line range */ -extern int debuginfo__find_line_range(struct debuginfo *self, +extern int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); /* Find available variables */ -extern int debuginfo__find_available_vars_at(struct debuginfo *self, +extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, struct variable_list **vls, int max_points, bool externs); diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index 4cedea5..c3cb658 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -5,10 +5,10 @@ struct pstack; struct pstack *pstack__new(unsigned short max_nr_entries); -void pstack__delete(struct pstack *self); -bool pstack__empty(const struct pstack *self); -void pstack__remove(struct pstack *self, void *key); -void pstack__push(struct pstack *self, void *key); -void *pstack__pop(struct pstack *self); +void pstack__delete(struct pstack *pstack); +bool pstack__empty(const struct pstack *pstack); +void pstack__remove(struct pstack *pstack, void *key); +void pstack__push(struct pstack *pstack, void *key); +void *pstack__pop(struct pstack *pstack); #endif /* _PERF_PSTACK_ */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3c1b301..0ce4694 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,11 +16,11 @@ #include "perf_regs.h" #include "vdso.h" -static int perf_session__open(struct perf_session *self) +static int perf_session__open(struct perf_session *session) { - struct perf_data_file *file = self->file; + struct perf_data_file *file = session->file; - if (perf_session__read_header(self) < 0) { + if (perf_session__read_header(session) < 0) { pr_err("incompatible file format (rerun with -v to learn more)"); return -1; } @@ -28,17 +28,17 @@ static int perf_session__open(struct perf_session *self) if (perf_data_file__is_pipe(file)) return 0; - if (!perf_evlist__valid_sample_type(self->evlist)) { + if (!perf_evlist__valid_sample_type(session->evlist)) { pr_err("non matching sample_type"); return -1; } - if (!perf_evlist__valid_sample_id_all(self->evlist)) { + if (!perf_evlist__valid_sample_id_all(session->evlist)) { pr_err("non matching sample_id_all"); return -1; } - if (!perf_evlist__valid_read_format(self->evlist)) { + if (!perf_evlist__valid_read_format(session->evlist)) { pr_err("non matching read_format"); return -1; } @@ -53,46 +53,45 @@ void perf_session__set_id_hdr_size(struct perf_session *session) machines__set_id_hdr_size(&session->machines, id_hdr_size); } -int perf_session__create_kernel_maps(struct perf_session *self) +int perf_session__create_kernel_maps(struct perf_session *session) { - int ret = machine__create_kernel_maps(&self->machines.host); + int ret = machine__create_kernel_maps(&session->machines.host); if (ret >= 0) - ret = machines__create_guest_kernel_maps(&self->machines); + ret = machines__create_guest_kernel_maps(&session->machines); return ret; } -static void perf_session__destroy_kernel_maps(struct perf_session *self) +static void perf_session__destroy_kernel_maps(struct perf_session *session) { - machines__destroy_kernel_maps(&self->machines); + machines__destroy_kernel_maps(&session->machines); } struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { - struct perf_session *self; + struct perf_session *session = zalloc(sizeof(*session)); - self = zalloc(sizeof(*self)); - if (!self) + if (!session) goto out; - self->repipe = repipe; - INIT_LIST_HEAD(&self->ordered_samples.samples); - INIT_LIST_HEAD(&self->ordered_samples.sample_cache); - INIT_LIST_HEAD(&self->ordered_samples.to_free); - machines__init(&self->machines); + session->repipe = repipe; + INIT_LIST_HEAD(&session->ordered_samples.samples); + INIT_LIST_HEAD(&session->ordered_samples.sample_cache); + INIT_LIST_HEAD(&session->ordered_samples.to_free); + machines__init(&session->machines); if (file) { if (perf_data_file__open(file)) goto out_delete; - self->file = file; + session->file = file; if (perf_data_file__is_read(file)) { - if (perf_session__open(self) < 0) + if (perf_session__open(session) < 0) goto out_close; - perf_session__set_id_hdr_size(self); + perf_session__set_id_hdr_size(session); } } @@ -101,22 +100,22 @@ struct perf_session *perf_session__new(struct perf_data_file *file, * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). */ - if (perf_session__create_kernel_maps(self) < 0) + if (perf_session__create_kernel_maps(session) < 0) goto out_delete; } if (tool && tool->ordering_requires_timestamps && - tool->ordered_samples && !perf_evlist__sample_id_all(self->evlist)) { + tool->ordered_samples && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_samples = false; } - return self; + return session; out_close: perf_data_file__close(file); out_delete: - perf_session__delete(self); + perf_session__delete(session); out: return NULL; } @@ -147,16 +146,16 @@ static void perf_session_env__delete(struct perf_session_env *env) free(env->pmu_mappings); } -void perf_session__delete(struct perf_session *self) +void perf_session__delete(struct perf_session *session) { - perf_session__destroy_kernel_maps(self); - perf_session__delete_dead_threads(self); - perf_session__delete_threads(self); - perf_session_env__delete(&self->header.env); - machines__exit(&self->machines); - if (self->file) - perf_data_file__close(self->file); - free(self); + perf_session__destroy_kernel_maps(session); + perf_session__delete_dead_threads(session); + perf_session__delete_threads(session); + perf_session_env__delete(&session->header.env); + machines__exit(&session->machines); + if (session->file) + perf_data_file__close(session->file); + free(session); vdso__exit(); } @@ -1084,11 +1083,11 @@ static int perf_session__process_event(struct perf_session *session, file_offset); } -void perf_event_header__bswap(struct perf_event_header *self) +void perf_event_header__bswap(struct perf_event_header *hdr) { - self->type = bswap_32(self->type); - self->misc = bswap_16(self->misc); - self->size = bswap_16(self->size); + hdr->type = bswap_32(hdr->type); + hdr->misc = bswap_16(hdr->misc); + hdr->size = bswap_16(hdr->size); } struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) @@ -1096,9 +1095,9 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, 0, pid); } -static struct thread *perf_session__register_idle_thread(struct perf_session *self) +static struct thread *perf_session__register_idle_thread(struct perf_session *session) { - struct thread *thread = perf_session__findnew(self, 0); + struct thread *thread = perf_session__findnew(session, 0); if (thread == NULL || thread__set_comm(thread, "swapper", 0)) { pr_err("problem inserting idle task.\n"); @@ -1150,10 +1149,10 @@ static void perf_session__warn_about_errors(const struct perf_session *session, volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *self, +static int __perf_session__process_pipe_events(struct perf_session *session, struct perf_tool *tool) { - int fd = perf_data_file__fd(self->file); + int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1181,7 +1180,7 @@ more: goto out_err; } - if (self->header.needs_swap) + if (session->header.needs_swap) perf_event_header__bswap(&event->header); size = event->header.size; @@ -1216,7 +1215,7 @@ more: } } - if ((skip = perf_session__process_event(self, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1232,12 +1231,12 @@ more: goto more; done: /* do the final flush for ordered samples */ - self->ordered_samples.next_flush = ULLONG_MAX; - err = flush_sample_queue(self, tool); + session->ordered_samples.next_flush = ULLONG_MAX; + err = flush_sample_queue(session, tool); out_err: free(buf); - perf_session__warn_about_errors(self, tool); - perf_session_free_sample_buffers(self); + perf_session__warn_about_errors(session, tool); + perf_session_free_sample_buffers(session); return err; } @@ -1377,22 +1376,22 @@ out_err: return err; } -int perf_session__process_events(struct perf_session *self, +int perf_session__process_events(struct perf_session *session, struct perf_tool *tool) { - u64 size = perf_data_file__size(self->file); + u64 size = perf_data_file__size(session->file); int err; - if (perf_session__register_idle_thread(self) == NULL) + if (perf_session__register_idle_thread(session) == NULL) return -ENOMEM; - if (!perf_data_file__is_pipe(self->file)) - err = __perf_session__process_events(self, - self->header.data_offset, - self->header.data_size, + if (!perf_data_file__is_pipe(session->file)) + err = __perf_session__process_events(session, + session->header.data_offset, + session->header.data_size, size, tool); else - err = __perf_session__process_pipe_events(self, tool); + err = __perf_session__process_pipe_events(session, tool); return err; } @@ -1441,15 +1440,15 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, return 0; } -size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp) +size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp) { - return machines__fprintf_dsos(&self->machines, fp); + return machines__fprintf_dsos(&session->machines, fp); } -size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, +size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { - return machines__fprintf_dsos_buildid(&self->machines, fp, skip, parm); + return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); } size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 27c74d3..50f6409 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -51,12 +51,12 @@ struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool); void perf_session__delete(struct perf_session *session); -void perf_event_header__bswap(struct perf_event_header *self); +void perf_event_header__bswap(struct perf_event_header *hdr); -int __perf_session__process_events(struct perf_session *self, +int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 size, struct perf_tool *tool); -int perf_session__process_events(struct perf_session *self, +int perf_session__process_events(struct perf_session *session, struct perf_tool *tool); int perf_session_queue_event(struct perf_session *s, union perf_event *event, @@ -64,37 +64,38 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, void perf_tool__fill_defaults(struct perf_tool *tool); -int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel, +int perf_session__resolve_callchain(struct perf_session *session, + struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, struct symbol **parent); -bool perf_session__has_traces(struct perf_session *self, const char *msg); +bool perf_session__has_traces(struct perf_session *session, const char *msg); void mem_bswap_64(void *src, int byte_size); void mem_bswap_32(void *src, int byte_size); void perf_event__attr_swap(struct perf_event_attr *attr); -int perf_session__create_kernel_maps(struct perf_session *self); +int perf_session__create_kernel_maps(struct perf_session *session); void perf_session__set_id_hdr_size(struct perf_session *session); static inline -struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid) +struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid) { - return machines__find(&self->machines, pid); + return machines__find(&session->machines, pid); } static inline -struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid) +struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid) { - return machines__findnew(&self->machines, pid); + return machines__findnew(&session->machines, pid); } -struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); -size_t perf_session__fprintf(struct perf_session *self, FILE *fp); +struct thread *perf_session__findnew(struct perf_session *session, pid_t pid); +size_t perf_session__fprintf(struct perf_session *session, FILE *fp); -size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); +size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, bool (fn)(struct dso *dso, int parm), int parm); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f4e16f3..f4cc147 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -180,7 +180,7 @@ struct sort_entry { int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *); int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *); - int (*se_snprintf)(struct hist_entry *self, char *bf, size_t size, + int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); u8 se_width_idx; bool elide; diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 67e4a00..3edd053 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -62,15 +62,15 @@ static struct strfilter_node *strfilter_node__alloc(const char *op, struct strfilter_node *l, struct strfilter_node *r) { - struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node)); + struct strfilter_node *node = zalloc(sizeof(*node)); - if (ret) { - ret->p = op; - ret->l = l; - ret->r = r; + if (node) { + node->p = op; + node->l = l; + node->r = r; } - return ret; + return node; } static struct strfilter_node *strfilter_node__new(const char *s, @@ -154,20 +154,20 @@ error: */ struct strfilter *strfilter__new(const char *rules, const char **err) { - struct strfilter *ret = zalloc(sizeof(struct strfilter)); + struct strfilter *filter = zalloc(sizeof(*filter)); const char *ep = NULL; - if (ret) - ret->root = strfilter_node__new(rules, &ep); + if (filter) + filter->root = strfilter_node__new(rules, &ep); - if (!ret || !ret->root || *ep != '\0') { + if (!filter || !filter->root || *ep != '\0') { if (err) *err = ep; - strfilter__delete(ret); - ret = NULL; + strfilter__delete(filter); + filter = NULL; } - return ret; + return filter; } static bool strfilter_node__compare(struct strfilter_node *node, @@ -191,9 +191,9 @@ static bool strfilter_node__compare(struct strfilter_node *node, } /* Return true if STR matches the filter rules */ -bool strfilter__compare(struct strfilter *node, const char *str) +bool strfilter__compare(struct strfilter *filter, const char *str) { - if (!node) + if (!filter) return false; - return strfilter_node__compare(node->root, str); + return strfilter_node__compare(filter->root, str); } diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index 00f58a7..fe611f3 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -30,19 +30,19 @@ struct strfilter *strfilter__new(const char *rules, const char **err); /** * strfilter__compare - compare given string and a string filter - * @self: String filter + * @filter: String filter * @str: target string * - * Compare @str and @self. Return true if the str match the rule + * Compare @str and @filter. Return true if the str match the rule */ -bool strfilter__compare(struct strfilter *self, const char *str); +bool strfilter__compare(struct strfilter *filter, const char *str); /** * strfilter__delete - delete a string filter - * @self: String filter to delete + * @filter: String filter to delete * - * Delete @self. + * Delete @filter. */ -void strfilter__delete(struct strfilter *self); +void strfilter__delete(struct strfilter *filter); #endif diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 373c055..897c1b2 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -29,24 +29,24 @@ struct machine; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); -void thread__delete(struct thread *self); +void thread__delete(struct thread *thread); static inline void thread__exited(struct thread *thread) { thread->dead = true; } int thread__set_comm(struct thread *thread, const char *comm, u64 timestamp); -int thread__comm_len(struct thread *self); +int thread__comm_len(struct thread *thread); struct comm *thread__comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); -void thread__insert_map(struct thread *self, struct map *map); +void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); -static inline struct map *thread__find_map(struct thread *self, +static inline struct map *thread__find_map(struct thread *thread, enum map_type type, u64 addr) { - return self ? map_groups__find(&self->mg, type, addr) : NULL; + return thread ? map_groups__find(&thread->mg, type, addr) : NULL; } void thread__find_addr_map(struct thread *thread, struct machine *machine, -- cgit v0.10.2 From 26c86da8821f7b64fced498674990318bc34c8de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 10:19:59 +0100 Subject: perf: Simplify the ring-buffer code By using CIRC_SPACE() we can obviate the need for perf_output_space(). Shrinks the size of perf_output_begin() by 17 bytes on x86_64-defconfig. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-vtb0xb0llebmsdlfn1v5vtfj@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 9c2ddfb..6929c58 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -12,40 +12,10 @@ #include #include #include +#include #include "internal.h" -static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, - unsigned long offset, unsigned long head) -{ - unsigned long sz = perf_data_size(rb); - unsigned long mask = sz - 1; - - /* - * check if user-writable - * overwrite : over-write its own tail - * !overwrite: buffer possibly drops events. - */ - if (rb->overwrite) - return true; - - /* - * verify that payload is not bigger than buffer - * otherwise masking logic may fail to detect - * the "not enough space" condition - */ - if ((head - offset) > sz) - return false; - - offset = (offset - tail) & mask; - head = (head - tail) & mask; - - if ((int)(head - offset) < 0) - return false; - - return true; -} - static void perf_output_wakeup(struct perf_output_handle *handle) { atomic_set(&handle->rb->poll, POLL_IN); @@ -181,9 +151,10 @@ int perf_output_begin(struct perf_output_handle *handle, tail = ACCESS_ONCE(rb->user_page->data_tail); smp_mb(); offset = head = local_read(&rb->head); - head += size; - if (unlikely(!perf_output_space(rb, tail, offset, head))) + if (!rb->overwrite && + unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) goto fail; + head += size; } while (local_cmpxchg(&rb->head, offset, head) != offset); if (head - local_read(&rb->wakeup) > rb->watermark) -- cgit v0.10.2 From c72b42a3dde487132da80202756c101b371b2add Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 17:20:25 +0100 Subject: perf: Add unlikely() to the ring-buffer code Add unlikely() annotations to 'slow' paths: When having a sampling event but no output buffer; you have bigger issues -- also the bail is still faster than actually doing the work. When having a sampling event but a control page only buffer, you have bigger issues -- again the bail is still faster than actually doing work. Optimize for the case where you're not loosing events -- again, not doing the work is still faster but make sure that when you have to actually do work its as fast as possible. The typical watermark is 1/2 the buffer size, so most events will not take this path. Shrinks perf_output_begin() by 16 bytes on x86_64-defconfig. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-wlg3jew3qnutm8opd0hyeuwn@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6929c58..383cde4 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -121,17 +121,17 @@ int perf_output_begin(struct perf_output_handle *handle, event = event->parent; rb = rcu_dereference(event->rb); - if (!rb) + if (unlikely(!rb)) goto out; - handle->rb = rb; - handle->event = event; - - if (!rb->nr_pages) + if (unlikely(!rb->nr_pages)) goto out; + handle->rb = rb; + handle->event = event; + have_lost = local_read(&rb->lost); - if (have_lost) { + if (unlikely(have_lost)) { lost_event.header.size = sizeof(lost_event); perf_event_header__init_id(&lost_event.header, &sample_data, event); @@ -157,7 +157,7 @@ int perf_output_begin(struct perf_output_handle *handle, head += size; } while (local_cmpxchg(&rb->head, offset, head) != offset); - if (head - local_read(&rb->wakeup) > rb->watermark) + if (unlikely(head - local_read(&rb->wakeup) > rb->watermark)) local_add(rb->watermark, &rb->wakeup); handle->page = offset >> (PAGE_SHIFT + page_order(rb)); @@ -167,7 +167,7 @@ int perf_output_begin(struct perf_output_handle *handle, handle->addr += handle->size; handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; - if (have_lost) { + if (unlikely(have_lost)) { lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; -- cgit v0.10.2 From 85f59edf9684603026c64c902791748116d29478 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 17:25:38 +0100 Subject: perf: Optimize perf_output_begin() There's no point in re-doing the memory-barrier when we fail the cmpxchg(). Also placing it after the space reservation loop makes it clearer it only separates the userpage->tail read from the data stores. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-c19u6egfldyx86tpyc3zgkw9@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 383cde4..6ed16ec 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -141,15 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle, perf_output_get_handle(handle); do { - /* - * Userspace could choose to issue a mb() before updating the - * tail pointer. So that all reads will be completed before the - * write is issued. - * - * See perf_output_put_handle(). - */ tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_mb(); offset = head = local_read(&rb->head); if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) @@ -157,6 +149,15 @@ int perf_output_begin(struct perf_output_handle *handle, head += size; } while (local_cmpxchg(&rb->head, offset, head) != offset); + /* + * Separate the userpage->tail read from the data stores below. + * Matches the MB userspace SHOULD issue after reading the data + * and before storing the new tail position. + * + * See perf_output_put_handle(). + */ + smp_mb(); + if (unlikely(head - local_read(&rb->wakeup) > rb->watermark)) local_add(rb->watermark, &rb->wakeup); -- cgit v0.10.2 From d20a973f46ed83e0d7d24f6c512064133038e193 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 17:29:29 +0100 Subject: perf: Optimize perf_output_begin() -- lost_event case Avoid touching the lost_event and sample_data cachelines twince. Its not like we end up doing less work, but it might help to keep all accesses to these cachelines in one place. Due to code shuffle, this looses 4 bytes on x86_64-defconfig. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-zfxnc58qxj0eawdoj31hhupv@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6ed16ec..e4d70f3 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -106,7 +106,6 @@ int perf_output_begin(struct perf_output_handle *handle, struct ring_buffer *rb; unsigned long tail, offset, head; int have_lost; - struct perf_sample_data sample_data; struct { struct perf_event_header header; u64 id; @@ -132,10 +131,9 @@ int perf_output_begin(struct perf_output_handle *handle, have_lost = local_read(&rb->lost); if (unlikely(have_lost)) { - lost_event.header.size = sizeof(lost_event); - perf_event_header__init_id(&lost_event.header, &sample_data, - event); - size += lost_event.header.size; + size += sizeof(lost_event); + if (event->attr.sample_id_all) + size += event->id_header_size; } perf_output_get_handle(handle); @@ -169,11 +167,16 @@ int perf_output_begin(struct perf_output_handle *handle, handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; if (unlikely(have_lost)) { + struct perf_sample_data sample_data; + + lost_event.header.size = sizeof(lost_event); lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; lost_event.lost = local_xchg(&rb->lost, 0); + perf_event_header__init_id(&lost_event.header, + &sample_data, event); perf_output_put(handle, lost_event); perf_event__output_id_sample(event, handle, &sample_data); } -- cgit v0.10.2 From 524feca5e9da9e5f9e5aa5d5613b1d762db9509e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 17:36:25 +0100 Subject: perf: Optimize perf_output_begin() -- address calculation Rewrite the handle address calculation code to be clearer. Saves 8 bytes on x86_64-defconfig. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-3trb2n2henb9m27tncef3ag7@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index e4d70f3..c52a32f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -105,7 +105,7 @@ int perf_output_begin(struct perf_output_handle *handle, { struct ring_buffer *rb; unsigned long tail, offset, head; - int have_lost; + int have_lost, page_shift; struct { struct perf_event_header header; u64 id; @@ -159,12 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle, if (unlikely(head - local_read(&rb->wakeup) > rb->watermark)) local_add(rb->watermark, &rb->wakeup); - handle->page = offset >> (PAGE_SHIFT + page_order(rb)); - handle->page &= rb->nr_pages - 1; - handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); - handle->addr = rb->data_pages[handle->page]; - handle->addr += handle->size; - handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; + page_shift = PAGE_SHIFT + page_order(rb); + + handle->page = (offset >> page_shift) & (rb->nr_pages - 1); + offset &= (1UL << page_shift) - 1; + handle->addr = rb->data_pages[handle->page] + offset; + handle->size = (1UL << page_shift) - offset; if (unlikely(have_lost)) { struct perf_sample_data sample_data; -- cgit v0.10.2 From 394570b7939e1262f39373866166d8ee0a506e88 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 17:41:23 +0100 Subject: perf: Update a stale comment Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Link: http://lkml.kernel.org/n/tip-9s5mze78gmlz19agt39i8rii@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index c52a32f..e8b168a 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -85,8 +85,8 @@ again: rb->user_page->data_head = head; /* - * Now check if we missed an update, rely on the (compiler) - * barrier in atomic_dec_and_test() to re-read rb->head. + * Now check if we missed an update -- rely on previous implied + * compiler barriers to force a re-read. */ if (unlikely(head != local_read(&rb->head))) { local_inc(&rb->nest); -- cgit v0.10.2 From 0a196848ca365ec582c6d86659be456be6d4ed96 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2013 21:16:22 +0100 Subject: perf: Fix arch_perf_out_copy_user default The arch_perf_output_copy_user() default of __copy_from_user_inatomic() returns bytes not copied, while all other argument functions given DEFINE_OUTPUT_COPY() return bytes copied. Since copy_from_user_nmi() is the odd duck out by returning bytes copied where all other *copy_{to,from}* functions return bytes not copied, change it over and ammend DEFINE_OUTPUT_COPY() to expect bytes not copied. Oddly enough DEFINE_OUTPUT_COPY() already returned bytes not copied while expecting its worker functions to return bytes copied. Signed-off-by: Peter Zijlstra Acked-by: will.deacon@arm.com Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20131030201622.GR16117@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8a87a32..8e13293 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1989,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) @@ -2041,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index c1760ff..ae96cfa 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -789,7 +789,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) size = ip - to; /* Must fit our buffer, see above */ bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) + if (bytes != 0) return 0; kaddr = buf; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 90ee6c1..d82d155 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -491,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) /* may fail if text not present */ bytes = copy_from_user_nmi(buf, (void __user *)from, size); - if (bytes != size) + if (bytes != 0) return X86_BR_NONE; addr = buf; diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 5465b86..ddf9ecb 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -31,6 +31,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) ret = __copy_from_user_inatomic(to, from, n); pagefault_enable(); - return n - ret; + return ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index d6aa6e8..5d04be5 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head) unsigned long bytes; bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + if (bytes != 0) return NULL; fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); @@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head) unsigned long bytes; bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + if (bytes != 0) return NULL; oprofile_add_trace(bufhead[0].return_address); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ca65997..569b2187 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) } #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ -static inline unsigned int \ +static inline unsigned long \ func_name(struct perf_output_handle *handle, \ - const void *buf, unsigned int len) \ + const void *buf, unsigned long len) \ { \ unsigned long size, written; \ \ do { \ - size = min_t(unsigned long, handle->size, len); \ - \ + size = min(handle->size, len); \ written = memcpy_func(handle->addr, buf, size); \ + written = size - written; \ \ len -= written; \ handle->addr += written; \ @@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle, \ return len; \ } -static inline int memcpy_common(void *dst, const void *src, size_t n) +static inline unsigned long +memcpy_common(void *dst, const void *src, unsigned long n) { memcpy(dst, src, n); - return n; + return 0; } DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) -#define MEMCPY_SKIP(dst, src, n) (n) +static inline unsigned long +memcpy_skip(void *dst, const void *src, unsigned long n) +{ + return 0; +} -DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) +DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip) #ifndef arch_perf_out_copy_user -#define arch_perf_out_copy_user __copy_from_user_inatomic +#define arch_perf_out_copy_user arch_perf_out_copy_user + +static inline unsigned long +arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) +{ + unsigned long ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, n); + pagefault_enable(); + + return ret; +} #endif DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) -- cgit v0.10.2 From a94d342b9cb09edfe888ea972af0883b6a8d992b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2013 11:42:46 +0100 Subject: tools/perf: Add required memory barriers To match patch bf378d341e48 ("perf: Fix perf ring buffer memory ordering") change userspace to also adhere to the ordering outlined. Signed-off-by: Peter Zijlstra Cc: Michael Neuling Cc: "Paul E. McKenney" Cc: james.hogan@imgtec.com Cc: Vince Weaver Cc: Victor Kaplansky Cc: Oleg Nesterov Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Link: http://lkml.kernel.org/r/20131030104246.GH16117@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f61c230..6a587e84 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -4,6 +4,8 @@ #include #if defined(__i386__) +#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -13,6 +15,8 @@ #endif #if defined(__x86_64__) +#define mb() asm volatile("mfence" ::: "memory") +#define wmb() asm volatile("sfence" ::: "memory") #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -23,45 +27,61 @@ #ifdef __powerpc__ #include "../../arch/powerpc/include/uapi/asm/unistd.h" +#define mb() asm volatile ("sync" ::: "memory") +#define wmb() asm volatile ("sync" ::: "memory") #define rmb() asm volatile ("sync" ::: "memory") -#define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __s390__ +#define mb() asm volatile("bcr 15,0" ::: "memory") +#define wmb() asm volatile("bcr 15,0" ::: "memory") #define rmb() asm volatile("bcr 15,0" ::: "memory") -#define cpu_relax() asm volatile("" ::: "memory"); #endif #ifdef __sh__ #if defined(__SH4A__) || defined(__SH5__) +# define mb() asm volatile("synco" ::: "memory") +# define wmb() asm volatile("synco" ::: "memory") # define rmb() asm volatile("synco" ::: "memory") #else +# define mb() asm volatile("" ::: "memory") +# define wmb() asm volatile("" ::: "memory") # define rmb() asm volatile("" ::: "memory") #endif -#define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu type" #endif #ifdef __hppa__ +#define mb() asm volatile("" ::: "memory") +#define wmb() asm volatile("" ::: "memory") #define rmb() asm volatile("" ::: "memory") -#define cpu_relax() asm volatile("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ +#ifdef __LP64__ +#define mb() asm volatile("ba,pt %%xcc, 1f\n" \ + "membar #StoreLoad\n" \ + "1:\n":::"memory") +#else +#define mb() asm volatile("":::"memory") +#endif +#define wmb() asm volatile("":::"memory") #define rmb() asm volatile("":::"memory") -#define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ +#define mb() asm volatile("mb" ::: "memory") +#define wmb() asm volatile("wmb" ::: "memory") #define rmb() asm volatile("mb" ::: "memory") -#define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ +#define mb() asm volatile ("mf" ::: "memory") +#define wmb() asm volatile ("mf" ::: "memory") #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC "model name" @@ -72,40 +92,55 @@ * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. */ +#define mb() ((void(*)(void))0xffff0fa0)() +#define wmb() ((void(*)(void))0xffff0fa0)() #define rmb() ((void(*)(void))0xffff0fa0)() -#define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "Processor" #endif #ifdef __aarch64__ -#define rmb() asm volatile("dmb ld" ::: "memory") +#define mb() asm volatile("dmb ish" ::: "memory") +#define wmb() asm volatile("dmb ishld" ::: "memory") +#define rmb() asm volatile("dmb ishst" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#define rmb() asm volatile( \ +#define mb() asm volatile( \ ".set mips2\n\t" \ "sync\n\t" \ ".set mips0" \ : /* no output */ \ : /* no input */ \ : "memory") -#define cpu_relax() asm volatile("" ::: "memory") +#define wmb() mb() +#define rmb() mb() #define CPUINFO_PROC "cpu model" #endif #ifdef __arc__ +#define mb() asm volatile("" ::: "memory") +#define wmb() asm volatile("" ::: "memory") #define rmb() asm volatile("" ::: "memory") -#define cpu_relax() rmb() #define CPUINFO_PROC "Processor" #endif #ifdef __metag__ +#define mb() asm volatile("" ::: "memory") +#define wmb() asm volatile("" ::: "memory") #define rmb() asm volatile("" ::: "memory") -#define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "CPU" #endif +#define barrier() asm volatile ("" ::: "memory") + +#ifndef cpu_relax +#define cpu_relax() barrier() +#endif + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + + #include #include #include diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/tests/rdpmc.c index ff94886..46649c2 100644 --- a/tools/perf/tests/rdpmc.c +++ b/tools/perf/tests/rdpmc.c @@ -9,8 +9,6 @@ #if defined(__x86_64__) || defined(__i386__) -#define barrier() asm volatile("" ::: "memory") - static u64 rdpmc(unsigned int counter) { unsigned int low, high; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e99eaed..ecaa582 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -177,7 +177,7 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, s static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; - int head = pc->data_head; + int head = ACCESS_ONCE(pc->data_head); rmb(); return head; } @@ -190,7 +190,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, /* * ensure all reads are done before we write the tail out. */ - /* mb(); */ + mb(); pc->data_tail = tail; } -- cgit v0.10.2 From c7e548b45ce85f765f6262149dd60d9956a31d60 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 17 Oct 2013 20:24:17 +0200 Subject: perf: Factor out strncpy() in perf_event_mmap_event() While this is really minor, but strncpy() does the unnecessary zero-padding till the end of tmp[16] and it is called every time we are going to use the string literal. Turn these strncpy()'s into the single strlcpy() under the new label, saves 72 bytes. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131017182417.GA17753@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 17b3c6c..4dc078d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5144,8 +5144,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) { - name = strncpy(tmp, "//enomem", sizeof(tmp)); - goto got_name; + name = "//enomem"; + goto cpy_name; } /* * d_path() works from the end of the rb backwards, so we @@ -5154,8 +5154,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) */ name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { - name = strncpy(tmp, "//toolong", sizeof(tmp)); - goto got_name; + name = "//toolong"; + goto cpy_name; } inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; @@ -5163,30 +5163,30 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) gen = inode->i_generation; maj = MAJOR(dev); min = MINOR(dev); - + goto got_name; } else { name = (char *)arch_vma_name(vma); - if (name) { - name = strncpy(tmp, name, sizeof(tmp) - 1); - tmp[sizeof(tmp) - 1] = '\0'; - goto got_name; - } + if (name) + goto cpy_name; if (vma->vm_start <= vma->vm_mm->start_brk && vma->vm_end >= vma->vm_mm->brk) { - name = strncpy(tmp, "[heap]", sizeof(tmp)); - goto got_name; + name = "[heap]"; + goto cpy_name; } if (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { - name = strncpy(tmp, "[stack]", sizeof(tmp)); - goto got_name; + name = "[stack]"; + goto cpy_name; } - name = strncpy(tmp, "//anon", sizeof(tmp)); - goto got_name; + name = "//anon"; + goto cpy_name; } +cpy_name: + strlcpy(tmp, name, sizeof(tmp)); + name = tmp; got_name: /* * Since our buffer works in 8 byte units we need to align our string -- cgit v0.10.2 From d1e8f4a8365d16fd179cb8cad5b4d20aafd0a695 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 31 Oct 2013 13:36:54 +0800 Subject: perf/x86/intel/uncore: Add filter support for IvyBridge-EP QPI boxes The encoding for filter registers of IvyBridge-EP uncore QPI boxes is completely the same as SandyBridge-EP. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: "Yan Zheng" Link: http://lkml.kernel.org/r/1383197815-17706-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4118f9f..6da3999 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1099,6 +1099,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = { &format_attr_umask.attr, &format_attr_edge.attr, &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, NULL, }; @@ -1312,17 +1330,30 @@ static struct intel_uncore_type ivt_uncore_imc = { IVT_UNCORE_PCI_COMMON_INIT(), }; +static struct intel_uncore_ops ivt_uncore_qpi_ops = { + .init_box = ivt_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_qpi_enable_event, + .read_counter = snbep_uncore_pci_read_counter, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + static struct intel_uncore_type ivt_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 3, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .ops = &ivt_uncore_pci_ops, - .format_group = &ivt_uncore_qpi_format_group, + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivt_uncore_qpi_ops, + .format_group = &ivt_uncore_qpi_format_group, }; static struct intel_uncore_type ivt_uncore_r2pcie = { @@ -1429,6 +1460,16 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, { /* end: all zeroes */ } }; -- cgit v0.10.2 From f891d8cfb8372eb9cfe9d0d4ca61c75bafeaae37 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 31 Oct 2013 13:36:55 +0800 Subject: perf/x86/intel: Add Ivy Bridge-EP uncore IRP box support Unlike other uncore boxes, IRP boxes live in PCI buses with no UBOX device. For PCI bus without UBOX device, we find the next bus that has UBOX device and use its 'bus to socket' mapping. Besides the counter/control registers in IRP boxes are not properly aligned. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: "Yan Zheng" Link: http://lkml.kernel.org/r/1383197815-17706-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 6da3999..29c2487 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -997,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid) } } + if (!err) { + /* + * For PCI bus with no UBOX device, find the next bus + * that has UBOX device and use its mapping. + */ + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (pcibus_to_physid[bus] >= 0) + i = pcibus_to_physid[bus]; + else + pcibus_to_physid[bus] = i; + } + } + if (ubox_dev) pci_dev_put(ubox_dev); @@ -1330,6 +1344,59 @@ static struct intel_uncore_type ivt_uncore_imc = { IVT_UNCORE_PCI_COMMON_INIT(), }; +/* registers in IRP boxes are not properly aligned */ +static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; +static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; + +static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], + hwc->config | SNBEP_PMON_CTL_EN); +} + +static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config); +} + +static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops ivt_uncore_irp_ops = { + .init_box = ivt_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivt_uncore_irp_disable_event, + .enable_event = ivt_uncore_irp_enable_event, + .read_counter = ivt_uncore_irp_read_counter, +}; + +static struct intel_uncore_type ivt_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = IVT_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivt_uncore_irp_ops, + .format_group = &ivt_uncore_format_group, +}; + static struct intel_uncore_ops ivt_uncore_qpi_ops = { .init_box = ivt_uncore_pci_init_box, .disable_box = snbep_uncore_pci_disable_box, @@ -1377,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = { enum { IVT_PCI_UNCORE_HA, IVT_PCI_UNCORE_IMC, + IVT_PCI_UNCORE_IRP, IVT_PCI_UNCORE_QPI, IVT_PCI_UNCORE_R2PCIE, IVT_PCI_UNCORE_R3QPI, @@ -1385,6 +1453,7 @@ enum { static struct intel_uncore_type *ivt_pci_uncores[] = { [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, + [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp, [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, @@ -1432,6 +1501,10 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0), + }, { /* QPI0 Port 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), -- cgit v0.10.2 From 3820b4d2789f5166afdb136bb14f93166e6cfbc2 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Tue, 15 Oct 2013 17:04:16 -0400 Subject: uprobes: Move function declarations out of arch Move the function declarations from the arch headers to the common header, since only the function bodies are architecture-specific. These changes are from Vincent Rabin's uprobes patch. [ oleg: update arch/powerpc/include/asm/uprobes.h ] Signed-off-by: Rabin Vincent Signed-off-by: David A. Long Signed-off-by: Oleg Nesterov diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 2301602..b6fc317 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -45,11 +45,4 @@ struct arch_uprobe_task { unsigned long saved_trap_nr; }; -extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); -extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); -extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); -extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 6e51979..b20b4d6 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -49,11 +49,4 @@ struct arch_uprobe_task { unsigned int saved_tf; }; -extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); -extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); -extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); -extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 9e0d5a6..28473e3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -30,6 +30,7 @@ struct vm_area_struct; struct mm_struct; struct inode; +struct notifier_block; #ifdef CONFIG_ARCH_SUPPORTS_UPROBES # include @@ -125,6 +126,13 @@ extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); +extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); +extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); +extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v0.10.2 From 736e89d9f782a7dd9a38ecda13b2db916fa72f33 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 31 Oct 2013 19:28:22 +0100 Subject: uprobes: Kill module_init() and module_exit() Turn module_init() into __initcall() and kill module_exit(). This code can't be compiled as a module so these module_*() calls only add the confusion, especially if arch-dependant code needs its own initialization hooks. Signed-off-by: Oleg Nesterov diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ae9e1d2..0012c8e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1941,9 +1941,4 @@ static int __init init_uprobes(void) return register_die_notifier(&uprobe_exception_nb); } -module_init(init_uprobes); - -static void __exit exit_uprobes(void) -{ -} -module_exit(exit_uprobes); +__initcall(init_uprobes); -- cgit v0.10.2 From 8a8de66c4f6ebd0f6d3da026ec24339aa5d1db12 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 4 Nov 2013 20:27:13 +0100 Subject: uprobes: Introduce arch_uprobe->ixol Currently xol_get_insn_slot() assumes that we should simply copy arch_uprobe->insn[] which is (ignoring arch_uprobe_analyze_insn) just the copy of the original insn. This is not true for arm which needs to create another insn to execute it out-of-line. So this patch simply adds the new member, ->ixol into the union. This doesn't make any difference for x86 and powerpc, but arm can divorce insn/ixol and initialize the correct xol insn in arch_uprobe_analyze_insn(). Signed-off-by: Oleg Nesterov diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index b6fc317..75c6ecd 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -37,6 +37,7 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { u8 insn[MAX_UINSN_BYTES]; + u8 ixol[MAX_UINSN_BYTES]; u32 ainsn; }; }; diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index b20b4d6..3087ea9 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -35,7 +35,10 @@ typedef u8 uprobe_opcode_t; struct arch_uprobe { u16 fixups; - u8 insn[MAX_UINSN_BYTES]; + union { + u8 insn[MAX_UINSN_BYTES]; + u8 ixol[MAX_UINSN_BYTES]; + }; #ifdef CONFIG_X86_64 unsigned long rip_rela_target_address; #endif diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0012c8e..fbcff61 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1264,7 +1264,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) return 0; /* Initialize the slot */ - copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES); + copy_to_page(area->page, xol_vaddr, + uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); /* * We probably need flush_icache_user_range() but it needs vma. * This should work on supported architectures too. -- cgit v0.10.2 From f72d41fa902fb19a9b63028202a400b0ce497491 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 5 Nov 2013 19:50:39 +0100 Subject: uprobes: Export write_opcode() as uprobe_write_opcode() set_swbp() and set_orig_insn() are __weak, but this is pointless because write_opcode() is static. Export write_opcode() as uprobe_write_opcode() for the upcoming arm port, this way it can actually override set_swbp() and use __opcode_to_mem_arm(bpinsn) instead if UPROBE_SWBP_INSN. Signed-off-by: Oleg Nesterov diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 28473e3..319eae7 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -109,6 +109,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); +extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index fbcff61..0ac346a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -245,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and - * write_opcode accordingly. This would never be a problem for archs that - * have fixed length instructions. + * uprobe_write_opcode accordingly. This would never be a problem for archs + * that have fixed length instructions. */ /* - * write_opcode - write the opcode at a given virtual address. + * uprobe_write_opcode - write the opcode at a given virtual address. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. @@ -261,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * For mm @mm, write the opcode at @vaddr. * Return 0 (success) or a negative errno. */ -static int write_opcode(struct mm_struct *mm, unsigned long vaddr, +int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; @@ -315,7 +315,7 @@ put_old: */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, UPROBE_SWBP_INSN); + return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } /** @@ -330,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -577,7 +577,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (ret) goto out; - /* write_opcode() assumes we don't cross page boundary */ + /* uprobe_write_opcode() assumes we don't cross page boundary */ BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); -- cgit v0.10.2 From 744a971940520cf0818e1fe882b64892c528e6de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Nov 2013 10:17:38 -0300 Subject: perf evsel: Ditch evsel->handler.data field Not needed since this cset: fcf65bf149af: perf evsel: Cache associated event_format So lets trim this struct a bit. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-j8setslokt0goiwxq9dogzqm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 409ceaf..6a25085 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -106,8 +106,8 @@ static int perf_event__repipe_sample(struct perf_tool *tool, struct perf_evsel *evsel, struct machine *machine) { - if (evsel->handler.func) { - inject_handler f = evsel->handler.func; + if (evsel->handler) { + inject_handler f = evsel->handler; return f(tool, event, sample, evsel, machine); } @@ -383,11 +383,11 @@ static int __cmd_inject(struct perf_inject *inject) if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) return -EINVAL; - evsel->handler.func = perf_inject__sched_switch; + evsel->handler = perf_inject__sched_switch; } else if (!strcmp(name, "sched:sched_process_exit")) - evsel->handler.func = perf_inject__sched_process_exit; + evsel->handler = perf_inject__sched_process_exit; else if (!strncmp(name, "sched:sched_stat_", 17)) - evsel->handler.func = perf_inject__sched_stat; + evsel->handler = perf_inject__sched_stat; } } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a28970f..929462a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -317,8 +317,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - if (evsel->handler.func != NULL) { - tracepoint_handler f = evsel->handler.func; + if (evsel->handler != NULL) { + tracepoint_handler f = evsel->handler; return f(evsel, sample); } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 35f9aaa..c852c7a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -819,8 +819,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - if (evsel->handler.func != NULL) { - tracepoint_handler f = evsel->handler.func; + if (evsel->handler != NULL) { + tracepoint_handler f = evsel->handler; return f(evsel, sample); } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a81ab18..0f3c6551 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1427,8 +1427,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ evsel->hists.stats.total_period += sample->period; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); - if (evsel->handler.func != NULL) { - tracepoint_handler f = evsel->handler.func; + if (evsel->handler != NULL) { + tracepoint_handler f = evsel->handler; err = f(tool, evsel, sample, machine); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e11c61d..41c9bde2 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -483,8 +483,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (sample->cpu > numcpus) numcpus = sample->cpu; - if (evsel->handler.func != NULL) { - tracepoint_handler f = evsel->handler.func; + if (evsel->handler != NULL) { + tracepoint_handler f = evsel->handler; return f(evsel, sample); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b3e57dc..ee59df3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1570,7 +1570,7 @@ static int trace__process_sample(struct perf_tool *tool, struct trace *trace = container_of(tool, struct trace, tool); int err = 0; - tracepoint_handler handler = evsel->handler.func; + tracepoint_handler handler = evsel->handler; if (skip_sample(trace, sample)) return 0; @@ -1656,7 +1656,7 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) return; } - evsel->handler.func = trace__vfs_getname; + evsel->handler = trace__vfs_getname; perf_evlist__add(evlist, evsel); } @@ -1768,7 +1768,7 @@ again: goto next_event; } - handler = evsel->handler.func; + handler = evsel->handler; handler(trace, evsel, &sample); next_event: perf_evlist__mmap_consume(evlist, i); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1c173cc..b939221 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -255,7 +255,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, if (evsel == NULL) return -1; - evsel->handler.func = handler; + evsel->handler = handler; perf_evlist__add(evlist, evsel); return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 5aa68cd..64ec8e1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -74,10 +74,7 @@ struct perf_evsel { off_t id_offset; }; struct cgroup_sel *cgrp; - struct { - void *func; - void *data; - } handler; + void *handler; struct cpu_map *cpus; unsigned int sample_size; int id_pos; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0ce4694..f36d24a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1650,9 +1650,9 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, continue; err = -EEXIST; - if (evsel->handler.func != NULL) + if (evsel->handler != NULL) goto out; - evsel->handler.func = assocs[i].handler; + evsel->handler = assocs[i].handler; } err = 0; -- cgit v0.10.2 From a614d01bdd0cc8200d917da25f5a3d539b944193 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 6 Nov 2013 08:55:35 -0700 Subject: perf tools: Fix version when building out of tree When building perf out of tree: $ make perf-tar-src-pkg $ tar -xf perf-.tar -C /tmp $ cd /tmp/perf $ make -C tools/perf you get this warning message: make[1]: *** No rule to make target `kernelversion'. Stop. Fix it by saving the perf version in the tar file and using that for the out of tree builds. v2: removed short form request and fixed up version string from usual output. Signed-off-by: David Ahern Suggested-by: Ingo Molnar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1383753335-25782-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/scripts/package/Makefile b/scripts/package/Makefile index a4f31c9..c5d4733 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -115,7 +115,9 @@ git --git-dir=$(srctree)/.git archive --prefix=$(perf-tar)/ \ -o $(perf-tar).tar; \ mkdir -p $(perf-tar); \ git --git-dir=$(srctree)/.git rev-parse HEAD > $(perf-tar)/HEAD; \ -tar rf $(perf-tar).tar $(perf-tar)/HEAD; \ +(cd $(srctree)/tools/perf; \ +util/PERF-VERSION-GEN ../../$(perf-tar)/ 2>/dev/null); \ +tar rf $(perf-tar).tar $(perf-tar)/HEAD $(perf-tar)/PERF-VERSION-FILE; \ rm -r $(perf-tar); \ $(if $(findstring tar-src,$@),, \ $(if $(findstring bz2,$@),bzip2, \ diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index ce7a804..39f1750 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -19,6 +19,9 @@ if test -d ../../.git -o -f ../../.git then TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" +elif test -f ../../PERF-VERSION-FILE +then + TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') fi if test -z "$TAG" then -- cgit v0.10.2 From 77170988ff67fb959602ab4df296ae676f556a59 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Nov 2013 16:35:57 -0300 Subject: perf trace: Don't relookup fields by name in each sample Instead do the lookups just when creating the tracepoints, initially for the most common, raw_syscalls:sys_{enter,exit}. It works by having evsel->priv have a per tracepoint structure with entries for the fields, for direct access, with the offset and a function to get the value from the sample, doing the swap if needed. Using a simple workload that does M millions write syscalls, we go from: # perf stat -i -e cycles /tmp/oldperf trace ./sc_hello 100 > /dev/null Performance counter stats for '/tmp/oldperf trace ./sc_hello 100': 8,366,771,459 cycles 2.668025928 seconds time elapsed # perf stat -i -e cycles perf trace ./sc_hello 100 > /dev/null Performance counter stats for 'perf trace ./sc_hello 100': 8,345,187,650 cycles 2.631748425 seconds time elapsed Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-eyfhvoo510a5i10b27dnvm88@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee59df3..329b783 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -35,6 +35,189 @@ # define MADV_UNMERGEABLE 13 #endif +struct tp_field { + int offset; + union { + u64 (*integer)(struct tp_field *field, struct perf_sample *sample); + void *(*pointer)(struct tp_field *field, struct perf_sample *sample); + }; +}; + +#define TP_UINT_FIELD(bits) \ +static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ +{ \ + return *(u##bits *)(sample->raw_data + field->offset); \ +} + +TP_UINT_FIELD(8); +TP_UINT_FIELD(16); +TP_UINT_FIELD(32); +TP_UINT_FIELD(64); + +#define TP_UINT_FIELD__SWAPPED(bits) \ +static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ +{ \ + u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + return bswap_##bits(value);\ +} + +TP_UINT_FIELD__SWAPPED(16); +TP_UINT_FIELD__SWAPPED(32); +TP_UINT_FIELD__SWAPPED(64); + +static int tp_field__init_uint(struct tp_field *field, + struct format_field *format_field, + bool needs_swap) +{ + field->offset = format_field->offset; + + switch (format_field->size) { + case 1: + field->integer = tp_field__u8; + break; + case 2: + field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; + break; + case 4: + field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; + break; + case 8: + field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; + break; + default: + return -1; + } + + return 0; +} + +static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) +{ + return sample->raw_data + field->offset; +} + +static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) +{ + field->offset = format_field->offset; + field->pointer = tp_field__ptr; + return 0; +} + +struct syscall_tp { + struct tp_field id; + union { + struct tp_field args, ret; + }; +}; + +static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, + struct tp_field *field, + const char *name) +{ + struct format_field *format_field = perf_evsel__field(evsel, name); + + if (format_field == NULL) + return -1; + + return tp_field__init_uint(field, format_field, evsel->needs_swap); +} + +#define perf_evsel__init_sc_tp_uint_field(evsel, name) \ + ({ struct syscall_tp *sc = evsel->priv;\ + perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) + +static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, + struct tp_field *field, + const char *name) +{ + struct format_field *format_field = perf_evsel__field(evsel, name); + + if (format_field == NULL) + return -1; + + return tp_field__init_ptr(field, format_field); +} + +#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ + ({ struct syscall_tp *sc = evsel->priv;\ + perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) + +static void perf_evsel__delete_priv(struct perf_evsel *evsel) +{ + free(evsel->priv); + evsel->priv = NULL; + perf_evsel__delete(evsel); +} + +static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, + void *handler, int idx) +{ + struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx); + + if (evsel) { + evsel->priv = malloc(sizeof(struct syscall_tp)); + + if (evsel->priv == NULL) + goto out_delete; + + if (perf_evsel__init_sc_tp_uint_field(evsel, id)) + goto out_delete; + + evsel->handler = handler; + } + + return evsel; + +out_delete: + perf_evsel__delete_priv(evsel); + return NULL; +} + +#define perf_evsel__sc_tp_uint(evsel, name, sample) \ + ({ struct syscall_tp *fields = evsel->priv; \ + fields->name.integer(&fields->name, sample); }) + +#define perf_evsel__sc_tp_ptr(evsel, name, sample) \ + ({ struct syscall_tp *fields = evsel->priv; \ + fields->name.pointer(&fields->name, sample); }) + +static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, + void *sys_enter_handler, + void *sys_exit_handler) +{ + int ret = -1; + int idx = evlist->nr_entries; + struct perf_evsel *sys_enter, *sys_exit; + + sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++); + if (sys_enter == NULL) + goto out; + + if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) + goto out_delete_sys_enter; + + sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++); + if (sys_exit == NULL) + goto out_delete_sys_enter; + + if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) + goto out_delete_sys_exit; + + perf_evlist__add(evlist, sys_enter); + perf_evlist__add(evlist, sys_exit); + + ret = 0; +out: + return ret; + +out_delete_sys_exit: + perf_evsel__delete_priv(sys_exit); +out_delete_sys_enter: + perf_evsel__delete_priv(sys_enter); + goto out; +} + + struct syscall_arg { unsigned long val; struct thread *thread; @@ -1392,7 +1575,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, void *args; size_t printed = 0; struct thread *thread; - int id = perf_evsel__intval(evsel, sample, "id"); + int id = perf_evsel__sc_tp_uint(evsel, id, sample); struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1407,12 +1590,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) return -1; - args = perf_evsel__rawptr(evsel, sample, "args"); - if (args == NULL) { - fprintf(trace->output, "Problems reading syscall arguments\n"); - return -1; - } - + args = perf_evsel__sc_tp_ptr(evsel, args, sample); ttrace = thread->priv; if (ttrace->entry_str == NULL) { @@ -1445,7 +1623,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, int ret; u64 duration = 0; struct thread *thread; - int id = perf_evsel__intval(evsel, sample, "id"); + int id = perf_evsel__sc_tp_uint(evsel, id, sample); struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1463,7 +1641,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary) thread__update_stats(ttrace, id, sample); - ret = perf_evsel__intval(evsel, sample, "ret"); + ret = perf_evsel__sc_tp_uint(evsel, ret, sample); if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); @@ -1675,8 +1853,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out; } - if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || - perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) + if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) goto out_error_tp; perf_evlist__add_vfs_getname(evlist); -- cgit v0.10.2 From 57706abc19afc60f0b629af839d2ebee17739f59 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 6 Nov 2013 11:41:34 -0700 Subject: perf record: Refactor feature handling into a separate function Code move only. No logic changes. Signed-off-by: David Ahern Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383763297-27066-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ea4c04f..2932069 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -342,9 +342,28 @@ out: return rc; } +static void perf_record__init_features(struct perf_record *rec) +{ + struct perf_evlist *evsel_list = rec->evlist; + struct perf_session *session = rec->session; + int feat; + + for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) + perf_header__set_feat(&session->header, feat); + + if (rec->no_buildid) + perf_header__clear_feat(&session->header, HEADER_BUILD_ID); + + if (!have_tracepoints(&evsel_list->entries)) + perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); + + if (!rec->opts.branch_stack) + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); +} + static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { - int err, feat; + int err; unsigned long waking = 0; const bool forks = argc > 0; struct machine *machine; @@ -371,17 +390,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) rec->session = session; - for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) - perf_header__set_feat(&session->header, feat); - - if (rec->no_buildid) - perf_header__clear_feat(&session->header, HEADER_BUILD_ID); - - if (!have_tracepoints(&evsel_list->entries)) - perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); - - if (!rec->opts.branch_stack) - perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + perf_record__init_features(rec); if (forks) { err = perf_evlist__prepare_workload(evsel_list, &opts->target, -- cgit v0.10.2 From f34b9001f9a2f6fa41d3582fe515d194cc86bfb2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 6 Nov 2013 11:41:35 -0700 Subject: perf record: Remove advance_output function 1 line function with only 1 user; might as well embed directly. Signed-off-by: David Ahern Suggested-by: Ingo Molnar Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383763297-27066-3-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2932069..19c4db6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -77,11 +77,6 @@ struct perf_record { off_t post_processing_offset; }; -static void advance_output(struct perf_record *rec, size_t size) -{ - rec->bytes_written += size; -} - static int write_output(struct perf_record *rec, void *buf, size_t size) { struct perf_data_file *file = &rec->file; @@ -461,7 +456,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) pr_err("Couldn't record tracing data.\n"); goto out_delete_session; } - advance_output(rec, err); + rec->bytes_written += err; } } -- cgit v0.10.2 From 7ab75cffd6a1b2195944b8522673522f09e7fcb0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 6 Nov 2013 11:41:36 -0700 Subject: perf record: Remove post_processing_offset variable Duplicates the data_offset from header in the session. Signed-off-by: David Ahern Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383763297-27066-4-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 19c4db6..15280b5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -74,7 +74,6 @@ struct perf_record { bool no_buildid; bool no_buildid_cache; long samples; - off_t post_processing_offset; }; static int write_output(struct perf_record *rec, void *buf, size_t size) @@ -247,13 +246,14 @@ static int process_buildids(struct perf_record *rec) { struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; + u64 start = session->header.data_offset; u64 size = lseek(file->fd, 0, SEEK_CUR); if (size == 0) return 0; - return __perf_session__process_events(session, rec->post_processing_offset, - size - rec->post_processing_offset, + return __perf_session__process_events(session, start, + size - start, size, &build_id__mark_dso_hit_ops); } @@ -429,8 +429,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) goto out_delete_session; } - rec->post_processing_offset = lseek(file->fd, 0, SEEK_CUR); - machine = &session->machines.host; if (file->is_pipe) { -- cgit v0.10.2 From 8ce000e83848578a621d64eccdc88bd34c2fc70c Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Wed, 6 Nov 2013 22:20:54 +0000 Subject: perf tools: Remove unneeded include There is no point in sort.h including itself. The include was added when the file was created, in commit "perf tools: Create util/sort.and use it" (dd68ada2d) and added a include to "sort.h" in lot of files (all the files that started using the file). It was probably added by mistake on sort.h too. Signed-off-by: Rodrigo Campos Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383776454-10595-1-git-send-email-rodrigo@sdfg.com.ar Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f4cc147..43e5ff4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -22,7 +22,6 @@ #include "parse-events.h" #include "thread.h" -#include "sort.h" extern regex_t parent_regex; extern const char *sort_order; -- cgit v0.10.2 From 70d7f98722a7a1df1a55d6a92d0ce959c7aba9fd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 8 Nov 2013 16:35:55 +0100 Subject: uprobes: Fix the wrong usage of current->utask in uprobe_copy_process() Commit aa59c53fd459 "uprobes: Change uprobe_copy_process() to dup xol_area" has a stupid typo, we need to setup t->utask->vaddr but the code wrongly uses current->utask. Even with this bug dup_xol_work() works "in practice", but only because get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE) likely returns the same address every time. Signed-off-by: Oleg Nesterov diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0ac346a..5e56950 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1447,7 +1447,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) if (!work) return uprobe_warn(t, "dup xol area"); - utask->vaddr = area->vaddr; + t->utask->vaddr = area->vaddr; init_task_work(work, dup_xol_work); task_work_add(t, work, true); } -- cgit v0.10.2 From 2ded0980a6e4ae96bdd84bda66c7240967d86f3c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Nov 2013 19:41:57 +0100 Subject: uprobes: Fix the memory out of bound overwrite in copy_insn() 1. copy_insn() doesn't look very nice, all calculations are confusing and it is not immediately clear why do we read the 2nd page first. 2. The usage of inode->i_size is wrong on 32-bit machines. 3. "Instruction at end of binary" logic is simply wrong, it doesn't handle the case when uprobe->offset > inode->i_size. In this case "bytes" overflows, and __copy_insn() writes to the memory outside of uprobe->arch.insn. Yes, uprobe_register() checks i_size_read(), but this file can be truncated after that. All i_size checks are racy, we do this only to catch the obvious mistakes. Change copy_insn() to call __copy_insn() in a loop, simplify and fix the bytes/nbytes calculations. Note: we do not care if we read extra bytes after inode->i_size if we got the valid page. This is fine because the task gets the same page after page-fault, and arch_uprobe_analyze_insn() can't know how many bytes were actually read anyway. Signed-off-by: Oleg Nesterov diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5e56950..24b7d6c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -504,9 +504,8 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) return ret; } -static int -__copy_insn(struct address_space *mapping, struct file *filp, char *insn, - unsigned long nbytes, loff_t offset) +static int __copy_insn(struct address_space *mapping, struct file *filp, + void *insn, int nbytes, loff_t offset) { struct page *page; @@ -528,28 +527,28 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, static int copy_insn(struct uprobe *uprobe, struct file *filp) { - struct address_space *mapping; - unsigned long nbytes; - int bytes; - - nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); - mapping = uprobe->inode->i_mapping; + struct address_space *mapping = uprobe->inode->i_mapping; + loff_t offs = uprobe->offset; + void *insn = uprobe->arch.insn; + int size = MAX_UINSN_BYTES; + int len, err = -EIO; - /* Instruction at end of binary; copy only available bytes */ - if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size) - bytes = uprobe->inode->i_size - uprobe->offset; - else - bytes = MAX_UINSN_BYTES; + /* Copy only available bytes, -EIO if nothing was read */ + do { + if (offs >= i_size_read(uprobe->inode)) + break; - /* Instruction at the page-boundary; copy bytes in second page */ - if (nbytes < bytes) { - int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, - bytes - nbytes, uprobe->offset + nbytes); + len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); + err = __copy_insn(mapping, filp, insn, len, offs); if (err) - return err; - bytes = nbytes; - } - return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); + break; + + insn += len; + offs += len; + size -= len; + } while (size); + + return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, -- cgit v0.10.2